<?php

declare(strict_types=1);

/**
 * Cron job: builds an RSS 2.0 feed of recently published IndieBlog articles.
 *
 * Steps:
 *   1. Download the JSON export from indieblog.page.
 *   2. Keep the entries published within the last 24 hours whose IDs are not
 *      yet recorded in the cache file.
 *   3. Write those entries (newest first) to the RSS file, using the body of
 *      the article page as the description when it can be fetched.
 *   4. Record the processed IDs in the cache so later runs skip them.
 *
 * On a fatal problem the script writes a message to stderr and exits with
 * status 1 so that the cron daemon can notice the failure.
 */

// Configuration
$jsonUrl = 'https://indieblog.page/export';            // Source JSON URL
$rssFile = '/app/data/public/indieblog_feed.xml';      // RSS output file
$cacheFile = '/app/data/cache/indieblog_cache.txt';    // Cache of processed IDs, one per line
$recentCutoff = time() - 86400;                        // Articles published within the last 24 hours

// Ensure the output directories exist. The second is_dir() covers the race
// where another process creates the directory between the check and mkdir().
foreach ([dirname($rssFile), dirname($cacheFile)] as $directory) {
    if (!is_dir($directory) && !@mkdir($directory, 0777, true) && !is_dir($directory)) {
        failWith("Error: Unable to create directory $directory\n");
    }
}

// Load JSON data
$jsonData = @file_get_contents($jsonUrl);
if ($jsonData === false || $jsonData === '') {
    failWith("Error: Unable to fetch JSON from $jsonUrl\n");
}

// Decode JSON into an array. An empty list is a valid export (it simply
// produces an empty feed), so test the type rather than the truthiness.
$data = json_decode($jsonData, true);
if (!is_array($data)) {
    failWith("Error: Invalid JSON data from $jsonUrl\n");
}

// Load cache of already processed IDs
$includedIds = [];
if (is_file($cacheFile)) {
    $cachedLines = file($cacheFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($cachedLines === false) {
        failWith("Error: Unable to read cache file $cacheFile\n");
    }
    $includedIds = $cachedLines;
}

// Keyed set of known IDs: isset() is O(1), whereas in_array() would rescan
// the whole (ever-growing) cache for every exported item.
$seenIds = array_fill_keys($includedIds, true);

// Filter recent items. Malformed entries (non-array, non-numeric timestamp,
// non-scalar ID) are dropped here so the code below can rely on their shape.
$recentItems = array_filter(
    $data,
    static function ($item) use ($recentCutoff, $seenIds): bool {
        if (!is_array($item) || !isset($item['published'], $item['itemid'])) {
            return false;
        }
        if (!is_numeric($item['published']) || !is_scalar($item['itemid'])) {
            return false;
        }

        return (int) $item['published'] >= $recentCutoff
            && !isset($seenIds[(string) $item['itemid']]);
    }
);

// Sort items by publication date (newest first)
usort(
    $recentItems,
    static fn (array $a, array $b): int => (int) $b['published'] <=> (int) $a['published']
);

// Generate RSS feed
$rss = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"></rss>');
$channel = $rss->addChild('channel');
$channel->addChild('title', 'IndieBlog Feed (Recent)');
$channel->addChild('link', 'https://indieblog.page/');
$channel->addChild('description', 'RSS feed of articles published in the last 24 hours');
$channel->addChild('language', 'en');

// Add recent items to RSS feed
$newEntries = [];
foreach ($recentItems as $item) {
    $itemId = (string) $item['itemid'];
    if (isset($seenIds[$itemId])) {
        // The same ID appeared earlier in this export; emit it only once.
        continue;
    }
    $seenIds[$itemId] = true;

    $itemUrl = scalarText($item['itemurl'] ?? null, '');

    $rssItem = $channel->addChild('item');
    $rssItem->addChild('title', xmlText(scalarText($item['itemtitle'] ?? null, 'No title')));
    $rssItem->addChild('link', xmlText($itemUrl));
    $rssItem->addChild('pubDate', date(DATE_RSS, (int) $item['published']));

    // Fetch the article page for the description; an entry without a URL
    // goes straight to the fallback instead of crashing the run.
    $originalContent = $itemUrl !== '' ? fetchOriginalContent($itemUrl) : null;
    $description = $originalContent ?? scalarText($item['feedtitle'] ?? null, 'No content');
    $rssItem->addChild('description', xmlText($description));

    $newEntries[] = $itemId;
}

// Save the RSS feed first: if this fails, the IDs must not be marked as
// processed, otherwise those articles would never be published.
if ($rss->asXML($rssFile) === false) {
    failWith("Error: Unable to write RSS feed to $rssFile\n");
}

// Update cache with newly processed IDs
if ($newEntries !== []) {
    $cacheBody = implode("\n", array_merge($includedIds, $newEntries));
    if (file_put_contents($cacheFile, $cacheBody, LOCK_EX) === false) {
        failWith("Error: Unable to update cache file $cacheFile\n");
    }
}

/**
 * Reports a fatal error on stderr and terminates the script with status 1.
 *
 * @param string $message Message to print (include the trailing newline).
 */
function failWith(string $message): void
{
    file_put_contents('php://stderr', $message);
    exit(1);
}

/**
 * Converts a decoded JSON value to a string, substituting a default for
 * null and for non-scalar values (arrays).
 *
 * @param mixed  $value   Value taken from the decoded JSON item.
 * @param string $default Text used when $value is null or not scalar.
 */
function scalarText($value, string $default): string
{
    return is_scalar($value) ? (string) $value : $default;
}

/**
 * Escapes text for SimpleXMLElement::addChild().
 *
 * addChild() interprets entity references in its value argument, so a raw
 * "&" would be rejected; the text therefore has to be escaped beforehand.
 * ENT_SUBSTITUTE / ENT_DISALLOWED replace byte sequences and code points
 * that are not valid in a UTF-8 XML document instead of blanking the string.
 *
 * @param string $text Raw text.
 */
function xmlText(string $text): string
{
    return htmlspecialchars($text, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE | ENT_DISALLOWED, 'UTF-8');
}

/**
 * Fetches the original content from a given URL.
 *
 * Only http:// and https:// URLs are fetched: the URL comes from a remote
 * JSON export, and file_get_contents() would otherwise happily read a local
 * path or another stream wrapper and publish the result in the feed.
 * NOTE(review): this does not stop requests to internal HTTP hosts; add a
 * host allow/deny list if the job runs inside a trusted network.
 *
 * @param string $url The URL of the article to fetch.
 * @return string|null The extracted content, or null if unavailable.
 */
function fetchOriginalContent(string $url): ?string
{
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        return null;
    }

    // Bound the wait per article so one unresponsive blog cannot stall the job.
    $context = stream_context_create(['http' => ['timeout' => 10]]);

    // Best effort: an unreachable article is expected, so the warning is
    // suppressed and the caller falls back to the feed title.
    $html = @file_get_contents($url, false, $context);
    if ($html === false || $html === '') {
        return null;
    }

    // Extract content between <body> tags (basic extraction, adapt as needed)
    if (preg_match('/<body.*?>(.*?)<\/body>/si', $html, $matches) !== 1) {
        return null;
    }

    // strip_tags() removes the <script>/<style> tags but keeps their inner
    // text, so drop those elements entirely first.
    $body = preg_replace('/<(script|style)\b[^>]*>.*?<\/\1\s*>/si', '', $matches[1]) ?? $matches[1];

    // Clean HTML content (strip tags, keep minimal formatting)
    $content = trim(strip_tags($body, '<p><br><a><strong><em>'));

    // An empty extraction is reported as "unavailable" so the fallback applies.
    return $content === '' ? null : $content;
}