str_replace('/', '\/', $pattern), $excludePatterns);

// NOTE(review): the statement above begins before this excerpt and is kept
// verbatim. It appears to escape "/" inside each exclusion pattern so the
// patterns can sit between "/" regex delimiters below - confirm in the full file.

// Build a single case-insensitive regex that matches any exclusion pattern.
// With no patterns, implode() would yield '//i', which matches every title and
// would silently exclude every item, so fall back to a regex that never matches.
$excludeRegex = $excludePatterns
    ? '/' . implode('|', $excludePatterns) . '/i'
    : '/(?!)/';

// Command-line options:
//   dry-run          print what would be included/excluded and write nothing
//   max-results=N    cap the number of lines printed per list in dry-run mode
$cliArgs = $argv ?? [];
$isDryRun = in_array('dry-run', $cliArgs, true);
$maxResults = 100; // default output limit
foreach ($cliArgs as $arg) {
    if (preg_match('/^max-results=(\d+)$/', $arg, $matches)) {
        $maxResults = (int) $matches[1];
    }
}

// Fetch and decode the source feed. Failures are reported on STDERR with a
// non-zero exit status so cron jobs and shell callers can detect them.
$jsonData = @file_get_contents($jsonUrl);
if ($jsonData === false || $jsonData === '') {
    fwrite(STDERR, "Error: Unable to fetch JSON from $jsonUrl\n");
    exit(1);
}

$data = json_decode($jsonData, true);
if (!is_array($data)) {
    fwrite(STDERR, "Error: Invalid JSON data from $jsonUrl\n");
    exit(1);
}

// IDs already published in a previous run, one per line in the cache file.
// file() may return false on a read error; blank lines are not IDs.
$includedIds = [];
if (file_exists($cacheFile)) {
    $includedIds = file($cacheFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ?: [];
}
// Flipped copy for O(1) membership tests instead of in_array() per item.
$alreadyIncluded = array_flip($includedIds);

// Keep items that are recent, not already included, and whose title does not
// match an exclusion pattern. Everything else goes to $excludedItems.
$newItems = [];
$excludedItems = [];
foreach ($data as $item) {
    // Skip malformed entries; the ID must be scalar to be stored line-by-line.
    if (!isset($item['published'], $item['itemid'], $item['itemtitle']) || !is_scalar($item['itemid'])) {
        continue;
    }

    $isKnown = isset($alreadyIncluded[(string) $item['itemid']]);
    $isTooOld = $item['published'] < $cutoffDate;

    if ($isKnown || $isTooOld || preg_match($excludeRegex, $item['itemtitle'])) {
        $excludedItems[] = $item;
    } else {
        $newItems[] = $item;
    }
}

// Newest first in both lists.
$byPublishedDesc = static fn($a, $b) => $b['published'] <=> $a['published'];
usort($newItems, $byPublishedDesc);
usort($excludedItems, $byPublishedDesc);

if ($isDryRun) {
    // Dry-run mode: show included and excluded entries, then stop before
    // touching the filesystem.
    echo "=== Dry Run Mode ===\n";

    echo "Filtered IN (" . count($newItems) . " total, showing up to $maxResults):\n";
    foreach (array_slice($newItems, 0, $maxResults) as $item) {
        echo "- " . ($item['itemtitle'] ?? 'No title') . "\n";
    }

    echo "\nFiltered OUT (" . count($excludedItems) . " total, showing up to $maxResults):\n";
    foreach (array_slice($excludedItems, 0, $maxResults) as $item) {
        echo "- " . ($item['itemtitle'] ?? 'No title') . "\n";
    }

    exit(0);
}

// Make sure the output directories exist. A failure here is only reported;
// the subsequent writes are checked on their own.
foreach ([dirname($rssFile), dirname($cacheFile)] as $dir) {
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        fwrite(STDERR, "Warning: Unable to create directory $dir\n");
    }
}

// Group the new items by publication day (Y-m-d). Items whose timestamp is
// not numeric are filed under today's date.
$groupedByDay = [];
foreach ($newItems as $item) {
    $day = is_numeric($item['published'])
        ? date('Y-m-d', (int) $item['published'])
        : date('Y-m-d');
    $groupedByDay[$day][] = $item;
}

// SimpleXMLElement requires a well-formed document; an empty string throws.
$rss = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"></rss>');
$channel = $rss->addChild('channel');
$channel->addChild('title', 'IndieBlog Feed (Filtered, Last 10 Days)');
$channel->addChild('link', 'https://indieblog.page/');
$channel->addChild('description', 'RSS feed of articles from the last 10 days, excluding unwanted topics');
$channel->addChild('language', 'en');

// One RSS <item> per day, whose description is an HTML list of that day's
// articles. $newEntries collects the IDs to append to the cache afterwards.
$newEntries = [];
foreach ($groupedByDay as $day => $items) {
    // The fragment is a hash of the day's content, so link and guid change
    // whenever the set of items for that day changes.
    $permalink = "https://indieblog.page/$day#" . md5(json_encode($items));

    $rssItem = $channel->addChild('item');
    $rssItem->addChild('title', "Links for $day");
    $rssItem->addChild('link', $permalink);
    $rssItem->addChild('guid', $permalink);
    $rssItem->addChild('pubDate', date(DATE_RSS, strtotime($day)));

    // NOTE(review): the 'itemurl' key is assumed to hold the article URL; it
    // is not referenced anywhere else in this excerpt - confirm against the
    // feed schema. Items without it are listed as plain text.
    $description = '<ul>';
    foreach ($items as $item) {
        $title = htmlspecialchars((string) $item['itemtitle'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $url = isset($item['itemurl']) && is_string($item['itemurl']) ? $item['itemurl'] : '';

        if ($url !== '') {
            $href = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $description .= '<li><a href="' . $href . '">' . $title . '</a></li>';
        } else {
            $description .= '<li>' . $title . '</li>';
        }

        $newEntries[] = (string) $item['itemid'];
    }
    $description .= '</ul>';

    addCData($rssItem->addChild('description'), $description);
}

// Write the feed first: if this fails, the cache must not be updated, or the
// new items would be marked as published without ever appearing in the feed.
if ($rss->asXML($rssFile) === false) {
    fwrite(STDERR, "Error: Unable to write RSS feed to $rssFile\n");
    exit(1);
}

if (!empty($newEntries)) {
    $allIds = array_values(array_unique(array_merge($includedIds, $newEntries)));
    if (file_put_contents($cacheFile, implode("\n", $allIds) . "\n") === false) {
        fwrite(STDERR, "Warning: Unable to update cache file $cacheFile\n");
    }
}

/**
 * Appends $content to $node as a CDATA section.
 *
 * SimpleXML offers no way to create CDATA, so the node is imported into DOM
 * and the section is created through its owner document.
 *
 * @param SimpleXMLElement $node    Element that receives the CDATA child.
 * @param string           $content Raw text to wrap in the CDATA section.
 */
function addCData(SimpleXMLElement $node, $content)
{
    $domNode = dom_import_simplexml($node);
    $domNode->appendChild($domNode->ownerDocument->createCDATASection((string) $content));
}