snippets/cron/generate_indieblog_rss.php

90 lines
3.0 KiB
PHP
Raw Permalink Normal View History

<?php
// Cron script: builds an RSS 2.0 feed of recently published IndieBlog articles.
//
// Flow: download the JSON export, keep the entries published within the last
// 24 hours whose IDs are not yet listed in the cache file, write those entries
// to the RSS file, then record their IDs in the cache.
//
// NOTE: the feed file is overwritten on every run and only contains entries
// that were not listed in the cache, i.e. entries emitted by an earlier run
// are not repeated.

// Configuration
$jsonUrl = "https://indieblog.page/export"; // Source JSON URL
$rssFile = "/app/data/public/indieblog_feed.xml"; // RSS output file
$cacheFile = "/app/data/cache/indieblog_cache.txt"; // Cache for processed IDs
$recentCutoff = time() - 86400; // Articles published within the last 24 hours

// Ensure necessary directories exist. A failing mkdir() is allowed to emit its
// warning; the write checks further down turn it into a hard error.
foreach ([dirname($rssFile), dirname($cacheFile)] as $dir) {
    if (!is_dir($dir)) {
        mkdir($dir, 0777, true);
    }
}

// Load JSON data
$jsonData = @file_get_contents($jsonUrl);
if ($jsonData === false || $jsonData === '') {
    die("Error: Unable to fetch JSON from $jsonUrl\n");
}

// Decode JSON into an array. Scalars (valid JSON, but not a list of entries)
// are rejected too, because array_filter() below requires an array.
$data = json_decode($jsonData, true);
if (!is_array($data) || $data === []) {
    die("Error: Invalid JSON data from $jsonUrl\n");
}

// Load cache of already processed IDs (one ID per line)
$includedIds = [];
if (is_file($cacheFile)) {
    $cachedLines = file($cacheFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($cachedLines === false) {
        // Continuing with an empty list would overwrite the cache later on.
        die("Error: Unable to read cache file $cacheFile\n");
    }
    $includedIds = $cachedLines;
}

// ID => index map for constant-time, exact-string membership checks.
$seenIds = array_flip($includedIds);

// Filter recent items
$recentItems = array_filter(
    $data,
    static function ($item) use ($recentCutoff, $seenIds): bool {
        if (!is_array($item) || !isset($item['published'], $item['itemid'])) {
            return false;
        }
        // Skip malformed entries instead of letting date()/implode() fail later.
        if (!is_numeric($item['published']) || !is_scalar($item['itemid'])) {
            return false;
        }

        return $item['published'] >= $recentCutoff
            && !isset($seenIds[(string) $item['itemid']]);
    }
);

// Sort items by publication date (newest first)
usort($recentItems, static fn($a, $b) => $b['published'] <=> $a['published']);

// SimpleXMLElement::addChild() expects entity-encoded text (a raw "&" is
// rejected), so values are escaped first. Explicit flags keep the result
// independent of the PHP version's defaults and replace invalid UTF-8 or
// characters that are not allowed in XML.
$xmlText = static fn($text): string => htmlspecialchars(
    (string) $text,
    ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE | ENT_DISALLOWED,
    'UTF-8'
);

// Generate RSS feed
$rss = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"></rss>');
$channel = $rss->addChild('channel');
$channel->addChild('title', 'IndieBlog Feed (Recent)');
$channel->addChild('link', 'https://indieblog.page/');
$channel->addChild('description', 'RSS feed of articles published in the last 24 hours');
$channel->addChild('language', 'en');

// Add recent items to RSS feed
$newEntries = [];
foreach ($recentItems as $item) {
    // fetchOriginalContent() only accepts a string; a missing or non-string
    // URL must not abort the whole run.
    $itemUrl = is_string($item['itemurl'] ?? null) ? $item['itemurl'] : '';

    $rssItem = $channel->addChild('item');
    $rssItem->addChild('title', $xmlText($item['itemtitle'] ?? 'No title'));
    $rssItem->addChild('link', $xmlText($itemUrl));
    $rssItem->addChild('pubDate', date(DATE_RSS, (int) $item['published']));

    // Fetch original content for the entry; fall back to the feed title.
    $originalContent = $itemUrl !== '' ? fetchOriginalContent($itemUrl) : null;
    $rssItem->addChild('description', $xmlText($originalContent ?? $item['feedtitle'] ?? 'No content'));

    $newEntries[] = $item['itemid'];
}

// Save RSS feed first: the cache must only record IDs that really made it
// into a written feed, otherwise a failed write would drop them for good.
if ($rss->asXML($rssFile) === false) {
    die("Error: Unable to write RSS feed to $rssFile\n");
}

// Update cache with newly processed IDs
if ($newEntries !== []) {
    $cacheBody = implode("\n", array_merge($includedIds, $newEntries));
    if (file_put_contents($cacheFile, $cacheBody) === false) {
        die("Error: Unable to write cache file $cacheFile\n");
    }
}
/**
 * Fetches an article page and returns a reduced-HTML version of its <body>.
 *
 * Only absolute http:// and https:// URLs are fetched. Anything else (empty
 * string, local paths, file://, php://, data:, ...) yields null, so a URL
 * taken from external data cannot be used to read local files.
 *
 * @param string $url     The URL of the article to fetch.
 * @param float  $timeout Read timeout in seconds for the HTTP request.
 * @return string|null The extracted content, or null if unavailable.
 */
function fetchOriginalContent(string $url, float $timeout = 15.0): ?string
{
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return null;
    }

    // The "http" context options are also used by the https:// wrapper.
    $context = stream_context_create(['http' => ['timeout' => $timeout]]);

    // Warnings are suppressed on purpose: failure is reported through null.
    $html = @file_get_contents($url, false, $context);
    if ($html === false || $html === '') {
        return null;
    }

    return extractBodyContent($html);
}

/**
 * Extracts the content between the <body> tags and reduces it to basic markup.
 *
 * <script> and <style> elements are removed together with their content
 * before the remaining tags are stripped, because strip_tags() alone removes
 * only the tags and would leave the script/stylesheet source as text.
 * Attributes on the tags that are kept are not filtered.
 *
 * @param string $html Full HTML document.
 * @return string|null The reduced body markup, or null if no <body> element was found.
 */
function extractBodyContent(string $html): ?string
{
    // Basic extraction, adapt as needed.
    if (preg_match('/<body.*?>(.*?)<\/body>/si', $html, $matches) !== 1) {
        return null;
    }

    $body = $matches[1];

    // preg_replace() returns null on a PCRE error; keep the unfiltered body then.
    $withoutCode = preg_replace('/<(script|style)\b[^>]*>.*?<\/\1\s*>/si', '', $body);
    if ($withoutCode !== null) {
        $body = $withoutCode;
    }

    // Clean HTML content (strip tags, keep minimal formatting)
    return strip_tags($body, '<p><br><a><strong><em>');
}