snippets/generate_indieblog_rss.php

<?php

// Configuration
$jsonUrl = "https://indieblog.page/export"; // Source JSON URL
$rssFile = "/app/data/public/indieblog_feed.xml"; // RSS output file
$cacheFile = "/app/data/cache/indieblog_cache.txt"; // Cache of already emitted item IDs, one per line
$recentCutoff = time() - 86400; // Only items published within the last 24 hours qualify

// Ensure the output directories exist. Creation is best-effort: a failure is
// logged here and is reported again when the corresponding file is written.
foreach ([dirname($rssFile), dirname($cacheFile)] as $dir) {
    if (!is_dir($dir) && !@mkdir($dir, 0777, true) && !is_dir($dir)) {
        error_log("Warning: Unable to create directory $dir");
    }
}

// Load JSON data
$jsonData = @file_get_contents($jsonUrl);
if ($jsonData === false || $jsonData === '') {
    die("Error: Unable to fetch JSON from $jsonUrl\n");
}

// Decode JSON into an array. An empty list is a valid export (it simply
// yields an empty feed); anything that is not an array is rejected.
$data = json_decode($jsonData, true);
if (!is_array($data)) {
    die("Error: Invalid JSON data from $jsonUrl\n");
}

// Load cache of already processed IDs. file() returns false on read failure,
// in which case we fall back to an empty cache.
$includedIds = [];
if (file_exists($cacheFile)) {
    $cachedLines = file($cacheFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($cachedLines !== false) {
        $includedIds = $cachedLines;
    }
}

// Keyed lookup for O(1) membership tests. IDs are compared as strings so an
// integer ID from JSON matches its textual form read back from the cache.
$seenIds = array_fill_keys($includedIds, true);

// Filter recent items that have not been emitted before
$recentItems = [];
foreach ($data as $item) {
    if (!is_array($item) || !isset($item['published'], $item['itemid'])) {
        continue;
    }
    // A non-numeric timestamp cannot be compared or formatted; skip the item.
    if (!is_numeric($item['published']) || !is_scalar($item['itemid'])) {
        continue;
    }

    $itemId = (string) $item['itemid'];
    $published = (int) $item['published'];
    if ($itemId === '' || $published < $recentCutoff || isset($seenIds[$itemId])) {
        continue;
    }

    // Marking the ID as seen here also drops duplicates within one export.
    $seenIds[$itemId] = true;
    $item['itemid'] = $itemId;
    $item['published'] = $published;
    $recentItems[] = $item;
}

// Sort items by publication date (newest first)
usort($recentItems, fn($a, $b) => $b['published'] <=> $a['published']);

// Generate RSS feed
$rss = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"></rss>');
$channel = $rss->addChild('channel');
$channel->addChild('title', 'IndieBlog Feed (Recent)');
$channel->addChild('link', 'https://indieblog.page/');
$channel->addChild('description', 'RSS feed of articles published in the last 24 hours');
$channel->addChild('language', 'en');

// Add recent items to RSS feed.
// Values are passed through htmlspecialchars() because addChild() does not
// escape a bare "&" in the value it is given.
$newEntries = [];
foreach ($recentItems as $item) {
    // The URL is optional in the data; never hand a missing/non-string value
    // to fetchOriginalContent(), whose parameter is typed as string.
    $itemUrl = (isset($item['itemurl']) && is_string($item['itemurl'])) ? $item['itemurl'] : '';

    $rssItem = $channel->addChild('item');
    $rssItem->addChild('title', htmlspecialchars($item['itemtitle'] ?? 'No title'));
    $rssItem->addChild('link', htmlspecialchars($itemUrl));
    $rssItem->addChild('pubDate', date(DATE_RSS, $item['published']));

    // Fetch the linked page's content for the entry; fall back to the feed
    // title when there is no URL or nothing could be extracted.
    $originalContent = $itemUrl !== '' ? fetchOriginalContent($itemUrl) : null;
    $rssItem->addChild('description', htmlspecialchars($originalContent ?? $item['feedtitle'] ?? 'No content'));

    $newEntries[] = $item['itemid'];
}

// Save RSS feed first: if this fails, the cache must stay untouched so the
// same items are picked up again on the next run instead of being lost.
if ($rss->asXML($rssFile) === false) {
    die("Error: Unable to write RSS feed to $rssFile\n");
}

// Update cache with newly processed IDs
if (!empty($newEntries)) {
    $cacheBody = implode("\n", array_merge($includedIds, $newEntries));
    if (file_put_contents($cacheFile, $cacheBody, LOCK_EX) === false) {
        error_log("Warning: Unable to update cache file $cacheFile");
    }
}

/**
 * Fetches a page over HTTP(S) and returns a lightly cleaned copy of its body.
 *
 * Only absolute http:// and https:// URLs are fetched. Any other input
 * (local paths, file://, php://, malformed URLs, empty string) yields null
 * without touching the filesystem or network, so an untrusted URL can never
 * cause a local file to be read.
 *
 * @param string $url The URL of the article to fetch.
 * @return string|null The text between the <body> tags with all markup removed
 *                     except <p>, <br>, <a>, <strong> and <em>; null if the URL
 *                     is not allowed, the fetch fails, or no <body> is found.
 */
function fetchOriginalContent(string $url): ?string
{
    // parse_url() yields false/null when there is no usable scheme; the cast
    // turns both into '' so they fail the allow-list check below.
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        return null;
    }

    // Bound the time spent on a single unresponsive host. The "http" context
    // options are used for https:// URLs as well.
    $context = stream_context_create([
        'http' => [
            'timeout' => 10,
        ],
    ]);

    // Fetch failures are expected for arbitrary third-party pages and are
    // reported to the caller as null rather than as PHP warnings.
    $html = @file_get_contents($url, false, $context);
    if ($html === false || $html === '') {
        return null;
    }

    // Extract content between <body> tags (basic extraction, adapt as needed).
    // preg_match() returns 1 on a match, 0 on no match and false on error.
    if (preg_match('/<body.*?>(.*?)<\/body>/si', $html, $matches) === 1) {
        // Clean HTML content (strip tags, keep minimal formatting)
        return strip_tags($matches[1], '<p><br><a><strong><em>');
    }

    return null;
}