<?php

// Configuration
$jsonUrl = "https://indieblog.page/export"; // Source JSON URL
$rssFile = "/app/data/public/indieblog_feed.xml"; // RSS output file
$cacheFile = "/app/data/cache/indieblog_cache.txt"; // Cache for processed IDs
$recentCutoff = time() - 86400; // Articles published within the last 24 hours

// Ensure necessary directories exist
@mkdir(dirname($rssFile), 0777, true);
@mkdir(dirname($cacheFile), 0777, true);

// Load JSON data
$jsonData = @file_get_contents($jsonUrl);
if (!$jsonData) {
    die("Error: Unable to fetch JSON from $jsonUrl\n");
}

// Decode JSON into an array
$data = json_decode($jsonData, true);
if (!is_array($data)) {
    die("Error: Invalid JSON data from $jsonUrl\n");
}

// Load cache of already processed IDs
$includedIds = file_exists($cacheFile) ? file($cacheFile, FILE_IGNORE_NEW_LINES) : [];
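
// Each export entry is assumed to provide at least an 'itemid', a 'published'
// unix timestamp and an 'itemurl'; 'itemtitle' and 'feedtitle' are optional
// and only used as fallbacks when the feed items are built below.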

// Filter recent items
$recentItems = array_filter($data, function ($item) use ($recentCutoff, $includedIds) {
    return isset($item['published'], $item['itemid']) &&
        $item['published'] >= $recentCutoff &&
        !in_array($item['itemid'], $includedIds);
});

// Sort items by publication date (newest first)
usort($recentItems, fn($a, $b) => $b['published'] <=> $a['published']);

// Generate RSS feed
$rss = new SimpleXMLElement('<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"></rss>');
$channel = $rss->addChild('channel');
$channel->addChild('title', 'IndieBlog Feed (Recent)');
$channel->addChild('link', 'https://indieblog.page/');
$channel->addChild('description', 'RSS feed of articles published in the last 24 hours');
$channel->addChild('language', 'en');

// Add recent items to RSS feed
$newEntries = [];
foreach ($recentItems as $item) {
    $rssItem = $channel->addChild('item');
    // Pre-escape text values so characters such as & and < stay valid in the XML output
    $rssItem->addChild('title', htmlspecialchars($item['itemtitle'] ?? 'No title'));
    $rssItem->addChild('link', htmlspecialchars($item['itemurl'] ?? ''));
    $rssItem->addChild('pubDate', date(DATE_RSS, $item['published']));

    // Fetch the original article content for the entry
    $originalContent = fetchOriginalContent($item['itemurl'] ?? '');
    $rssItem->addChild('description', htmlspecialchars($originalContent ?? $item['feedtitle'] ?? 'No content'));

    $newEntries[] = $item['itemid'];
}

// Update cache with newly processed IDs
if (!empty($newEntries)) {
    file_put_contents($cacheFile, implode("\n", array_merge($includedIds, $newEntries)));
}

// Save RSS feed
$rss->asXML($rssFile);

/**
 * Fetches the original content from a given URL.
 *
 * @param string $url The URL of the article to fetch.
 * @return string|null The extracted content, or null if unavailable.
 */
function fetchOriginalContent(string $url): ?string
{
    $html = @file_get_contents($url);
    if (!$html) {
        return null;
    }

    // Extract content between <body> tags (basic extraction, adapt as needed)
    if (preg_match('/<body.*?>(.*?)<\/body>/si', $html, $matches)) {
        // Clean HTML content (strip tags, keep minimal formatting)
        return strip_tags($matches[1], '<p><br><a><strong><em>');
    }

    return null;
}
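
/*
 * Optional hardening (sketch): file_get_contents() has no request timeout by
 * default, so one slow host can stall the whole run. A stream context such as
 *
 *   $context = stream_context_create(['http' => ['timeout' => 10]]);
 *   $html    = @file_get_contents($url, false, $context);
 *
 * could be used inside fetchOriginalContent() (and for the export download at
 * the top) to cap the wait per request.
 */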
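
/*
 * Example run (the script path below is an assumption; adjust it to your
 * setup). Scheduling it via cron keeps the feed fresh:
 *
 *   0 * * * * php /app/code/indieblog_feed.php
 *
 * The generated feed is written to /app/data/public/indieblog_feed.xml and can
 * be served from there as a static file.
 */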