change(indieblog): when building rss feed, include original article content too

This commit is contained in:
SansGuidon 2024-11-22 11:06:24 +00:00
parent 9ed4c26438
commit dce7b451d0

View File

@ -49,8 +49,12 @@ foreach ($recentItems as $item) {
// Append one <item> element to the channel for the current entry.
$rssItem = $channel->addChild('item');
// Title and link fall back to a placeholder / empty string when the key is missing or null.
$rssItem->addChild('title', htmlspecialchars($item['itemtitle'] ?? 'No title'));
$rssItem->addChild('link', htmlspecialchars($item['itemurl'] ?? ''));
// NOTE(review): a second 'description' child is added a few lines below; in this diff view
// this line is presumably the removed (old) version — confirm only one of the two remains.
$rssItem->addChild('description', htmlspecialchars($item['feedtitle'] ?? ''));
// NOTE(review): presumably 'published' holds a Unix timestamp, as date() expects — confirm.
$rssItem->addChild('pubDate', date(DATE_RSS, $item['published']));
// Fetch the article page behind the entry's URL; this performs one fetch per item in the loop.
// NOTE(review): unlike the 'link' line above, 'itemurl' is passed here without a fallback,
// while fetchOriginalContent() declares a non-nullable string parameter — confirm it is always set.
$originalContent = fetchOriginalContent($item['itemurl']);
// Prefer the fetched content, then the feed title, then the literal 'No content'.
$rssItem->addChild('description', htmlspecialchars($originalContent ?? $item['feedtitle'] ?? 'No content'));
// Collect the id of every item written to the feed.
$newEntries[] = $item['itemid'];
}
@ -61,3 +65,25 @@ if (!empty($newEntries)) {
// Save RSS feed: serialise the document held in $rss to the path in $rssFile.
// NOTE(review): the return value of asXML() is not checked, so a failed write goes unnoticed here.
$rss->asXML($rssFile);
/**
 * Fetches the original content from a given URL.
 *
 * Only absolute http(s) URLs are fetched. The request is best-effort: any
 * failure (empty or unsupported URL, network error, timeout, missing <body>)
 * yields null so the caller can fall back to other text.
 *
 * The returned markup is the page's <body> with <script>/<style> elements
 * removed and every tag except <p>, <br>, <a>, <strong> and <em> stripped.
 *
 * @param string $url The URL of the article to fetch.
 * @return string|null The extracted content, or null if unavailable.
 */
function fetchOriginalContent(string $url): ?string
{
    // An empty path makes file_get_contents() throw a ValueError on PHP 8,
    // which the "@" operator cannot suppress, so reject it up front.
    $url = trim($url);
    if ($url === '') {
        return null;
    }

    // Item URLs come from external feeds: refuse anything that is not http(s)
    // so a crafted entry cannot read local files or reach other stream wrappers.
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return null;
    }

    // Bound the request so one slow host cannot stall the whole feed build.
    $context = stream_context_create([
        'http' => [
            'timeout' => 5,
            'max_redirects' => 3,
        ],
    ]);

    // "@" is deliberate: a failed fetch is an expected outcome and is reported
    // to the caller through the null return value rather than a warning.
    $html = @file_get_contents($url, false, $context);
    if ($html === false || $html === '') {
        return null;
    }

    // Extract content between <body> tags (basic extraction, adapt as needed)
    if (preg_match('/<body.*?>(.*?)<\/body>/si', $html, $matches) !== 1) {
        return null;
    }

    // strip_tags() removes tags but keeps their inner text, so drop script and
    // style elements wholesale first; otherwise their source leaks into the feed.
    // If the regex engine fails (preg_replace returns null), keep the raw body.
    $body = preg_replace('#<(script|style)\b[^>]*>.*?</\1\s*>#si', '', $matches[1]) ?? $matches[1];

    // Clean HTML content (strip tags, keep minimal formatting)
    return strip_tags($body, '<p><br><a><strong><em>');
}