change(indieblog): when building rss feed, include original article content too

2024-11-22 11:06:24 +00:00
parent 9ed4c26438
commit dce7b451d0
1 changed files with 27 additions and 1 deletions
--- a/generate_indieblog_rss.php
+++ b/generate_indieblog_rss.php
@@ -49,8 +49,12 @@ foreach ($recentItems as $item) {
    $rssItem = $channel->addChild('item');
    $rssItem->addChild('title', htmlspecialchars($item['itemtitle'] ?? 'No title'));
    $rssItem->addChild('link', htmlspecialchars($item['itemurl'] ?? ''));
-    $rssItem->addChild('description', htmlspecialchars($item['feedtitle'] ?? ''));
    $rssItem->addChild('pubDate', date(DATE_RSS, $item['published']));
+
+    // Prefer the text of the article page itself as the item description.
+    // The URL is optional in the item data (see the 'link' child above), so only
+    // attempt a fetch when a non-empty string URL is present; otherwise, or when
+    // the fetch yields nothing, fall back to the feed title and then a placeholder.
+    $itemUrl = $item['itemurl'] ?? '';
+    $originalContent = (is_string($itemUrl) && $itemUrl !== '') ? fetchOriginalContent($itemUrl) : null;
+    $rssItem->addChild('description', htmlspecialchars($originalContent ?? $item['feedtitle'] ?? 'No content'));
+
    $newEntries[] = $item['itemid'];
 }

@@ -61,3 +65,25 @@ if (!empty($newEntries)) {

 // Save RSS feed
 $rss->asXML($rssFile);
+
+/**
+ * Fetches an article page over HTTP(S) and returns its body text with minimal markup.
+ *
+ * Only absolute http:// and https:// URLs are fetched; any other input (empty
+ * string, local path, file://, php://, data:, ...) yields null without any I/O.
+ * The content between the <body> tags is extracted, <script> and <style>
+ * elements are dropped, and all tags except <p>, <br>, <a>, <strong> and <em>
+ * are stripped.
+ *
+ * @param string $url The URL of the article to fetch.
+ * @return string|null The extracted content, or null if the URL is not fetchable,
+ *                     the request fails, no <body> is found, or the body is empty.
+ */
+function fetchOriginalContent(string $url): ?string
+{
+    // The URL comes from external feed data: restrict it to web schemes so that
+    // stream wrappers and local paths can never be read and published.
+    $scheme = parse_url($url, PHP_URL_SCHEME);
+    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
+        return null;
+    }
+
+    // Bound the request so a single slow or looping site cannot stall the run.
+    $context = stream_context_create([
+        'http' => [
+            'timeout' => 10,
+            'follow_location' => 1,
+            'max_redirects' => 5,
+        ],
+    ]);
+
+    // Fetching is best-effort: the warning raised on failure is suppressed on
+    // purpose, and null tells the caller to use its own fallback text.
+    $html = @file_get_contents($url, false, $context);
+    if ($html === false || $html === '') {
+        return null;
+    }
+
+    // \b[^>]*> matches only a real <body ...> tag (not e.g. <bodyfoo>).
+    if (!preg_match('/<body\b[^>]*>(.*?)<\/body>/si', $html, $matches)) {
+        return null;
+    }
+
+    // strip_tags() removes tags but keeps the text inside them, so script and
+    // style elements must be removed together with their content first.
+    $body = preg_replace('/<(script|style)\b[^>]*>.*?<\/\1\s*>/si', '', $matches[1]);
+    if ($body === null) {
+        // PCRE error (e.g. backtrack limit reached on a very large page).
+        return null;
+    }
+
+    // Clean HTML content (strip tags, keep minimal formatting).
+    $content = trim(strip_tags($body, '<p><br><a><strong><em>'));
+
+    // An empty result is reported as null so that "??" fallbacks still apply.
+    return $content !== '' ? $content : null;
+}