diff --git a/aggregators/filmspourenfants-adapter.js b/aggregators/filmspourenfants-adapter.js index 2c1974b..48ffe5f 100644 --- a/aggregators/filmspourenfants-adapter.js +++ b/aggregators/filmspourenfants-adapter.js @@ -6,7 +6,7 @@ const path = require('path'); const BASE_URL = 'https://www.filmspourenfants.net'; // Setup disk cache -const CACHE_DIR = path.join(__dirname, '../cache'); +const CACHE_DIR = process.env.CACHE_DIR || '/app/data/cache'; if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true }); // Cache operations @@ -38,7 +38,7 @@ function saveCache(type, key, data) { // Extract age from string like "À partir de 8 ans" or "Déconseillé aux moins de: 8 ans" function extractAgeFromText(text) { if (!text) return null; - + const match = text.match(/(\d+)\s*ans/i); if (match && match[1]) { return parseInt(match[1]); @@ -50,24 +50,24 @@ async function searchMovies(query) { // Check cache first const cached = loadCache('search', query); if (cached) return cached; - + const searchUrl = `${BASE_URL}/films-resultats/?_s=${encodeURIComponent(query)}`; console.log('Searching FilmsPourEnfants:', searchUrl); - + try { - const response = await axios.get(searchUrl, { - headers: { 'User-Agent': 'Mozilla/5.0' } + const response = await axios.get(searchUrl, { + headers: { 'User-Agent': 'Mozilla/5.0' } }); const $ = cheerio.load(response.data); const results = []; - + $('section.gp-post-item').each((_, el) => { const title = $(el).find('h2.gp-loop-title a').text().trim(); const link = $(el).find('h2.gp-loop-title a').attr('href'); const img = $(el).find('.gp-post-thumbnail img').attr('src'); const ageText = $(el).find('.gp-loop-cats a').text().trim(); const age = extractAgeFromText(ageText); - + if (title && link) { results.push({ title, @@ -78,7 +78,7 @@ async function searchMovies(query) { }); } }); - + console.log(`FilmsPourEnfants found ${results.length} results`); saveCache('search', query, results); return results; @@ -90,27 +90,27 @@ async function searchMovies(query) { async function getMovieDetails(movieUrl) { if (!movieUrl) return {}; - + // Check cache first const cached = loadCache('detail', movieUrl); if (cached) return cached; - + console.log('Fetching details for:', movieUrl); - + try { - const response = await axios.get(movieUrl, { - headers: { 'User-Agent': 'Mozilla/5.0' } + const response = await axios.get(movieUrl, { + headers: { 'User-Agent': 'Mozilla/5.0' } }); const $ = cheerio.load(response.data); const details = {}; - + // Title details.title = $('h1.gp-entry-title').text().trim(); - + // Get metadata $('.gp-entry-meta .gp-post-meta').each((_, el) => { const text = $(el).text().trim(); - + if (text.includes('Année:')) { details.year = $(el).find('a').text().trim(); } else if (text.includes('Déconseillé aux moins de:')) { @@ -125,11 +125,11 @@ async function getMovieDetails(movieUrl) { }); } }); - + // More detailed metadata $('#gp-hub-details span').each((_, el) => { const label = $(el).find('strong').text().trim(); - + if (label === 'Déconseillé aux moins de:') { details.ageText = $(el).find('a').text().trim(); details.age = extractAgeFromText(details.ageText); @@ -153,13 +153,13 @@ async function getMovieDetails(movieUrl) { }); } }); - + // Get summary - first paragraph in the entry-text details.summary = $('.gp-entry-text h4').first().text().trim(); - + // Get main image details.img = $('.gp-post-thumbnail img').attr('src') || $('.gp-hub-header-thumbnail img').attr('src'); - + // Get messages section let messages = ''; $('.gp-entry-text h3').each((_, el) => { @@ -170,7 +170,7 @@ async function getMovieDetails(movieUrl) { } }); details.messages = messages.trim(); - + // Get difficult scenes section const difficultScenesHeading = $('.gp-entry-text h2:contains("SCÈNES DIFFICILES")'); let difficultScenes = ''; @@ -184,7 +184,7 @@ async function getMovieDetails(movieUrl) { } } details.difficultScenes = difficultScenes.trim(); - + console.log(`Fetched details for: ${details.title}, Age: ${details.age}`); saveCache('detail', movieUrl, details); return details; @@ -199,7 +199,7 @@ async function searchAndEnrich(query) { const results = await searchMovies(query); return await Promise.all(results.map(async movie => { const details = await getMovieDetails(movie.link); - + return { title: movie.title, year: details.year || null, diff --git a/aggregators/filmstouspublics-adapter.js b/aggregators/filmstouspublics-adapter.js index ff5f8bd..c8264dd 100644 --- a/aggregators/filmstouspublics-adapter.js +++ b/aggregators/filmstouspublics-adapter.js @@ -5,7 +5,7 @@ const path = require('path'); const BASE_URL = 'https://www.filmstouspublics.fr'; // Setup disk cache -const CACHE_DIR = path.join(__dirname, '../cache'); +const CACHE_DIR = process.env.CACHE_DIR || '/app/data/cache'; if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true }); // Load cache from disk if available @@ -46,19 +46,19 @@ function calculateAverageAge(ageRatings) { return !isNaN(numAge) && numAge > 0; // Only include positive ages }) .map(age => typeof age === 'string' ? parseInt(age) : age); - + if (ages.length === 0) return null; - + // Calculate average const avg = ages.reduce((sum, age) => sum + age, 0) / ages.length; - + // Calculate median (more useful for skewed distributions) const sorted = [...ages].sort((a, b) => a - b); const mid = Math.floor(sorted.length / 2); - const median = sorted.length % 2 === 0 - ? (sorted[mid - 1] + sorted[mid]) / 2 + const median = sorted.length % 2 === 0 + ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid]; - + return { average: avg.toFixed(1), median, countries: ages.length, min: sorted[0], max: sorted[sorted.length-1] }; } @@ -66,33 +66,33 @@ async function searchMovies(query) { // Check cache first const cached = loadCache('search', query); if (cached) return cached; - + const searchUrl = `${BASE_URL}/?s=${encodeURIComponent(query)}`; console.log('Searching FilmsTousPublics:', searchUrl); - + try { - const response = await axios.get(searchUrl, { - headers: { 'User-Agent': 'Mozilla/5.0' } + const response = await axios.get(searchUrl, { + headers: { 'User-Agent': 'Mozilla/5.0' } }); const $ = cheerio.load(response.data); const results = []; - + // Better selector for different article structures $('article[class*="tipi-xs-12"], article[class*="post"]').each((_, el) => { const title = $(el).find('h3.title a, .title-wrap .title a').text().trim(); const link = $(el).find('h3.title a, .title-wrap .title a').attr('href'); - + // Handle lazy-loaded images properly const imgEl = $(el).find('.mask img'); const img = imgEl.attr('data-lazy-src') || imgEl.attr('src'); - + // Get rating if available let rating = null; const ratingEl = $(el).find('.lets-review-api-wrap, .lets-review-final-score'); if (ratingEl.length) { rating = ratingEl.attr('data-api-score') || ratingEl.text().trim(); } - + if (title && link) { results.push({ title, @@ -102,7 +102,7 @@ async function searchMovies(query) { }); } }); - + console.log(`FilmsTousPublics found ${results.length} results`); saveCache('search', query, results); return results; @@ -114,33 +114,33 @@ async function searchMovies(query) { async function getMovieClassification(movieUrl) { if (!movieUrl) return {}; - + // Check cache first const cached = loadCache('detail', movieUrl); if (cached) return cached; - + console.log('Fetching details for:', movieUrl); - + try { - const response = await axios.get(movieUrl, { - headers: { 'User-Agent': 'Mozilla/5.0' } + const response = await axios.get(movieUrl, { + headers: { 'User-Agent': 'Mozilla/5.0' } }); const $ = cheerio.load(response.data); - + // Get country age ratings const ageRatings = {}; console.log('Found pullquote elements:', $('aside.pullquote').length); - + // More robust approach: Find all
tags inside the pullquote section $('aside.pullquote p').each((_, el) => { const text = $(el).text().trim(); console.log('Processing age text:', text); - + // Detect "Tous publics" for France (All audiences) if (text.includes('Tous publics')) { ageRatings.france = 0; // Set to 0 for averaging but "All" for display console.log('Found France rating: Tous publics (0)'); - } + } // More flexible regex for the weird dashes used else { // Just extract any number that appears in string after "Déconseillé aux" @@ -148,12 +148,12 @@ async function getMovieClassification(movieUrl) { if (match && match[1]) { const age = parseInt(match[1]); console.log('Found age restriction:', age); - + // Identify country by image alt or src const img = $(el).find('img'); const alt = img.attr('alt') || ''; const src = img.attr('src') || ''; - + // Check for all possible countries - more flexible matching if (alt.includes('France') || src.toLowerCase().includes('france')) { ageRatings.france = age; @@ -178,19 +178,19 @@ async function getMovieClassification(movieUrl) { } } }); - + console.log('Found age ratings:', ageRatings); - + // Get summary/plot (first few paragraphs) let summary = ''; $('.entry-content > p').each((i, el) => { // Skip pullquote or other non-content paragraphs - if (!$(el).find('.pullquote').length && i < 3 && $(el).text().trim().length > 30) { + if (!$(el).find('.pullquote').length && i < 3 && $(el).text().trim().length > 30) { summary += $(el).text().trim() + ' '; } }); summary = summary.trim(); - + // Get movie metadata const metadata = {}; $('h3:contains("Informations") + ul li').each((_, el) => { @@ -211,14 +211,14 @@ async function getMovieClassification(movieUrl) { metadata.studio = text.replace('Studio :', '').trim(); } }); - + // Get overall rating let overallRating = null; const ratingEl = $('.lets-review-block__final-score .score'); if (ratingEl.length) { overallRating = ratingEl.text().trim(); } - + // Extract year from release date if available let year = null; if (metadata.releaseDate) { @@ -227,7 +227,7 @@ async function getMovieClassification(movieUrl) { year = yearMatch[0]; } } - + const result = { summary, year, @@ -235,7 +235,7 @@ async function getMovieClassification(movieUrl) { overallRating, ...metadata }; - + // Cache the result saveCache('detail', movieUrl, result); return result; @@ -250,11 +250,11 @@ async function searchAndEnrich(query) { const results = await searchMovies(query); return await Promise.all(results.map(async movie => { const details = await getMovieClassification(movie.link); - + // Calculate average age const ageStats = calculateAverageAge(details.ageRatings || {}); console.log(`Movie: ${movie.title}, Age stats:`, ageStats); - + // Convert country codes to readable names for frontend display const countryNames = { france: "France", @@ -266,22 +266,22 @@ async function searchAndEnrich(query) { netherlands: "Pays-Bas", usa: "États-Unis" }; - + // Format age ratings for display const formattedAgeRatings = {}; for (const [country, age] of Object.entries(details.ageRatings || {})) { const countryName = countryNames[country] || country; formattedAgeRatings[countryName] = age === 0 ? "Tous publics" : `${age}+`; } - + // Get a recommended age - prefer median, then average, then fallback - const recommendedAge = + const recommendedAge = ageStats?.median ? `${ageStats.median}+` : ageStats?.average ? `${ageStats.average}+` : details.ageRatings?.france === 0 ? "Tous publics" : details.ageRatings?.france ? `${details.ageRatings.france}+` : "Non spécifié"; - + return { title: movie.title, year: details.year, @@ -291,8 +291,8 @@ async function searchAndEnrich(query) { rating: movie.rating || details.overallRating, // Age information for display age: recommendedAge.replace('Tous publics', '0+').replace('Non spécifié', '-'), - ageFrance: details.ageRatings?.france === 0 ? "Tous publics" : - details.ageRatings?.france ? `${details.ageRatings.france}+` : + ageFrance: details.ageRatings?.france === 0 ? "Tous publics" : + details.ageRatings?.france ? `${details.ageRatings.france}+` : "Non spécifié", ageAverage: ageStats?.average || null, ageMedian: ageStats?.median || null,