const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');

const BASE_URL = 'https://www.filmstouspublics.fr';

// Cached entries older than this are ignored (24 hours).
const CACHE_TTL_MS = 24 * 60 * 60 * 1000;

// Abort HTTP requests that stall; without a timeout a single hung connection
// would block the whole Promise.all in searchAndEnrich forever.
const REQUEST_TIMEOUT_MS = 15000;

const REQUEST_OPTIONS = Object.freeze({
  headers: { 'User-Agent': 'Mozilla/5.0' },
  timeout: REQUEST_TIMEOUT_MS,
});

// Setup disk cache (recursive mkdir is a no-op when the directory exists).
const CACHE_DIR = path.join(__dirname, '../cache');
fs.mkdirSync(CACHE_DIR, { recursive: true });

// Internal country keys -> French display names used by the frontend.
const COUNTRY_NAMES = Object.freeze({
  france: 'France',
  germany: 'Allemagne',
  spain: 'Espagne',
  uk: 'Royaume-Uni',
  sweden: 'Suède',
  switzerland: 'Suisse',
  netherlands: 'Pays-Bas',
  usa: 'États-Unis',
});

// Flag-image matchers, tested in order: a country matches when the image
// `alt` contains one of `alts` (case-sensitive) or the lower-cased image URL
// contains `src`.
const COUNTRY_MATCHERS = Object.freeze([
  { key: 'france', alts: ['France'], src: 'france' },
  { key: 'germany', alts: ['Allemagne'], src: 'allemagne' },
  { key: 'spain', alts: ['espagne', 'Espagne'], src: 'espagne' },
  { key: 'uk', alts: ['Royaume'], src: 'royaume' },
  { key: 'sweden', alts: ['Suede', 'Suède'], src: 'suede' },
  { key: 'switzerland', alts: ['Suisse'], src: 'suisse' },
  { key: 'netherlands', alts: ['Pays'], src: 'pays-bas' },
  { key: 'usa', alts: ['États-Unis', 'Etats-Unis'], src: 'etats-unis' },
]);

// Label prefix in the "Informations" list -> metadata field name.
const METADATA_LABELS = Object.freeze([
  ['Durée :', 'duration'],
  ['Nom original :', 'originalTitle'],
  ['Sortie :', 'releaseDate'],
  ['Réalisateur :', 'director'],
  ['Producteur :', 'producer'],
  ['Acteurs :', 'actors'],
  ['Studio :', 'studio'],
]);

/**
 * Build the on-disk path of a cache entry.
 *
 * Every non-alphanumeric character of the key is replaced by "_", so distinct
 * keys can map to the same file (e.g. "a b" and "a_b"). The scheme is kept
 * as-is so that cache files written by earlier versions remain readable.
 *
 * @param {string} type - Cache namespace (e.g. 'search', 'detail').
 * @param {string} key - Cache key (search query or movie URL).
 * @returns {string} Absolute path of the JSON cache file.
 */
function cacheFilePath(type, key) {
  return path.join(CACHE_DIR, `${type}_${key.replace(/[^a-z0-9]/gi, '_')}.json`);
}

/**
 * Load a cache entry from disk if it exists and is younger than CACHE_TTL_MS.
 *
 * @param {string} type - Cache namespace.
 * @param {string} key - Cache key.
 * @returns {*} The parsed JSON value, or null on miss, expiry or any error.
 */
function loadCache(type, key) {
  try {
    const file = cacheFilePath(type, key);
    if (!fs.existsSync(file)) {
      return null;
    }
    const { mtime } = fs.statSync(file);
    if (Date.now() - mtime.getTime() >= CACHE_TTL_MS) {
      return null;
    }
    console.log(`Cache hit for ${type}:`, key);
    return JSON.parse(fs.readFileSync(file, 'utf8'));
  } catch (e) {
    // Best effort: a broken cache must never break a lookup.
    console.error('Cache load error:', e.message);
    return null;
  }
}

/**
 * Save a value to the disk cache. Failures are logged and otherwise ignored.
 *
 * @param {string} type - Cache namespace.
 * @param {string} key - Cache key.
 * @param {*} data - JSON-serializable value to store.
 * @returns {void}
 */
function saveCache(type, key, data) {
  try {
    fs.writeFileSync(cacheFilePath(type, key), JSON.stringify(data));
    console.log(`Saved to cache: ${type}/${key}`);
  } catch (e) {
    console.error('Cache save error:', e.message);
  }
}

/**
 * Calculate average and median ages from per-country ratings.
 *
 * Only strictly positive ages are counted; zeros ("Tous publics") and values
 * that are not finite numbers are ignored.
 *
 * @param {Object<string, number|string>} ageRatings - Country key -> age.
 * @returns {{average: string, median: number, countries: number, min: number, max: number}|null}
 *   Statistics over the positive ages (`average` is a string with one
 *   decimal), or null when there is no positive age.
 */
function calculateAverageAge(ageRatings) {
  const ages = Object.values(ageRatings ?? {})
    .map((age) => (typeof age === 'string' ? Number.parseInt(age, 10) : age))
    .filter((age) => Number.isFinite(age) && age > 0);

  if (ages.length === 0) {
    return null;
  }

  const total = ages.reduce((sum, age) => sum + age, 0);

  // Median is more useful than the mean for skewed distributions.
  const sorted = [...ages].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  const median = sorted.length % 2 === 0
    ? (sorted[mid - 1] + sorted[mid]) / 2
    : sorted[mid];

  return {
    average: (total / ages.length).toFixed(1),
    median,
    countries: ages.length,
    min: sorted[0],
    max: sorted[sorted.length - 1],
  };
}

/**
 * Identify the country of a rating from its flag image.
 *
 * @param {string} alt - Image alt text.
 * @param {string} src - Image URL(s).
 * @returns {string|null} Internal country key, or null when unrecognised.
 */
function identifyCountry(alt, src) {
  const lowerSrc = src.toLowerCase();
  const found = COUNTRY_MATCHERS.find(
    (matcher) => matcher.alts.some((name) => alt.includes(name)) || lowerSrc.includes(matcher.src),
  );
  return found ? found.key : null;
}

/**
 * Format a single country's age for display.
 *
 * @param {number|undefined} age - Age restriction (0 means all audiences).
 * @returns {string} "Tous publics", "<age>+", or "Non spécifié" when missing.
 */
function formatAgeLabel(age) {
  if (age === 0) {
    return 'Tous publics';
  }
  return age ? `${age}+` : 'Non spécifié';
}

/**
 * Search FilmsTousPublics for movies matching a query (cached for 24 hours).
 *
 * @param {string} query - Free-text search query.
 * @returns {Promise<Array<{title: string, link: string, img: (string|undefined), rating: (string|null)}>>}
 *   Matching movies; an empty array when the request or parsing fails.
 */
async function searchMovies(query) {
  // Check cache first
  const cached = loadCache('search', query);
  if (cached) {
    return cached;
  }

  const searchUrl = `${BASE_URL}/?s=${encodeURIComponent(query)}`;
  console.log('Searching FilmsTousPublics:', searchUrl);

  try {
    const response = await axios.get(searchUrl, REQUEST_OPTIONS);
    const $ = cheerio.load(response.data);
    const results = [];

    // Selector covers the different article structures used by the site.
    $('article[class*="tipi-xs-12"], article[class*="post"]').each((_, el) => {
      const article = $(el);
      const titleLink = article.find('h3.title a, .title-wrap .title a');
      const title = titleLink.text().trim();
      const link = titleLink.attr('href');

      // Lazy-loaded images keep the real URL in data-lazy-src.
      const imgEl = article.find('.mask img');
      const img = imgEl.attr('data-lazy-src') || imgEl.attr('src');

      // Get rating if available
      let rating = null;
      const ratingEl = article.find('.lets-review-api-wrap, .lets-review-final-score');
      if (ratingEl.length) {
        rating = ratingEl.attr('data-api-score') || ratingEl.text().trim();
      }

      if (title && link) {
        results.push({ title, link, img, rating });
      }
    });

    console.log(`FilmsTousPublics found ${results.length} results`);
    saveCache('search', query, results);
    return results;
  } catch (error) {
    console.error('Error searching FilmsTousPublics:', error.message);
    return [];
  }
}

/**
 * Fetch a movie page and extract its per-country age ratings, summary,
 * metadata and overall rating (cached for 24 hours).
 *
 * @param {string} movieUrl - Absolute URL of the movie page.
 * @returns {Promise<Object>} `{ summary, year, ageRatings, overallRating,
 *   ...metadata }`, or an empty object when the URL is missing or the
 *   request/parsing fails.
 */
async function getMovieClassification(movieUrl) {
  if (!movieUrl) {
    return {};
  }

  // Check cache first
  const cached = loadCache('detail', movieUrl);
  if (cached) {
    return cached;
  }

  console.log('Fetching details for:', movieUrl);

  try {
    const response = await axios.get(movieUrl, REQUEST_OPTIONS);
    const $ = cheerio.load(response.data);

    // Get country age ratings
    const ageRatings = {};
    console.log('Found pullquote elements:', $('aside.pullquote').length);

    // More robust approach: find all <p> tags inside the pullquote section.
    $('aside.pullquote p').each((_, el) => {
      const paragraph = $(el);
      const text = paragraph.text().trim();
      console.log('Processing age text:', text);

      // Identify the country by image alt or URL. Lazy-loaded images keep the
      // real URL in data-lazy-src, so both attributes are inspected.
      const img = paragraph.find('img');
      const alt = img.attr('alt') || '';
      const src = [img.attr('data-lazy-src'), img.attr('src')].filter(Boolean).join(' ');
      const country = identifyCountry(alt, src);

      // "Tous publics" (all audiences) is stored as 0: excluded from the
      // statistics, rendered as "Tous publics" for display. It is attributed
      // to the paragraph's own country so that another country's
      // "Tous publics" cannot overwrite France's rating; France remains the
      // fallback when no flag is recognised.
      if (text.includes('Tous publics')) {
        const target = country ?? 'france';
        ageRatings[target] = 0;
        console.log(`Found ${target} rating: Tous publics (0)`);
        return;
      }

      // Flexible regex for the unusual dashes used on the site: take the
      // number that appears between "Déconseillé aux" and "ans".
      const match = text.match(/Déconseillé aux[^0-9]*(\d+)[^0-9]*ans/i);
      if (!match) {
        return;
      }

      const age = Number.parseInt(match[1], 10);
      console.log('Found age restriction:', age);

      if (country) {
        ageRatings[country] = age;
      } else {
        // Unknown country, log for debugging
        console.log('Unidentified country with age rating:', age, 'Alt:', alt, 'Src:', src);
      }
    });

    console.log('Found age ratings:', ageRatings);

    // Get summary/plot: among the first three top-level paragraphs, keep
    // those that are long enough and do not wrap a pullquote.
    const summaryParts = [];
    $('.entry-content > p').each((i, el) => {
      const paragraph = $(el);
      const text = paragraph.text().trim();
      if (!paragraph.find('.pullquote').length && i < 3 && text.length > 30) {
        summaryParts.push(text);
      }
    });
    const summary = summaryParts.join(' ');

    // Get movie metadata
    const metadata = {};
    $('h3:contains("Informations") + ul li').each((_, el) => {
      const text = $(el).text().trim();
      const entry = METADATA_LABELS.find(([label]) => text.includes(label));
      if (entry) {
        const [label, field] = entry;
        metadata[field] = text.replace(label, '').trim();
      }
    });

    // Get overall rating
    let overallRating = null;
    const ratingEl = $('.lets-review-block__final-score .score');
    if (ratingEl.length) {
      overallRating = ratingEl.text().trim();
    }

    // Extract year from release date if available
    let year = null;
    if (metadata.releaseDate) {
      const yearMatch = metadata.releaseDate.match(/\b(19|20)\d{2}\b/);
      if (yearMatch) {
        year = yearMatch[0];
      }
    }

    const result = {
      summary,
      year,
      ageRatings,
      overallRating,
      ...metadata,
    };

    // Cache the result
    saveCache('detail', movieUrl, result);
    return result;
  } catch (error) {
    console.error('Error getting FilmsTousPublics movie details:', error.message);
    return {};
  }
}

/**
 * Search for movies and enrich every hit with classification details and
 * display-ready age information.
 *
 * @param {string} query - Free-text search query.
 * @returns {Promise<Array<Object>>} Enriched movies; an empty array on error.
 */
async function searchAndEnrich(query) {
  try {
    const results = await searchMovies(query);

    return await Promise.all(results.map(async (movie) => {
      const details = await getMovieClassification(movie.link);
      const ageRatings = details.ageRatings || {};

      // Calculate average age
      const ageStats = calculateAverageAge(ageRatings);
      console.log(`Movie: ${movie.title}, Age stats:`, ageStats);

      // Format age ratings for display, keyed by readable country name
      const formattedAgeRatings = {};
      for (const [country, age] of Object.entries(ageRatings)) {
        const countryName = COUNTRY_NAMES[country] || country;
        formattedAgeRatings[countryName] = age === 0 ? 'Tous publics' : `${age}+`;
      }

      const ageFrance = formatAgeLabel(ageRatings.france);

      // Get a recommended age - prefer median, then average, then France
      let recommendedAge = ageFrance;
      if (ageStats?.median) {
        recommendedAge = `${ageStats.median}+`;
      } else if (ageStats?.average) {
        recommendedAge = `${ageStats.average}+`;
      }

      return {
        title: movie.title,
        year: details.year,
        img: movie.img,
        link: movie.link,
        source: 'filmstouspublics',
        rating: movie.rating || details.overallRating,
        // Age information for display
        age: recommendedAge.replace('Tous publics', '0+').replace('Non spécifié', '-'),
        ageFrance,
        ageAverage: ageStats?.average ?? null,
        ageMedian: ageStats?.median ?? null,
        ageRecommended: recommendedAge,
        countriesCount: ageStats?.countries ?? 0,
        ageRange: {
          min: ageStats?.min ?? null,
          max: ageStats?.max ?? null,
        },
        ageDetails: formattedAgeRatings,
        summary: details.summary,
        ...details,
      };
    }));
  } catch (error) {
    console.error('FilmsTousPublics searchAndEnrich error:', error.message);
    return [];
  }
}

module.exports = { searchAndEnrich };