fix cache dir

This commit is contained in:
SansGuidon 2025-05-24 23:50:44 +02:00
parent be66cf7e53
commit ea0c5c7765
2 changed files with 68 additions and 68 deletions

View File

@ -6,7 +6,7 @@ const path = require('path');
const BASE_URL = 'https://www.filmspourenfants.net'; const BASE_URL = 'https://www.filmspourenfants.net';
// Setup disk cache // Setup disk cache
const CACHE_DIR = path.join(__dirname, '../cache'); const CACHE_DIR = process.env.CACHE_DIR || '/app/data/cache';
if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true }); if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true });
// Cache operations // Cache operations
@ -38,7 +38,7 @@ function saveCache(type, key, data) {
// Extract age from string like "À partir de 8 ans" or "Déconseillé aux moins de: 8 ans" // Extract age from string like "À partir de 8 ans" or "Déconseillé aux moins de: 8 ans"
function extractAgeFromText(text) { function extractAgeFromText(text) {
if (!text) return null; if (!text) return null;
const match = text.match(/(\d+)\s*ans/i); const match = text.match(/(\d+)\s*ans/i);
if (match && match[1]) { if (match && match[1]) {
return parseInt(match[1]); return parseInt(match[1]);
@ -50,24 +50,24 @@ async function searchMovies(query) {
// Check cache first // Check cache first
const cached = loadCache('search', query); const cached = loadCache('search', query);
if (cached) return cached; if (cached) return cached;
const searchUrl = `${BASE_URL}/films-resultats/?_s=${encodeURIComponent(query)}`; const searchUrl = `${BASE_URL}/films-resultats/?_s=${encodeURIComponent(query)}`;
console.log('Searching FilmsPourEnfants:', searchUrl); console.log('Searching FilmsPourEnfants:', searchUrl);
try { try {
const response = await axios.get(searchUrl, { const response = await axios.get(searchUrl, {
headers: { 'User-Agent': 'Mozilla/5.0' } headers: { 'User-Agent': 'Mozilla/5.0' }
}); });
const $ = cheerio.load(response.data); const $ = cheerio.load(response.data);
const results = []; const results = [];
$('section.gp-post-item').each((_, el) => { $('section.gp-post-item').each((_, el) => {
const title = $(el).find('h2.gp-loop-title a').text().trim(); const title = $(el).find('h2.gp-loop-title a').text().trim();
const link = $(el).find('h2.gp-loop-title a').attr('href'); const link = $(el).find('h2.gp-loop-title a').attr('href');
const img = $(el).find('.gp-post-thumbnail img').attr('src'); const img = $(el).find('.gp-post-thumbnail img').attr('src');
const ageText = $(el).find('.gp-loop-cats a').text().trim(); const ageText = $(el).find('.gp-loop-cats a').text().trim();
const age = extractAgeFromText(ageText); const age = extractAgeFromText(ageText);
if (title && link) { if (title && link) {
results.push({ results.push({
title, title,
@ -78,7 +78,7 @@ async function searchMovies(query) {
}); });
} }
}); });
console.log(`FilmsPourEnfants found ${results.length} results`); console.log(`FilmsPourEnfants found ${results.length} results`);
saveCache('search', query, results); saveCache('search', query, results);
return results; return results;
@ -90,27 +90,27 @@ async function searchMovies(query) {
async function getMovieDetails(movieUrl) { async function getMovieDetails(movieUrl) {
if (!movieUrl) return {}; if (!movieUrl) return {};
// Check cache first // Check cache first
const cached = loadCache('detail', movieUrl); const cached = loadCache('detail', movieUrl);
if (cached) return cached; if (cached) return cached;
console.log('Fetching details for:', movieUrl); console.log('Fetching details for:', movieUrl);
try { try {
const response = await axios.get(movieUrl, { const response = await axios.get(movieUrl, {
headers: { 'User-Agent': 'Mozilla/5.0' } headers: { 'User-Agent': 'Mozilla/5.0' }
}); });
const $ = cheerio.load(response.data); const $ = cheerio.load(response.data);
const details = {}; const details = {};
// Title // Title
details.title = $('h1.gp-entry-title').text().trim(); details.title = $('h1.gp-entry-title').text().trim();
// Get metadata // Get metadata
$('.gp-entry-meta .gp-post-meta').each((_, el) => { $('.gp-entry-meta .gp-post-meta').each((_, el) => {
const text = $(el).text().trim(); const text = $(el).text().trim();
if (text.includes('Année:')) { if (text.includes('Année:')) {
details.year = $(el).find('a').text().trim(); details.year = $(el).find('a').text().trim();
} else if (text.includes('Déconseillé aux moins de:')) { } else if (text.includes('Déconseillé aux moins de:')) {
@ -125,11 +125,11 @@ async function getMovieDetails(movieUrl) {
}); });
} }
}); });
// More detailed metadata // More detailed metadata
$('#gp-hub-details span').each((_, el) => { $('#gp-hub-details span').each((_, el) => {
const label = $(el).find('strong').text().trim(); const label = $(el).find('strong').text().trim();
if (label === 'Déconseillé aux moins de:') { if (label === 'Déconseillé aux moins de:') {
details.ageText = $(el).find('a').text().trim(); details.ageText = $(el).find('a').text().trim();
details.age = extractAgeFromText(details.ageText); details.age = extractAgeFromText(details.ageText);
@ -153,13 +153,13 @@ async function getMovieDetails(movieUrl) {
}); });
} }
}); });
// Get summary - text of the first h4 inside the entry-text // Get summary - text of the first h4 inside the entry-text
details.summary = $('.gp-entry-text h4').first().text().trim(); details.summary = $('.gp-entry-text h4').first().text().trim();
// Get main image // Get main image
details.img = $('.gp-post-thumbnail img').attr('src') || $('.gp-hub-header-thumbnail img').attr('src'); details.img = $('.gp-post-thumbnail img').attr('src') || $('.gp-hub-header-thumbnail img').attr('src');
// Get messages section // Get messages section
let messages = ''; let messages = '';
$('.gp-entry-text h3').each((_, el) => { $('.gp-entry-text h3').each((_, el) => {
@ -170,7 +170,7 @@ async function getMovieDetails(movieUrl) {
} }
}); });
details.messages = messages.trim(); details.messages = messages.trim();
// Get difficult scenes section // Get difficult scenes section
const difficultScenesHeading = $('.gp-entry-text h2:contains("SCÈNES DIFFICILES")'); const difficultScenesHeading = $('.gp-entry-text h2:contains("SCÈNES DIFFICILES")');
let difficultScenes = ''; let difficultScenes = '';
@ -184,7 +184,7 @@ async function getMovieDetails(movieUrl) {
} }
} }
details.difficultScenes = difficultScenes.trim(); details.difficultScenes = difficultScenes.trim();
console.log(`Fetched details for: ${details.title}, Age: ${details.age}`); console.log(`Fetched details for: ${details.title}, Age: ${details.age}`);
saveCache('detail', movieUrl, details); saveCache('detail', movieUrl, details);
return details; return details;
@ -199,7 +199,7 @@ async function searchAndEnrich(query) {
const results = await searchMovies(query); const results = await searchMovies(query);
return await Promise.all(results.map(async movie => { return await Promise.all(results.map(async movie => {
const details = await getMovieDetails(movie.link); const details = await getMovieDetails(movie.link);
return { return {
title: movie.title, title: movie.title,
year: details.year || null, year: details.year || null,

View File

@ -5,7 +5,7 @@ const path = require('path');
const BASE_URL = 'https://www.filmstouspublics.fr'; const BASE_URL = 'https://www.filmstouspublics.fr';
// Setup disk cache // Setup disk cache
const CACHE_DIR = path.join(__dirname, '../cache'); const CACHE_DIR = process.env.CACHE_DIR || '/app/data/cache';
if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true }); if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true });
// Load cache from disk if available // Load cache from disk if available
@ -46,19 +46,19 @@ function calculateAverageAge(ageRatings) {
return !isNaN(numAge) && numAge > 0; // Only include positive ages return !isNaN(numAge) && numAge > 0; // Only include positive ages
}) })
.map(age => typeof age === 'string' ? parseInt(age) : age); .map(age => typeof age === 'string' ? parseInt(age) : age);
if (ages.length === 0) return null; if (ages.length === 0) return null;
// Calculate average // Calculate average
const avg = ages.reduce((sum, age) => sum + age, 0) / ages.length; const avg = ages.reduce((sum, age) => sum + age, 0) / ages.length;
// Calculate median (more useful for skewed distributions) // Calculate median (more useful for skewed distributions)
const sorted = [...ages].sort((a, b) => a - b); const sorted = [...ages].sort((a, b) => a - b);
const mid = Math.floor(sorted.length / 2); const mid = Math.floor(sorted.length / 2);
const median = sorted.length % 2 === 0 const median = sorted.length % 2 === 0
? (sorted[mid - 1] + sorted[mid]) / 2 ? (sorted[mid - 1] + sorted[mid]) / 2
: sorted[mid]; : sorted[mid];
return { average: avg.toFixed(1), median, countries: ages.length, min: sorted[0], max: sorted[sorted.length-1] }; return { average: avg.toFixed(1), median, countries: ages.length, min: sorted[0], max: sorted[sorted.length-1] };
} }
@ -66,33 +66,33 @@ async function searchMovies(query) {
// Check cache first // Check cache first
const cached = loadCache('search', query); const cached = loadCache('search', query);
if (cached) return cached; if (cached) return cached;
const searchUrl = `${BASE_URL}/?s=${encodeURIComponent(query)}`; const searchUrl = `${BASE_URL}/?s=${encodeURIComponent(query)}`;
console.log('Searching FilmsTousPublics:', searchUrl); console.log('Searching FilmsTousPublics:', searchUrl);
try { try {
const response = await axios.get(searchUrl, { const response = await axios.get(searchUrl, {
headers: { 'User-Agent': 'Mozilla/5.0' } headers: { 'User-Agent': 'Mozilla/5.0' }
}); });
const $ = cheerio.load(response.data); const $ = cheerio.load(response.data);
const results = []; const results = [];
// Better selector for different article structures // Better selector for different article structures
$('article[class*="tipi-xs-12"], article[class*="post"]').each((_, el) => { $('article[class*="tipi-xs-12"], article[class*="post"]').each((_, el) => {
const title = $(el).find('h3.title a, .title-wrap .title a').text().trim(); const title = $(el).find('h3.title a, .title-wrap .title a').text().trim();
const link = $(el).find('h3.title a, .title-wrap .title a').attr('href'); const link = $(el).find('h3.title a, .title-wrap .title a').attr('href');
// Handle lazy-loaded images properly // Handle lazy-loaded images properly
const imgEl = $(el).find('.mask img'); const imgEl = $(el).find('.mask img');
const img = imgEl.attr('data-lazy-src') || imgEl.attr('src'); const img = imgEl.attr('data-lazy-src') || imgEl.attr('src');
// Get rating if available // Get rating if available
let rating = null; let rating = null;
const ratingEl = $(el).find('.lets-review-api-wrap, .lets-review-final-score'); const ratingEl = $(el).find('.lets-review-api-wrap, .lets-review-final-score');
if (ratingEl.length) { if (ratingEl.length) {
rating = ratingEl.attr('data-api-score') || ratingEl.text().trim(); rating = ratingEl.attr('data-api-score') || ratingEl.text().trim();
} }
if (title && link) { if (title && link) {
results.push({ results.push({
title, title,
@ -102,7 +102,7 @@ async function searchMovies(query) {
}); });
} }
}); });
console.log(`FilmsTousPublics found ${results.length} results`); console.log(`FilmsTousPublics found ${results.length} results`);
saveCache('search', query, results); saveCache('search', query, results);
return results; return results;
@ -114,33 +114,33 @@ async function searchMovies(query) {
async function getMovieClassification(movieUrl) { async function getMovieClassification(movieUrl) {
if (!movieUrl) return {}; if (!movieUrl) return {};
// Check cache first // Check cache first
const cached = loadCache('detail', movieUrl); const cached = loadCache('detail', movieUrl);
if (cached) return cached; if (cached) return cached;
console.log('Fetching details for:', movieUrl); console.log('Fetching details for:', movieUrl);
try { try {
const response = await axios.get(movieUrl, { const response = await axios.get(movieUrl, {
headers: { 'User-Agent': 'Mozilla/5.0' } headers: { 'User-Agent': 'Mozilla/5.0' }
}); });
const $ = cheerio.load(response.data); const $ = cheerio.load(response.data);
// Get country age ratings // Get country age ratings
const ageRatings = {}; const ageRatings = {};
console.log('Found pullquote elements:', $('aside.pullquote').length); console.log('Found pullquote elements:', $('aside.pullquote').length);
// More robust approach: Find all <p> tags inside the pullquote section // More robust approach: Find all <p> tags inside the pullquote section
$('aside.pullquote p').each((_, el) => { $('aside.pullquote p').each((_, el) => {
const text = $(el).text().trim(); const text = $(el).text().trim();
console.log('Processing age text:', text); console.log('Processing age text:', text);
// Detect "Tous publics" for France (All audiences) // Detect "Tous publics" for France (All audiences)
if (text.includes('Tous publics')) { if (text.includes('Tous publics')) {
ageRatings.france = 0; // Set to 0 for averaging but "All" for display ageRatings.france = 0; // Set to 0 for averaging but "All" for display
console.log('Found France rating: Tous publics (0)'); console.log('Found France rating: Tous publics (0)');
} }
// More flexible regex for the weird dashes used // More flexible regex for the weird dashes used
else { else {
// Just extract any number that appears in string after "Déconseillé aux" // Just extract any number that appears in string after "Déconseillé aux"
@ -148,12 +148,12 @@ async function getMovieClassification(movieUrl) {
if (match && match[1]) { if (match && match[1]) {
const age = parseInt(match[1]); const age = parseInt(match[1]);
console.log('Found age restriction:', age); console.log('Found age restriction:', age);
// Identify country by image alt or src // Identify country by image alt or src
const img = $(el).find('img'); const img = $(el).find('img');
const alt = img.attr('alt') || ''; const alt = img.attr('alt') || '';
const src = img.attr('src') || ''; const src = img.attr('src') || '';
// Check for all possible countries - more flexible matching // Check for all possible countries - more flexible matching
if (alt.includes('France') || src.toLowerCase().includes('france')) { if (alt.includes('France') || src.toLowerCase().includes('france')) {
ageRatings.france = age; ageRatings.france = age;
@ -178,19 +178,19 @@ async function getMovieClassification(movieUrl) {
} }
} }
}); });
console.log('Found age ratings:', ageRatings); console.log('Found age ratings:', ageRatings);
// Get summary/plot (first few paragraphs) // Get summary/plot (first few paragraphs)
let summary = ''; let summary = '';
$('.entry-content > p').each((i, el) => { $('.entry-content > p').each((i, el) => {
// Skip pullquote or other non-content paragraphs // Skip pullquote or other non-content paragraphs
if (!$(el).find('.pullquote').length && i < 3 && $(el).text().trim().length > 30) { if (!$(el).find('.pullquote').length && i < 3 && $(el).text().trim().length > 30) {
summary += $(el).text().trim() + ' '; summary += $(el).text().trim() + ' ';
} }
}); });
summary = summary.trim(); summary = summary.trim();
// Get movie metadata // Get movie metadata
const metadata = {}; const metadata = {};
$('h3:contains("Informations") + ul li').each((_, el) => { $('h3:contains("Informations") + ul li').each((_, el) => {
@ -211,14 +211,14 @@ async function getMovieClassification(movieUrl) {
metadata.studio = text.replace('Studio :', '').trim(); metadata.studio = text.replace('Studio :', '').trim();
} }
}); });
// Get overall rating // Get overall rating
let overallRating = null; let overallRating = null;
const ratingEl = $('.lets-review-block__final-score .score'); const ratingEl = $('.lets-review-block__final-score .score');
if (ratingEl.length) { if (ratingEl.length) {
overallRating = ratingEl.text().trim(); overallRating = ratingEl.text().trim();
} }
// Extract year from release date if available // Extract year from release date if available
let year = null; let year = null;
if (metadata.releaseDate) { if (metadata.releaseDate) {
@ -227,7 +227,7 @@ async function getMovieClassification(movieUrl) {
year = yearMatch[0]; year = yearMatch[0];
} }
} }
const result = { const result = {
summary, summary,
year, year,
@ -235,7 +235,7 @@ async function getMovieClassification(movieUrl) {
overallRating, overallRating,
...metadata ...metadata
}; };
// Cache the result // Cache the result
saveCache('detail', movieUrl, result); saveCache('detail', movieUrl, result);
return result; return result;
@ -250,11 +250,11 @@ async function searchAndEnrich(query) {
const results = await searchMovies(query); const results = await searchMovies(query);
return await Promise.all(results.map(async movie => { return await Promise.all(results.map(async movie => {
const details = await getMovieClassification(movie.link); const details = await getMovieClassification(movie.link);
// Calculate age statistics (average, median, min, max) // Calculate age statistics (average, median, min, max)
const ageStats = calculateAverageAge(details.ageRatings || {}); const ageStats = calculateAverageAge(details.ageRatings || {});
console.log(`Movie: ${movie.title}, Age stats:`, ageStats); console.log(`Movie: ${movie.title}, Age stats:`, ageStats);
// Convert country codes to readable names for frontend display // Convert country codes to readable names for frontend display
const countryNames = { const countryNames = {
france: "France", france: "France",
@ -266,22 +266,22 @@ async function searchAndEnrich(query) {
netherlands: "Pays-Bas", netherlands: "Pays-Bas",
usa: "États-Unis" usa: "États-Unis"
}; };
// Format age ratings for display // Format age ratings for display
const formattedAgeRatings = {}; const formattedAgeRatings = {};
for (const [country, age] of Object.entries(details.ageRatings || {})) { for (const [country, age] of Object.entries(details.ageRatings || {})) {
const countryName = countryNames[country] || country; const countryName = countryNames[country] || country;
formattedAgeRatings[countryName] = age === 0 ? "Tous publics" : `${age}+`; formattedAgeRatings[countryName] = age === 0 ? "Tous publics" : `${age}+`;
} }
// Get a recommended age - prefer median, then average, then fallback // Get a recommended age - prefer median, then average, then fallback
const recommendedAge = const recommendedAge =
ageStats?.median ? `${ageStats.median}+` : ageStats?.median ? `${ageStats.median}+` :
ageStats?.average ? `${ageStats.average}+` : ageStats?.average ? `${ageStats.average}+` :
details.ageRatings?.france === 0 ? "Tous publics" : details.ageRatings?.france === 0 ? "Tous publics" :
details.ageRatings?.france ? `${details.ageRatings.france}+` : details.ageRatings?.france ? `${details.ageRatings.france}+` :
"Non spécifié"; "Non spécifié";
return { return {
title: movie.title, title: movie.title,
year: details.year, year: details.year,
@ -291,8 +291,8 @@ async function searchAndEnrich(query) {
rating: movie.rating || details.overallRating, rating: movie.rating || details.overallRating,
// Age information for display // Age information for display
age: recommendedAge.replace('Tous publics', '0+').replace('Non spécifié', '-'), age: recommendedAge.replace('Tous publics', '0+').replace('Non spécifié', '-'),
ageFrance: details.ageRatings?.france === 0 ? "Tous publics" : ageFrance: details.ageRatings?.france === 0 ? "Tous publics" :
details.ageRatings?.france ? `${details.ageRatings.france}+` : details.ageRatings?.france ? `${details.ageRatings.france}+` :
"Non spécifié", "Non spécifié",
ageAverage: ageStats?.average || null, ageAverage: ageStats?.average || null,
ageMedian: ageStats?.median || null, ageMedian: ageStats?.median || null,