add adapter for filmspourenfants
This commit is contained in:
parent
e3f0f39a9d
commit
dfcceb0f7d
229
aggregators/filmspourenfants-adapter.js
Normal file
229
aggregators/filmspourenfants-adapter.js
Normal file
@ -0,0 +1,229 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Root address of the FilmsPourEnfants website.
const BASE_URL = 'https://www.filmspourenfants.net';

// Disk cache: a "cache" folder resolved one level above this module's
// directory. It is created when the module loads if it is not there yet.
const CACHE_DIR = path.join(__dirname, '../cache');
const cacheDirMissing = !fs.existsSync(CACHE_DIR);
if (cacheDirMissing) {
  fs.mkdirSync(CACHE_DIR, { recursive: true });
}
|
||||
|
||||
// Cache operations
|
||||
/**
 * Read a cached entry from the disk cache.
 *
 * The file name is `fpe_<type>_<key>.json` inside CACHE_DIR, where every
 * character of `key` that is not a letter or digit is replaced by "_".
 *
 * @param {string} type - Cache namespace; becomes part of the file name.
 * @param {string} key - Entry key (for example a query or a URL).
 * @returns {*} The parsed JSON content when the file exists and was modified
 *   less than 24 hours ago; otherwise null. Any error is logged and also
 *   yields null.
 */
function loadCache(type, key) {
  const MAX_AGE_MS = 86400000; // 24 hours

  try {
    const safeKey = key.replace(/[^a-z0-9]/gi, '_');
    const cachePath = path.join(CACHE_DIR, `fpe_${type}_${safeKey}.json`);

    if (!fs.existsSync(cachePath)) {
      return null;
    }

    const modifiedAt = fs.statSync(cachePath).mtime.getTime();
    const ageMs = Date.now() - modifiedAt;
    if (ageMs < MAX_AGE_MS) {
      return JSON.parse(fs.readFileSync(cachePath, 'utf8'));
    }
    return null;
  } catch (err) {
    console.error('Cache load error:', err.message);
    return null;
  }
}
|
||||
|
||||
/**
 * Persist an entry to the disk cache as JSON.
 *
 * The target file is `fpe_<type>_<key>.json` inside CACHE_DIR, where every
 * character of `key` that is not a letter or digit is replaced by "_".
 * Failures are logged and otherwise ignored; nothing is returned.
 *
 * @param {string} type - Cache namespace; becomes part of the file name.
 * @param {string} key - Entry key (for example a query or a URL).
 * @param {*} data - Value to serialize with JSON.stringify.
 */
function saveCache(type, key, data) {
  try {
    const safeKey = key.replace(/[^a-z0-9]/gi, '_');
    const cachePath = path.join(CACHE_DIR, `fpe_${type}_${safeKey}.json`);
    const payload = JSON.stringify(data);
    fs.writeFileSync(cachePath, payload);
  } catch (err) {
    console.error('Cache save error:', err.message);
  }
}
|
||||
|
||||
/**
 * Extract an age from text such as "À partir de 8 ans" or
 * "Déconseillé aux moins de: 8 ans".
 *
 * The first run of digits that is followed (optionally after whitespace) by
 * "ans", case-insensitively, is taken as the age.
 *
 * @param {string} text - Text to scan.
 * @returns {number|null} The age as an integer, or null when `text` is empty,
 *   is not a string, or contains no "<digits> ans" pattern.
 */
function extractAgeFromText(text) {
  // Non-string input has no .match(); treat it like missing text.
  if (typeof text !== 'string' || text === '') return null;

  const match = text.match(/(\d+)\s*ans/i);
  if (!match) return null;

  // Always parse as decimal, so leading zeros can never change the base.
  return Number.parseInt(match[1], 10);
}
|
||||
|
||||
/**
 * Search the FilmsPourEnfants site and scrape the result listing.
 *
 * Results are looked up in the disk cache first (namespace "search", keyed by
 * the query) and written back to it after a successful fetch.
 *
 * @param {string} query - Search terms; URL-encoded into the `_s` parameter.
 * @returns {Promise<Array<{title: string, link: string, img: (string|undefined),
 *   ageText: string, age: (number|null)}>>} One entry per result that has both
 *   a title and a link. An empty array is returned when the request or the
 *   parsing fails (the error is logged).
 */
async function searchMovies(query) {
  // Bound the upstream request: axios applies no timeout by default, so a
  // stalled server would otherwise keep the caller waiting indefinitely.
  const REQUEST_TIMEOUT_MS = 10000;

  // Check cache first
  const cached = loadCache('search', query);
  if (cached) return cached;

  const searchUrl = `${BASE_URL}/films-resultats/?_s=${encodeURIComponent(query)}`;
  console.log('Searching FilmsPourEnfants:', searchUrl);

  try {
    const response = await axios.get(searchUrl, {
      headers: { 'User-Agent': 'Mozilla/5.0' },
      timeout: REQUEST_TIMEOUT_MS
    });
    const $ = cheerio.load(response.data);
    const results = [];

    $('section.gp-post-item').each((_, el) => {
      const item = $(el);
      // The title anchor carries both the display title and the detail URL.
      const titleAnchor = item.find('h2.gp-loop-title a');
      const title = titleAnchor.text().trim();
      const link = titleAnchor.attr('href');
      const img = item.find('.gp-post-thumbnail img').attr('src');
      const ageText = item.find('.gp-loop-cats a').text().trim();
      const age = extractAgeFromText(ageText);

      // Entries without a title or a link cannot be enriched later; skip them.
      if (title && link) {
        results.push({ title, link, img, ageText, age });
      }
    });

    console.log(`FilmsPourEnfants found ${results.length} results`);
    saveCache('search', query, results);
    return results;
  } catch (error) {
    console.error('Error searching FilmsPourEnfants:', error.message);
    return [];
  }
}
|
||||
|
||||
/**
 * Fetch a movie page and scrape its details.
 *
 * Details are looked up in the disk cache first (namespace "detail", keyed by
 * the URL) and written back to it after a successful fetch.
 *
 * @param {string} movieUrl - Absolute URL of the movie page.
 * @returns {Promise<Object>} An object that always carries `title`, `summary`,
 *   `img`, `messages` and `difficultScenes` after a successful fetch, plus
 *   `year`, `ageText`, `age`, `duration`, `themes`, `format`, `studio`,
 *   `country` and `creators` when the page provides them. An empty object is
 *   returned when `movieUrl` is falsy or when the request or the parsing
 *   fails (the error is logged).
 */
async function getMovieDetails(movieUrl) {
  if (!movieUrl) return {};

  // Bound the upstream request: axios applies no timeout by default, so a
  // stalled server would otherwise keep the caller waiting indefinitely.
  const REQUEST_TIMEOUT_MS = 10000;

  // Check cache first
  const cached = loadCache('detail', movieUrl);
  if (cached) return cached;

  console.log('Fetching details for:', movieUrl);

  try {
    const response = await axios.get(movieUrl, {
      headers: { 'User-Agent': 'Mozilla/5.0' },
      timeout: REQUEST_TIMEOUT_MS
    });
    const $ = cheerio.load(response.data);
    const details = {};

    // Combined, trimmed text of all links inside `el`.
    const linkText = el => $(el).find('a').text().trim();

    // Trimmed text of each link inside `el`, one array entry per link.
    const linkTexts = el => {
      const texts = [];
      $(el).find('a').each((_, anchor) => {
        texts.push($(anchor).text().trim());
      });
      return texts;
    };

    // Store the raw age label and the number parsed from it.
    const setAge = el => {
      details.ageText = linkText(el);
      details.age = extractAgeFromText(details.ageText);
    };

    // Title
    details.title = $('h1.gp-entry-title').text().trim();

    // Metadata from the entry header; each item is recognised by its label text.
    $('.gp-entry-meta .gp-post-meta').each((_, el) => {
      const text = $(el).text().trim();

      if (text.includes('Année:')) {
        details.year = linkText(el);
      } else if (text.includes('Déconseillé aux moins de:')) {
        setAge(el);
      } else if (text.includes('Durée:')) {
        details.duration = linkText(el);
      } else if (text.includes('Thèmes:')) {
        details.themes = linkTexts(el);
      }
    });

    // Metadata from the hub details box. The age found here overrides the
    // header value; year and duration are only filled in when still missing.
    $('#gp-hub-details span').each((_, el) => {
      const label = $(el).find('strong').text().trim();

      if (label === 'Déconseillé aux moins de:') {
        setAge(el);
      } else if (label === 'Format:') {
        details.format = linkText(el);
      } else if (!details.year && label === 'Année:') {
        details.year = linkText(el);
      } else if (!details.duration && label === 'Durée:') {
        details.duration = linkText(el);
      } else if (label === 'Studio:') {
        details.studio = linkTexts(el);
      } else if (label === 'Pays:') {
        details.country = linkText(el);
      } else if (label === 'Créateurs:') {
        details.creators = linkTexts(el);
      }
    });

    // Summary: text of the first <h4> inside the entry text.
    details.summary = $('.gp-entry-text h4').first().text().trim();

    // Main image: post thumbnail, falling back to the hub header thumbnail.
    details.img = $('.gp-post-thumbnail img').attr('src') || $('.gp-hub-header-thumbnail img').attr('src');

    // Messages: every <h3> that is directly followed by a non-empty <p>,
    // rendered as "heading: paragraph" and separated by blank lines.
    let messages = '';
    $('.gp-entry-text h3').each((_, el) => {
      const heading = $(el).text().trim();
      const paragraph = $(el).next('p').text().trim();
      if (heading && paragraph) {
        messages += `${heading}: ${paragraph}\n\n`;
      }
    });
    details.messages = messages.trim();

    // Difficult scenes: the <h3>/<p> siblings that follow the
    // "SCÈNES DIFFICILES" <h2>, up to (not including) the next <h2>.
    const difficultScenesHeading = $('.gp-entry-text h2:contains("SCÈNES DIFFICILES")');
    let difficultScenes = '';
    if (difficultScenesHeading.length) {
      for (let node = difficultScenesHeading.next(); node.length; node = node.next()) {
        const tag = node[0].tagName.toLowerCase();
        if (tag === 'h2') break;
        if (tag === 'h3' || tag === 'p') {
          difficultScenes += `${node.text().trim()}\n\n`;
        }
      }
    }
    details.difficultScenes = difficultScenes.trim();

    console.log(`Fetched details for: ${details.title}, Age: ${details.age}`);
    saveCache('detail', movieUrl, details);
    return details;
  } catch (error) {
    console.error('Error getting FilmsPourEnfants movie details:', error.message);
    return {};
  }
}
|
||||
|
||||
/**
 * Search FilmsPourEnfants and enrich every hit with data from its detail page.
 *
 * Detail pages for all hits are fetched concurrently.
 *
 * @param {string} query - Search terms.
 * @returns {Promise<Array<Object>>} One object per search hit with `title`,
 *   `year`, `img`, `link`, `source`, `age`, `ageDetails`, `duration`,
 *   `summary`, `themes`, `studio`, `creators`, `difficultScenes` and
 *   `messages`. An empty array is returned when anything throws (the error
 *   is logged).
 */
async function searchAndEnrich(query) {
  try {
    const results = await searchMovies(query);
    return await Promise.all(results.map(async movie => {
      const details = await getMovieDetails(movie.link);

      // Prefer the age from the detail page and fall back to the search
      // listing. Compare against null/undefined instead of relying on
      // truthiness so that a parsed age of 0 is still reported.
      let age = null;
      if (details.age != null) {
        age = details.age;
      } else if (movie.age != null) {
        age = movie.age;
      }
      const hasAge = age !== null;

      return {
        title: movie.title,
        year: details.year || null,
        img: movie.img || details.img,
        link: movie.link,
        source: 'filmspourenfants',
        // Format age for frontend display
        age: hasAge ? `${age}+` : "-",
        ageDetails: {
          France: hasAge ? `${age}+` : "Non spécifié"
        },
        duration: details.duration,
        summary: details.summary,
        themes: details.themes,
        studio: details.studio,
        creators: details.creators,
        difficultScenes: details.difficultScenes,
        messages: details.messages
      };
    }));
  } catch (error) {
    console.error('FilmsPourEnfants searchAndEnrich error:', error.message);
    return [];
  }
}
|
||||
|
||||
// Public API of this module: only searchAndEnrich is exported.
module.exports = { searchAndEnrich };
|
10
server.js
10
server.js
@ -4,6 +4,7 @@ const cinecheck = require('./aggregators/cinecheck-adapter');
|
||||
const commonsense = require('./aggregators/commonsense-adapter');
|
||||
const filmages = require('./aggregators/filmages-adapter');
|
||||
const filmstouspublics = require('./aggregators/filmstouspublics-adapter');
|
||||
const filmspourenfants = require('./aggregators/filmspourenfants-adapter');
|
||||
const { mergeResults } = require('./merge');
|
||||
|
||||
const app = express();
|
||||
@ -19,7 +20,7 @@ app.get('/search', async (req, res) => {
|
||||
console.log('Query:', q);
|
||||
|
||||
try {
|
||||
const [cine, cs, fa, ftp] = await Promise.all([
|
||||
const [cine, cs, fa, ftp, fpe] = await Promise.all([
|
||||
cinecheck.searchAndEnrich(q).catch(e => {
|
||||
console.error('Cinecheck failed:', e.message);
|
||||
return [];
|
||||
@ -35,6 +36,10 @@ app.get('/search', async (req, res) => {
|
||||
filmstouspublics.searchAndEnrich(q).catch(e => {
|
||||
console.error('FilmsTousPublics failed:', e.message);
|
||||
return [];
|
||||
}),
|
||||
filmspourenfants.searchAndEnrich(q).catch(e => {
|
||||
console.error('FilmsPourEnfants failed:', e.message);
|
||||
return [];
|
||||
})
|
||||
]);
|
||||
|
||||
@ -42,8 +47,9 @@ app.get('/search', async (req, res) => {
|
||||
console.log('CSM results:', cs.length);
|
||||
console.log('Filmages results:', fa.length);
|
||||
console.log('FilmsTousPublics results:', ftp.length);
|
||||
console.log('FilmsPourEnfants results:', fpe.length);
|
||||
|
||||
const merged = mergeResults([cine, cs, fa, ftp]);
|
||||
const merged = mergeResults([cine, cs, fa, ftp, fpe]);
|
||||
res.json(merged);
|
||||
} catch (e) {
|
||||
console.error('General search error:', e);
|
||||
|
Loading…
x
Reference in New Issue
Block a user