add adapter for filmstouspublics
This commit is contained in:
parent
6c0142c01b
commit
e3f0f39a9d
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
node_modules
|
||||
cache
|
||||
|
316
aggregators/filmstouspublics-adapter.js
Normal file
316
aggregators/filmstouspublics-adapter.js
Normal file
@ -0,0 +1,316 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const BASE_URL = 'https://www.filmstouspublics.fr';

// Disk cache directory: a `cache` folder one level above this file's directory.
// It is created at module load time when it does not exist yet.
const CACHE_DIR = path.join(__dirname, '../cache');
if (!fs.existsSync(CACHE_DIR)) {
  fs.mkdirSync(CACHE_DIR, { recursive: true });
}
|
||||
|
||||
/**
 * Read a previously cached value from disk.
 *
 * The cache file is `<CACHE_DIR>/<type>_<key>.json`, where every character of
 * `key` that is not an ASCII letter or digit is replaced by `_`.
 *
 * @param {string} type - Cache namespace used as the file-name prefix.
 * @param {string} key - Cache key (for example a search query or a URL).
 * @returns {*} The parsed JSON content when the file exists and was modified
 *   less than 24 hours ago; otherwise `null`. Any error is logged and also
 *   yields `null`.
 */
function loadCache(type, key) {
  try {
    const safeKey = key.replace(/[^a-z0-9]/gi, '_');
    const cachePath = path.join(CACHE_DIR, `${type}_${safeKey}.json`);
    if (!fs.existsSync(cachePath)) {
      return null;
    }

    const { mtime } = fs.statSync(cachePath);
    // 86400000 ms = 24 hours: older entries are treated as a miss.
    const isFresh = Date.now() - mtime.getTime() < 86400000;
    if (!isFresh) {
      return null;
    }

    console.log(`Cache hit for ${type}:`, key);
    const raw = fs.readFileSync(cachePath, 'utf8');
    return JSON.parse(raw);
  } catch (err) {
    console.error('Cache load error:', err.message);
    return null;
  }
}
|
||||
|
||||
/**
 * Write a value to the disk cache as JSON.
 *
 * The target file is `<CACHE_DIR>/<type>_<key>.json`, where every character of
 * `key` that is not an ASCII letter or digit is replaced by `_`. Failures are
 * logged and otherwise ignored, so caching stays best-effort.
 *
 * @param {string} type - Cache namespace used as the file-name prefix.
 * @param {string} key - Cache key (for example a search query or a URL).
 * @param {*} data - JSON-serialisable value to store.
 * @returns {void}
 */
function saveCache(type, key, data) {
  try {
    const safeKey = key.replace(/[^a-z0-9]/gi, '_');
    const cachePath = path.join(CACHE_DIR, `${type}_${safeKey}.json`);
    const payload = JSON.stringify(data);
    fs.writeFileSync(cachePath, payload);
    console.log(`Saved to cache: ${type}/${key}`);
  } catch (err) {
    console.error('Cache save error:', err.message);
  }
}
|
||||
|
||||
/**
 * Compute summary statistics over per-country age restrictions.
 *
 * Only strictly positive, finite ages take part; `0` ("Tous publics") and
 * anything that is not a number or a base-10 numeric string is ignored.
 *
 * @param {Object<string, (number|string)>} ageRatings - Map of country key to
 *   minimum age. String values are parsed as base-10 integers.
 * @returns {?{average: string, median: number, countries: number, min: number, max: number}}
 *   `null` when no positive age is present. `average` is a string with one
 *   decimal (from `toFixed(1)`); `median` is the mean of the two middle values
 *   when the count is even, so it may be fractional.
 */
function calculateAverageAge(ageRatings) {
  const ages = Object.values(ageRatings ?? {})
    // Parse once, always in base 10, so strings such as '0x10' are not read as hex.
    .map((age) => (typeof age === 'string' ? Number.parseInt(age, 10) : age))
    // Strict numeric check: keeps booleans, NaN and Infinity out of the statistics.
    .filter((age) => typeof age === 'number' && Number.isFinite(age) && age > 0);

  if (ages.length === 0) return null;

  const total = ages.reduce((sum, age) => sum + age, 0);

  // Median is reported alongside the average because a single strict country
  // can skew the average.
  const sorted = [...ages].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  const median = sorted.length % 2 === 0
    ? (sorted[mid - 1] + sorted[mid]) / 2
    : sorted[mid];

  return {
    average: (total / ages.length).toFixed(1),
    median,
    countries: ages.length,
    min: sorted[0],
    max: sorted[sorted.length - 1],
  };
}
|
||||
|
||||
/**
 * Search filmstouspublics.fr and scrape the result cards.
 *
 * The disk cache is consulted first (`loadCache('search', query)`); a
 * successful scrape is stored with `saveCache`. Request or parsing errors are
 * logged and produce an empty array, which is not cached.
 *
 * @param {string} query - Free-text search terms.
 * @returns {Promise<Array<{title: string, link: string, img: (string|undefined), rating: (string|null)}>>}
 *   One entry per result card that has both a title and a link.
 */
async function searchMovies(query) {
  const fromCache = loadCache('search', query);
  if (fromCache) return fromCache;

  const searchUrl = `${BASE_URL}/?s=${encodeURIComponent(query)}`;
  console.log('Searching FilmsTousPublics:', searchUrl);

  try {
    const { data: html } = await axios.get(searchUrl, {
      headers: { 'User-Agent': 'Mozilla/5.0' },
    });
    const $ = cheerio.load(html);
    const results = [];

    // Two selectors because result cards do not all share the same markup.
    $('article[class*="tipi-xs-12"], article[class*="post"]').each((_, card) => {
      const $card = $(card);

      const titleAnchor = $card.find('h3.title a, .title-wrap .title a');
      const title = titleAnchor.text().trim();
      const link = titleAnchor.attr('href');
      // Cards without a title or a link are ignored.
      if (!title || !link) return;

      // Lazy-loaded images keep the real URL in data-lazy-src.
      const poster = $card.find('.mask img');
      const img = poster.attr('data-lazy-src') || poster.attr('src');

      // Review score, when the card carries one.
      const scoreEl = $card.find('.lets-review-api-wrap, .lets-review-final-score');
      const rating = scoreEl.length
        ? scoreEl.attr('data-api-score') || scoreEl.text().trim()
        : null;

      results.push({ title, link, img, rating });
    });

    console.log(`FilmsTousPublics found ${results.length} results`);
    saveCache('search', query, results);
    return results;
  } catch (err) {
    console.error('Error searching FilmsTousPublics:', err.message);
    return [];
  }
}
|
||||
|
||||
// Country detection table. Each entry gives the key used in `ageRatings` plus the
// lower-cased fragments searched for in the flag image's alt text and src.
// Order matters: the first matching entry wins.
const COUNTRY_MATCHERS = [
  { key: 'france', alt: ['france'], src: 'france' },
  { key: 'germany', alt: ['allemagne'], src: 'allemagne' },
  { key: 'spain', alt: ['espagne'], src: 'espagne' },
  { key: 'uk', alt: ['royaume'], src: 'royaume' },
  { key: 'sweden', alt: ['suede', 'suède'], src: 'suede' },
  { key: 'switzerland', alt: ['suisse'], src: 'suisse' },
  { key: 'netherlands', alt: ['pays'], src: 'pays-bas' },
  { key: 'usa', alt: ['etats', 'états'], src: 'etats-unis' },
];

// "Informations" list labels and the result field each one fills.
const METADATA_LABELS = [
  ['Durée :', 'duration'],
  ['Nom original :', 'originalTitle'],
  ['Sortie :', 'releaseDate'],
  ['Réalisateur :', 'director'],
  ['Producteur :', 'producer'],
  ['Acteurs :', 'actors'],
  ['Studio :', 'studio'],
];

// Map a flag image's alt/src to an `ageRatings` key, or null when unknown.
function identifyRatingCountry(alt, src) {
  const altLower = alt.toLowerCase();
  const srcLower = src.toLowerCase();
  const hit = COUNTRY_MATCHERS.find(
    (matcher) =>
      matcher.alt.some((fragment) => altLower.includes(fragment)) ||
      srcLower.includes(matcher.src)
  );
  return hit ? hit.key : null;
}

/**
 * Scrape a film page for age classifications, summary, metadata and score.
 *
 * The disk cache is consulted first (`loadCache('detail', movieUrl)`); a
 * successful scrape is stored with `saveCache`. Request or parsing errors are
 * logged and produce `{}`, which is not cached.
 *
 * @param {string} movieUrl - Absolute URL of the film page.
 * @returns {Promise<Object>} `{}` when `movieUrl` is falsy or on error.
 *   Otherwise an object with `summary` (string), `year` (string|null),
 *   `ageRatings` (country key -> age, 0 meaning "Tous publics"),
 *   `overallRating` (string|null) and whichever of `duration`,
 *   `originalTitle`, `releaseDate`, `director`, `producer`, `actors`,
 *   `studio` were found on the page.
 */
async function getMovieClassification(movieUrl) {
  if (!movieUrl) return {};

  const cached = loadCache('detail', movieUrl);
  if (cached) return cached;

  console.log('Fetching details for:', movieUrl);

  try {
    const response = await axios.get(movieUrl, {
      headers: { 'User-Agent': 'Mozilla/5.0' },
    });
    const $ = cheerio.load(response.data);

    // Per-country age ratings, one <p> per country inside the pullquote aside.
    const ageRatings = {};
    console.log('Found pullquote elements:', $('aside.pullquote').length);

    $('aside.pullquote p').each((_, el) => {
      const text = $(el).text().trim();
      console.log('Processing age text:', text);

      // NOTE(review): "Tous publics" is attributed to France without looking at
      // the flag image — confirm that no other country row uses this wording.
      if (text.includes('Tous publics')) {
        ageRatings.france = 0; // 0 stands for "all audiences"
        console.log('Found France rating: Tous publics (0)');
        return;
      }

      // The separator between the words and the number varies (different dash
      // characters), so anything that is not a digit is accepted in between.
      const match = text.match(/Déconseillé aux[^0-9]*(\d+)[^0-9]*ans/i);
      if (!match) return;

      const age = Number.parseInt(match[1], 10);
      console.log('Found age restriction:', age);

      // The country is identified from the flag image's alt text or src.
      const img = $(el).find('img');
      const alt = img.attr('alt') || '';
      const src = img.attr('src') || '';
      const country = identifyRatingCountry(alt, src);
      if (country) {
        ageRatings[country] = age;
      } else {
        console.log('Unidentified country with age rating:', age, 'Alt:', alt, 'Src:', src);
      }
    });

    console.log('Found age ratings:', ageRatings);

    // Summary: among the first three direct <p> children of the content area,
    // keep those longer than 30 characters that do not wrap a pullquote.
    let summary = '';
    $('.entry-content > p').each((i, el) => {
      const paragraph = $(el).text().trim();
      if (!$(el).find('.pullquote').length && i < 3 && paragraph.length > 30) {
        summary += paragraph + ' ';
      }
    });
    summary = summary.trim();

    // Metadata from the list that follows the "Informations" heading.
    const metadata = {};
    $('h3:contains("Informations") + ul li').each((_, el) => {
      const text = $(el).text().trim();
      const entry = METADATA_LABELS.find(([label]) => text.includes(label));
      if (entry) {
        const [label, field] = entry;
        metadata[field] = text.replace(label, '').trim();
      }
    });

    // Overall review score, when present.
    let overallRating = null;
    const ratingEl = $('.lets-review-block__final-score .score');
    if (ratingEl.length) {
      overallRating = ratingEl.text().trim();
    }

    // First 19xx/20xx year found in the release date, if any.
    let year = null;
    if (metadata.releaseDate) {
      const yearMatch = metadata.releaseDate.match(/\b(19|20)\d{2}\b/);
      if (yearMatch) {
        year = yearMatch[0];
      }
    }

    const result = {
      summary,
      year,
      ageRatings,
      overallRating,
      ...metadata,
    };

    saveCache('detail', movieUrl, result);
    return result;
  } catch (error) {
    console.error('Error getting FilmsTousPublics movie details:', error.message);
    return {};
  }
}
|
||||
|
||||
/**
 * Search the site, then enrich every hit with the details scraped from its page.
 *
 * Detail pages are fetched in parallel through `getMovieClassification`. Any
 * error is logged and produces an empty array.
 *
 * @param {string} query - Free-text search terms.
 * @returns {Promise<Array<Object>>} One object per search hit carrying the basic
 *   card data (`title`, `img`, `link`, `rating`), derived age fields (`age`,
 *   `ageFrance`, `ageAverage`, `ageMedian`, `ageRecommended`, `countriesCount`,
 *   `ageRange`, `ageDetails`) and every field of the scraped details.
 */
async function searchAndEnrich(query) {
  // Display names for the country keys used in `ageRatings`.
  const countryNames = {
    france: "France",
    germany: "Allemagne",
    spain: "Espagne",
    uk: "Royaume-Uni",
    sweden: "Suède",
    switzerland: "Suisse",
    netherlands: "Pays-Bas",
    usa: "États-Unis"
  };

  const enrich = async (movie) => {
    const details = await getMovieClassification(movie.link);

    const ageStats = calculateAverageAge(details.ageRatings || {});
    console.log(`Movie: ${movie.title}, Age stats:`, ageStats);

    // Per-country ratings keyed by display name; 0 is shown as "Tous publics".
    const formattedAgeRatings = Object.fromEntries(
      Object.entries(details.ageRatings || {}).map(([country, age]) => [
        countryNames[country] || country,
        age === 0 ? "Tous publics" : `${age}+`,
      ])
    );

    // Label for the French rating alone.
    const franceAge = details.ageRatings?.france;
    let franceLabel;
    if (franceAge === 0) {
      franceLabel = "Tous publics";
    } else if (franceAge) {
      franceLabel = `${franceAge}+`;
    } else {
      franceLabel = "Non spécifié";
    }

    // Recommended age: median first, then average, then the French label.
    let recommendedAge;
    if (ageStats?.median) {
      recommendedAge = `${ageStats.median}+`;
    } else if (ageStats?.average) {
      recommendedAge = `${ageStats.average}+`;
    } else {
      recommendedAge = franceLabel;
    }

    return {
      title: movie.title,
      year: details.year,
      img: movie.img,
      link: movie.link,
      source: 'filmstouspublics',
      rating: movie.rating || details.overallRating,
      // Compact form of the recommended age
      age: recommendedAge.replace('Tous publics', '0+').replace('Non spécifié', '-'),
      ageFrance: franceLabel,
      ageAverage: ageStats?.average || null,
      ageMedian: ageStats?.median || null,
      ageRecommended: recommendedAge,
      countriesCount: ageStats?.countries || 0,
      ageRange: {
        min: ageStats?.min || null,
        max: ageStats?.max || null
      },
      ageDetails: formattedAgeRatings,
      summary: details.summary,
      // Spread last: scraped fields override same-named keys above.
      ...details
    };
  };

  try {
    const hits = await searchMovies(query);
    return await Promise.all(hits.map((movie) => enrich(movie)));
  } catch (err) {
    console.error('FilmsTousPublics searchAndEnrich error:', err.message);
    return [];
  }
}
|
||||
|
||||
module.exports = { searchAndEnrich };
|
79
server.js
79
server.js
@ -3,80 +3,55 @@ const cors = require('cors');
|
||||
const cinecheck = require('./aggregators/cinecheck-adapter');
|
||||
const commonsense = require('./aggregators/commonsense-adapter');
|
||||
const filmages = require('./aggregators/filmages-adapter');
|
||||
const filmstouspublics = require('./aggregators/filmstouspublics-adapter');
|
||||
const { mergeResults } = require('./merge');
|
||||
|
||||
const app = express();
|
||||
app.use(cors());
|
||||
|
||||
/**
 * Split text into lower-cased words for title/query matching.
 *
 * The text is normalised to Unicode NFC first, so an accent typed as a separate
 * combining mark yields the same word as its precomposed form. Every character
 * that is not a letter, a digit or whitespace is then removed (not replaced by
 * a space, so "Spider-Man" becomes "spiderman").
 *
 * @param {*} text - Input text; anything that is not a non-empty string gives [].
 * @returns {string[]} The words in order of appearance, without empty entries.
 */
function getWords(text) {
  if (!text || typeof text !== 'string') return [];
  return text
    .normalize('NFC')
    .toLowerCase()
    // Remove punctuation, keep letters, numbers, and whitespace. Handles Unicode.
    .replace(/[^\p{L}\p{N}\s]/gu, '')
    .trim()
    .split(/\s+/)
    .filter(Boolean); // drops the single '' produced when nothing is left
}
|
||||
|
||||
/**
 * GET /search?q=<text>
 *
 * Queries the four aggregator adapters in parallel, merges their results with
 * `mergeResults`, orders them by relevance to the query and responds with JSON.
 * A failing adapter is logged and contributes an empty list instead of failing
 * the whole request.
 *
 * Responses: 400 when `q` is missing or not a single string, 500 on an
 * unexpected error, otherwise 200 with the merged array. Each merged item gets
 * `matchScore1` and `matchScore2` added when the query contains at least one word.
 *
 * NOTE(review): this span contained two interleaved revisions of the handler
 * (duplicate declarations and duplicate responses). This version keeps all four
 * adapters and the relevance sort, and keeps the first message of each
 * duplicated pair — confirm that this is the intended combination.
 */
app.get('/search', async (req, res) => {
  const q = req.query.q;
  // Repeated or nested `q` parameters arrive as arrays/objects; the adapters
  // expect a plain string.
  if (!q || typeof q !== 'string') {
    return res.status(400).json({ error: "Missing query. Predictable." });
  }

  console.log('===== SEARCH LOGS =====');
  console.log('Query:', q);

  // Run one adapter; a failure is logged and becomes an empty result list.
  const runAdapter = (label, adapter) =>
    adapter.searchAndEnrich(q).catch((e) => {
      console.error(`${label} failed:`, e.message);
      return [];
    });

  try {
    const [cine, cs, fa, ftp] = await Promise.all([
      runAdapter('Cinecheck', cinecheck),
      runAdapter('Commonsense', commonsense),
      runAdapter('Filmages', filmages),
      runAdapter('FilmsTousPublics', filmstouspublics),
    ]);

    console.log('Cinecheck results:', cine.length);
    console.log('CSM results:', cs.length);
    console.log('Filmages results:', fa.length);
    console.log('FilmsTousPublics results:', ftp.length);
    console.log('Raw CSM data:', cs); // Inspect full data

    const merged = mergeResults([cine, cs, fa, ftp]);

    // Sort merged results based on query relevance.
    const uniqueQueryWords = new Set(getWords(q));
    if (uniqueQueryWords.size > 0) {
      const titleWordCounts = new Map();

      for (const item of merged) {
        const titleWords = getWords(item.title);
        titleWordCounts.set(item, titleWords.length);
        const uniqueTitleWords = new Set(titleWords);

        let commonWordCount = 0;
        for (const word of uniqueQueryWords) {
          if (uniqueTitleWords.has(word)) commonWordCount++;
        }

        // matchScore1: share of the query words found in the title.
        item.matchScore1 = commonWordCount / uniqueQueryWords.size;

        // matchScore2: shared words over the union of query and title words.
        const unionSize = new Set([...uniqueQueryWords, ...uniqueTitleWords]).size;
        item.matchScore2 = unionSize > 0 ? commonWordCount / unionSize : 0;
      }

      merged.sort((a, b) => {
        if (b.matchScore1 !== a.matchScore1) return b.matchScore1 - a.matchScore1;
        if (b.matchScore2 !== a.matchScore2) return b.matchScore2 - a.matchScore2;
        // Shorter titles preferred as tertiary sort
        return titleWordCounts.get(a) - titleWordCounts.get(b);
      });
    }

    res.json(merged);
  } catch (e) {
    console.error('General search error:', e);
    res.status(500).json({ error: e.message || "Server's taking a nap." });
  }
});
|
||||
|
||||
// Listening port: the PORT environment variable when it holds a valid TCP port
// number, otherwise 3000.
const envPort = Number.parseInt(process.env.PORT || '', 10);
const PORT = Number.isInteger(envPort) && envPort > 0 && envPort < 65536 ? envPort : 3000;

app.listen(PORT, () => {
  console.log(`Backend sorting your life out on http://localhost:${PORT}. You're welcome.`);
  console.log(`Backend multi-agrégateurs prêt sur http://localhost:${PORT}`);
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user