aggregate more sources

This commit is contained in:
SansGuidon 2025-05-17 20:46:23 +02:00
parent 820cc6f209
commit 459b455fee
6 changed files with 422 additions and 52 deletions

View File

@ -0,0 +1,81 @@
const axios = require('axios');
const cheerio = require('cheerio');
const CINECHECK_BASE = 'https://www.cinecheck.be';
/**
 * Searches Cinecheck for productions matching a free-text query.
 *
 * Requests the site's search endpoint (asking for `amount=5` hits), loads the
 * returned markup with cheerio and builds one entry per `.c-search__result`
 * element.
 *
 * @param {string} query - Search term; URL-encoded before being sent.
 * @returns {Promise<Array<{title: string, year: (string|null), img: (string|null), link: string}>>}
 *   Only entries that have both a title and a usable link. `year` is the
 *   four-digit number found in parentheses in the displayed title, else null.
 *   `img` and `link` are absolute URLs.
 * @throws Rejects with the axios error if the HTTP request fails.
 */
async function searchMovies(query) {
  const url = `${CINECHECK_BASE}/umbraco/surface/searchresults/search?query=${encodeURIComponent(query)}&producties=0&amount=5`;
  const res = await axios.get(url, {
    headers: {
      'x-umb-culture': 'fr-BE',
      'x-umb-key': '0a0c11a9-ece8-4dc8-8578-e5aab235d9ff',
      'x-requested-with': 'XMLHttpRequest',
      'User-Agent': 'Mozilla/5.0',
    },
  });
  const $ = cheerio.load(res.data);

  // Resolve hrefs against the site root rather than concatenating strings:
  // concatenation produces a broken URL when the attribute is already
  // absolute or does not start with a slash.
  const toAbsolute = (href) => {
    if (!href) return null;
    try {
      return new URL(href, CINECHECK_BASE).href;
    } catch (err) {
      // A malformed href cannot be followed anyway; treat it as missing.
      return null;
    }
  };

  const results = [];
  $('.c-search__result').each((_, el) => {
    const item = $(el);
    const rawTitle = item.find('.c-search__title').text();
    // The displayed title ends with a parenthesised suffix, e.g. "Name (2021)";
    // strip it from the title and read the year out of it separately.
    const title = rawTitle.trim().replace(/\s*\(.+?\)\s*$/, '');
    const yearMatch = rawTitle.match(/\((\d{4})\)/);
    const year = yearMatch ? yearMatch[1] : null;
    const img = toAbsolute(item.find('img.c-search__image').attr('src'));
    const link = toAbsolute(item.find('a.c-search__hiddenlink').attr('href'));
    if (title && link) {
      results.push({ title, year, img, link });
    }
  });
  return results;
}
/**
 * Fetches a Cinecheck detail page and extracts its classification data.
 *
 * @param {string} movieUrl - Absolute URL of the detail page.
 * @returns {Promise<Object>} An empty object when `movieUrl` is missing
 *   (same convention as the other adapters); otherwise
 *   `{ year, genres, img, marks, details, summary }` where:
 *   - `year` is the text of the first `.c-movie__label` (null when empty),
 *   - `genres` is the second label split on commas, empty pieces removed,
 *   - `img` is the absolute cover URL or null,
 *   - `marks` are the non-empty `span.vh` texts of the header marks,
 *   - `details` are `{ type, description }` pairs, `type` being the fragment
 *     after `#` in the item's first `svg use` `xlink:href`,
 *   - `summary` is the text of the first intro paragraph.
 * @throws Rejects with the axios error if the HTTP request fails.
 */
async function getMovieClassification(movieUrl) {
  if (!movieUrl) return {};

  const res = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(res.data);

  const labels = $('.c-movie__details .c-movie__label');
  const year = labels.first().text().trim() || null;
  // ''.split(',') yields [''], so drop empty pieces to get [] when the label is absent.
  const genres = labels
    .eq(1)
    .text()
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean);

  // Resolve against the site root; plain concatenation breaks on absolute
  // or non-root-relative src values.
  const coverSrc = $('.c-movie__cover img').attr('src');
  let img = null;
  if (coverSrc) {
    try {
      img = new URL(coverSrc, CINECHECK_BASE).href;
    } catch (err) {
      img = null; // malformed src: behave as if there were no cover
    }
  }

  const marks = [];
  $('.c-header__marks .c-header__mark').each((_, el) => {
    const label = $(el).find('span.vh').text().trim();
    if (label) marks.push(label);
  });

  const details = [];
  $('.c-classificatie__item').each((_, el) => {
    const item = $(el);
    const type = item.find('svg use').first().attr('xlink:href') || '';
    const typeName = type.split('#')[1] || '';
    const description = item.find('.js-classificatie-text').text().trim();
    if (typeName && description) {
      details.push({ type: typeName, description });
    }
  });

  const summary = $('.c-movie__introtext p').first().text().trim();

  return {
    year,
    genres,
    img,
    marks,
    details,
    summary,
  };
}
/**
 * Searches Cinecheck and enriches every hit with its detail-page data.
 *
 * Detail pages are fetched in parallel. A failure on one detail page no longer
 * rejects the whole batch: that hit is returned with its search-level fields
 * only. Detail-page `year` / `img` take precedence, but a null or undefined
 * value there no longer erases the value found in the search results.
 *
 * @param {string} query - Free-text search term.
 * @returns {Promise<Array<Object>>} One object per search hit with `title`,
 *   `year`, `img`, `link`, `source: 'cinecheck'` plus the classification fields.
 * @throws Rejects only if the initial search itself fails.
 */
async function searchAndEnrich(query) {
  const results = await searchMovies(query);
  return Promise.all(
    results.map(async (movie) => {
      const base = {
        title: movie.title,
        year: movie.year,
        img: movie.img,
        link: movie.link,
        source: 'cinecheck',
      };
      let classification;
      try {
        classification = await getMovieClassification(movie.link);
      } catch (err) {
        // Best effort: keep the search hit even if its detail page is unreachable.
        console.error(`Cinecheck detail page failed for ${movie.link}:`, err.message);
        return base;
      }
      return {
        ...base,
        ...classification,
        year: classification.year == null ? movie.year : classification.year,
        img: classification.img == null ? movie.img : classification.img,
      };
    })
  );
}
module.exports = { searchAndEnrich };

View File

@ -0,0 +1,56 @@
const axios = require('axios');
const cheerio = require('cheerio');
const BASE = 'https://www.commonsensemedia.org';
/**
 * Searches Common Sense Media and returns the rows typed as movies.
 *
 * @param {string} query - Search term; URL-encoded into the path.
 * @returns {Promise<Array<{title: string, year: null, img: (string|null), link: (string|null)}>>}
 *   One entry per result row whose `.media-type` text is "movie"
 *   (case-insensitive) and that has a non-empty title. `img` and `link` are
 *   absolute URLs, or null when the attribute is missing or malformed.
 * @throws Rejects with the axios error if the HTTP request fails.
 */
async function searchMovies(query) {
  const url = `${BASE}/search/${encodeURIComponent(query)}`;
  const res = await axios.get(url, {
    headers: { 'User-Agent': 'Mozilla/5.0', 'accept-language': 'en-US,en;q=0.9' },
  });
  const $ = cheerio.load(res.data);

  // Resolve against the site root so relative image/link attributes become
  // usable from another origin, and absolute ones are left intact.
  const toAbsolute = (href) => {
    if (!href) return null;
    try {
      return new URL(href, BASE).href;
    } catch (err) {
      return null; // malformed attribute: treat as missing
    }
  };

  const results = [];
  $('.search-results-list__row').each((_, el) => {
    const row = $(el);
    const type = row.find('.media-type').text().trim();
    if (type.toLowerCase() !== 'movie') return; // ignore non-movies
    const title = row.find('.search-results-product-title').text().trim();
    if (!title) return; // an untitled row cannot be merged with other sources
    const link = toAbsolute(row.find('a.search-results-product-title').attr('href'));
    const img = toAbsolute(row.find('img.search-results-product-image').attr('src'));
    // CSM search results usually carry no year.
    results.push({ title, year: null, img, link });
  });
  return results;
}
/**
 * Loads a Common Sense Media review page and pulls out its rating data.
 *
 * @param {string} movieUrl - Absolute URL of the review page.
 * @returns {Promise<Object>} `{}` when no URL is given; otherwise
 *   `{ age, summary, details }` where `age` is the age-rating text with the
 *   first "age" and first "+" removed (null when nothing remains), `summary`
 *   is the review summary text, and `details` lists
 *   `{ type, score, description }` for every rating section that has a label.
 *   `score` is the number of active circle/star icons in the section.
 */
async function getMovieClassification(movieUrl) {
  if (!movieUrl) {
    return {};
  }

  const response = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(response.data);

  const rawAge = $('[data-test="age-rating"]').first().text();
  const cleanedAge = rawAge.replace('age', '').replace('+', '').trim();
  const age = cleanedAge === '' ? null : cleanedAge;

  const summary = $('[data-test="review-summary"]').first().text().trim();

  const details = $('[data-test="product-rating-section"]')
    .toArray()
    .map((section) => {
      const node = $(section);
      return {
        type: node.find('[data-test="rating-section-label"]').text().trim(),
        score: node.find('.icon-circle-solid.active,.icon-star-solid.active').length,
        description: node.find('[data-test="rating-section-description"]').text().trim(),
      };
    })
    // Sections without a label are skipped.
    .filter((detail) => detail.type);

  return { age, summary, details };
}
/**
 * Searches Common Sense Media and enriches every hit with its review data.
 *
 * Review pages are fetched in parallel. A failure on one review page no longer
 * rejects the whole batch: that hit is returned with its search-level fields
 * only.
 *
 * @param {string} query - Free-text search term.
 * @returns {Promise<Array<Object>>} One object per search hit with `title`,
 *   `year`, `img`, `link`, `source: 'commonsense'` plus whatever
 *   `getMovieClassification` returned for it.
 * @throws Rejects only if the initial search itself fails.
 */
async function searchAndEnrich(query) {
  const results = await searchMovies(query);
  return Promise.all(
    results.map(async (movie) => {
      const base = {
        title: movie.title,
        year: movie.year,
        img: movie.img,
        link: movie.link,
        source: 'commonsense',
      };
      try {
        const classification = await getMovieClassification(movie.link);
        return { ...base, ...classification };
      } catch (err) {
        // Best effort: keep the search hit even if its review page is unreachable.
        console.error(`Commonsense detail page failed for ${movie.link}:`, err.message);
        return base;
      }
    })
  );
}
module.exports = { searchAndEnrich };

View File

@ -0,0 +1,99 @@
const axios = require('axios');
const cheerio = require('cheerio');
const BASE_URL = 'http://www.filmages.ch/';
/**
 * Queries the filmages.ch search page and collects one entry per result row.
 *
 * @param {string} query - Search term; URL-encoded into the path.
 * @returns {Promise<Array<Object>>} Rows that have both a French title and a
 *   link, as `{ titleFrench, titleOriginal, director, ageLegalSearch,
 *   ageSuggestedSearch, link }`; `link` is `BASE_URL` followed by the row's href.
 */
async function searchMovies(query) {
  const searchUrl = `${BASE_URL}films/recherche/search/${encodeURIComponent(query)}.html`;
  const { data: html } = await axios.get(searchUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(html);

  // Trimmed text of the first-level match for `selector` inside a table row.
  const cellText = (row, selector) => row.find(selector).text().trim();

  const movies = [];
  for (const el of $('table.layout_simpletable tbody tr.item').toArray()) {
    const row = $(el);
    const anchor = row.find('td.field.title_french a');
    const titleFrench = anchor.text().trim();
    const href = anchor.attr('href');
    if (!titleFrench || !href) {
      continue;
    }
    movies.push({
      titleFrench,
      titleOriginal: cellText(row, 'td.field.title_original'),
      director: cellText(row, 'td.field.director'),
      ageLegalSearch: cellText(row, 'td.field.age_legal'),
      ageSuggestedSearch: cellText(row, 'td.field.age_suggested'),
      link: `${BASE_URL}${href}`, // make the href absolute
    });
  }
  return movies;
}
/**
 * Loads a filmages.ch detail page and extracts its descriptive fields.
 *
 * @param {string} movieUrl - Absolute URL of the detail page.
 * @returns {Promise<Object>} `{}` when no URL is given; otherwise an object
 *   with `titleOriginalPage`, `year`, `summary`, `synthesis`, `ageLegal`,
 *   `ageSuggested`, `indications`, `counterIndications` and `directorPage`.
 *   Text fields are trimmed strings (empty when the field is absent); the two
 *   lists hold the trimmed text of each link in the matching field.
 */
async function getMovieClassification(movieUrl) {
  if (!movieUrl) {
    return {};
  }

  const { data: html } = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(html);

  // The page is read from three regions: main column, ages, indications.
  const mainColumn = $('#reader_main .layout_full .item');
  const agesColumn = $('#reader_right_1 .layout_full .item');
  const criteriaColumn = $('#reader_right_2 .layout_full .item');

  const textIn = (scope, selector) => scope.find(selector).text().trim();
  const linkTexts = (selector) =>
    criteriaColumn
      .find(selector)
      .toArray()
      .map((anchor) => $(anchor).text().trim());

  return {
    // "Page" suffix distinguishes these from the search-result fields.
    titleOriginalPage: textIn(mainColumn, '.field.title_original .value'),
    year: textIn(mainColumn, '.field.year .value'),
    summary: textIn(mainColumn, '.field.summary .value'),
    synthesis: textIn(mainColumn, '.field.final_remark .value'),
    ageLegal: textIn(agesColumn, '.field.age_legal .value'),
    ageSuggested: textIn(agesColumn, '.field.age_suggested .value'),
    indications: linkTexts('.field.indication .value a'),
    counterIndications: linkTexts('.field.counter_indication .value a'),
    directorPage: textIn(mainColumn, '.field.director .value'),
  };
}
/**
 * Searches filmages.ch and enriches every hit with its detail-page data.
 *
 * Detail pages are fetched in parallel. A failure on one detail page no longer
 * rejects the whole batch: that hit is returned with its search-level fields
 * only and a null year.
 *
 * @param {string} query - Free-text search term.
 * @returns {Promise<Array<Object>>} One object per search hit:
 *   `{ title, year, img: null, link, source: 'filmages', details }` where
 *   `details` merges the search-row fields with the detail-page fields.
 * @throws Rejects only if the initial search itself fails.
 */
async function searchAndEnrich(query) {
  const searchResults = await searchMovies(query);
  return Promise.all(
    searchResults.map(async (movie) => {
      let classification = {};
      try {
        classification = await getMovieClassification(movie.link);
      } catch (err) {
        // Best effort: keep the search hit even if its detail page is unreachable.
        console.error(`Filmages detail page failed for ${movie.link}:`, err.message);
      }
      return {
        title: movie.titleFrench || movie.titleOriginal, // prefer the French title for matching
        year: classification.year || null, // the year is only on the detail page
        img: null, // no image is extracted from the search or detail page
        link: movie.link,
        source: 'filmages',
        details: {
          ...movie, // keep all search-row fields
          ...classification, // add all detail-page fields
        },
      };
    })
  );
}
module.exports = { searchAndEnrich };

View File

@ -2,54 +2,142 @@
<html lang="fr">
<head>
<meta charset="UTF-8">
<title>Agrégateur de films pour parents</title>
<title>Agrégateur Multi-Source</title>
<style>
body { font-family: system-ui, sans-serif; background: #252525; color: #ececec; }
.film { border:1px solid #444; margin:16px 0; padding:16px; border-radius: 6px; background: #181818; }
.film img { max-height: 180px; float:right; margin-left:16px; }
.film h2 { margin-top:0;}
.searchbox { margin:32px 0; }
input[type="text"] { font-size:1.2em; width:22em; background:#111; color:#fff; border:1px solid #444; border-radius:4px; padding:6px; }
button { font-size:1.1em; padding:6px 14px; background:#333; color:#fff; border-radius:4px; border:1px solid #555; }
.marks { font-size:0.95em; color:#b4ffb4; margin-bottom:6px; }
.details { font-size:0.93em; color: #ffbdbd;}
.year { color:#aaa;}
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; background: #1a1a1a; color: #e0e0e0; margin: 0; padding: 20px; font-size: 14px; }
.container { max-width: 1200px; margin: auto; }
h1 { color: #fff; text-align: center; }
.searchbox { margin: 20px 0 30px; display: flex; justify-content: center; }
input[type="text"] { font-size: 1.1em; width: clamp(200px, 60%, 500px); background: #2c2c2c; color: #fff; border: 1px solid #444; border-radius: 4px; padding: 10px 12px; }
button { font-size: 1.1em; padding: 10px 18px; background: #007aff; color: #fff; border-radius: 4px; border: none; margin-left: 10px; cursor: pointer; transition: background-color 0.2s; }
button:hover { background: #005bb5; }
#films { margin-top: 20px; }
.results-table { width: 100%; border-collapse: collapse; table-layout: fixed; }
.results-table th, .results-table td { border: 1px solid #333; padding: 12px; text-align: left; vertical-align: top; }
.results-table th { background-color: #252525; color: #ccc; font-weight: 600; }
.results-table td:nth-child(1) { width: 25%; }
.results-table td:nth-child(2) { width: 10%; }
.results-table td:nth-child(3) { width: 65%; }
.source-block { border: 1px solid #383838; border-radius: 4px; padding: 10px; margin-bottom: 10px; background-color: #222; }
.source-block:last-child { margin-bottom: 0; }
.source-name { font-weight: bold; color: #58a6ff; margin-bottom: 8px; font-size: 1.1em; }
.source-block img { max-height: 100px; float: right; margin-left: 10px; border-radius: 3px; }
.source-block p { margin: 4px 0; line-height: 1.5; }
.source-block a { color: #79c0ff; text-decoration: none; }
.source-block a:hover { text-decoration: underline; }
.year { color: #aaa; }
.loader { text-align: center; font-size: 1.2em; color: #888; }
.no-results { text-align: center; font-size: 1.1em; color: #999; margin-top:30px;}
</style>
</head>
<body>
<h1>Cinécheck Agrégateur</h1>
<div class="searchbox">
<input type="text" id="q" placeholder="Recherche... (ex: Minecraft)" />
<button onclick="search()">Go</button>
<div class="container">
<h1>Ciné-agrégateur Multi-Source</h1>
<div class="searchbox">
<input type="text" id="q" placeholder="Ex: Dune, Spider-Man, Oppenheimer..." />
<button onclick="search()">Rechercher</button>
</div>
<div id="films"></div>
</div>
<div id="films"></div>
<script>
// search(): reads the query from #q, calls GET /search?q=... on the backend at
// http://localhost:3000 and renders the response into #films. Does nothing when
// the trimmed query is empty. The keydown listener registered after the
// function runs search() when Enter is pressed in #q.
//
// The table-rendering code expects an array of { title, year, results: [...] }
// where each result carries a `source` ('commonsense', 'cinecheck', 'filmages'
// or anything else, which falls through to a generic summary/age rendering).
//
// NOTE(review): this listing shows lines of two versions of the function side
// by side (one using `q`/`res` and per-film <div>s, one using `query`/`response`
// and a results table) — confirm which lines are live before editing.
// NOTE(review): values taken from the response (titles, summaries, links,
// image URLs, ...) are interpolated into an HTML string and assigned to
// innerHTML without escaping; since they originate from scraped pages,
// consider escaping them or building nodes with textContent.
async function search() {
const q = document.getElementById('q').value.trim();
if (!q) return;
document.getElementById('films').innerHTML = 'Loading...';
const res = await fetch(`http://localhost:3000/search?q=${encodeURIComponent(q)}`);
const films = await res.json();
if (!Array.isArray(films) || !films.length) {
document.getElementById('films').innerHTML = "Rien trouvé. Essaie autre chose.";
return;
// Table-based variant starts here: loader first, then fetch inside try/catch.
const query = document.getElementById('q').value.trim();
if (!query) return;
const filmsDiv = document.getElementById('films');
filmsDiv.innerHTML = '<p class="loader">Recherche en cours...</p>';
try {
const response = await fetch(`http://localhost:3000/search?q=${encodeURIComponent(query)}`);
if (!response.ok) {
// Gilfoyle: "The network, or your server, is garbage."
filmsDiv.innerHTML = `<p class="no-results">Erreur: ${response.status} ${response.statusText || 'Impossible de joindre le backend.'}</p>`;
return;
}
const films = await response.json();
if (!Array.isArray(films) || !films.length) {
filmsDiv.innerHTML = '<p class="no-results">Aucun résultat trouvé. Essayez un autre terme.</p>';
return;
}
let html = `<table class="results-table">
<thead>
<tr>
<th>Titre</th>
<th>Année</th>
<th>Sources d'information</th>
</tr>
</thead>
<tbody>`;
films.forEach(film => {
html += `<tr>
<td>${film.title || 'Titre inconnu'}</td>
<td class="year">${film.year || 'N/A'}</td>
<td>`;
film.results.forEach(r => {
html += `<div class="source-block">
<p class="source-name">${r.source.charAt(0).toUpperCase() + r.source.slice(1)}</p>`;
if (r.img) {
html += `<img src="${r.img}" alt="Affiche pour ${film.title}">`;
}
if (r.link) {
html += `<p><a href="${r.link}" target="_blank">Voir la fiche détaillée</a></p>`;
}
// CommonSense Media specific
if (r.source === 'commonsense') {
html += `<p><b>Âge conseillé:</b> ${r.age || '-'}</p>`;
html += `<p><b>Résumé (CSM):</b> ${r.summary || r.parentsNeedToKnow || '-'}</p>`;
if (r.details && r.details.length) {
html += `<p><b>Détails (CSM):</b></p><ul>`;
r.details.forEach(d => {
html += `<li>${d.type}: ${d.score}/5 - ${d.description || ''}</li>`;
});
html += `</ul>`;
}
}
// Cinecheck specific
else if (r.source === 'cinecheck') {
html += `<p><b>Âge(s) (Cinecheck):</b> ${r.marks && r.marks.length ? r.marks.join(', ') : '-'}</p>`;
html += `<p><b>Résumé (Cinecheck):</b> ${r.summary || '-'}</p>`;
if (r.details && r.details.length) {
html += `<p><b>Pictogrammes (Cinecheck):</b> ${r.details.map(d => d.type).join(', ') || '-'}</p>`;
}
}
// Filmages specific
else if (r.source === 'filmages') {
html += `<p><b>Titre original (Filmages):</b> ${r.details.titleOriginalPage || r.details.titleOriginal || '-'}</p>`;
html += `<p><b>Âge légal (Filmages):</b> ${r.details.ageLegal || '-'}</p>`;
html += `<p><b>Âge suggéré (Filmages):</b> ${r.details.ageSuggested || '-'}</p>`;
html += `<p><b>Résumé (Filmages):</b> ${r.details.summary || '-'}</p>`;
html += `<p><b>Synthèse (Filmages):</b> ${r.details.synthesis || '-'}</p>`;
if (r.details.indications && r.details.indications.length) {
html += `<p><b>Indications:</b> ${r.details.indications.join(', ')}</p>`;
}
if (r.details.counterIndications && r.details.counterIndications.length) {
html += `<p><b>Contre-indications:</b> ${r.details.counterIndications.join(', ')}</p>`;
}
}
// Fallback for any other or new source
else {
html += `<p><b>Résumé:</b> ${r.summary || '-'}</p>`;
html += `<p><b>Âge:</b> ${r.age || '-'}</p>`;
}
html += `</div>`;
});
html += `</td></tr>`;
});
html += '</tbody></table>';
filmsDiv.innerHTML = html;
} catch (error) {
// Mike: "Didn't go as planned."
console.error('Search function error:', error);
filmsDiv.innerHTML = `<p class="no-results">Une erreur s'est produite lors de la recherche. Vérifiez la console.</p>`;
}
document.getElementById('films').innerHTML = '';
films.forEach(f => {
const div = document.createElement('div');
div.className = 'film';
div.innerHTML = `
<h2>${f.title} ${f.year ? `<span class="year">(${f.year})</span>` : ''}</h2>
${f.img ? `<img src="${f.img}" alt="cover">` : ""}
<div class="marks"><b>Conseil d'âge:</b> ${f.marks ? f.marks.join(', ') : '-'}</div>
<div class="summary">${f.summary || 'Pas de résumé.'}</div>
<ul class="details">
${f.details && f.details.length ? f.details.map(d => `<li><b>${d.type}:</b> ${d.description}</li>`).join('') : ''}
</ul>
<a href="${f.link}" target="_blank" style="color:#7af;">Voir sur Cinecheck</a>
`;
document.getElementById('films').appendChild(div);
});
}
document.getElementById('q').addEventListener('keydown', e => { if (e.key === 'Enter') search(); });
</script>

29
merge.js Normal file
View File

@ -0,0 +1,29 @@
// Utility for merging the results of several aggregators
/**
 * Reduces a title to a comparison key: diacritics folded, lower-cased, and
 * everything except ASCII letters and digits removed.
 *
 * Accented letters are decomposed (NFD) and their combining marks dropped
 * *before* the ASCII filter, so "Amélie" and "Amelie" yield the same key
 * instead of the accented letter being deleted outright.
 *
 * @param {*} str - Title to normalise; non-strings are converted with String().
 * @returns {string} The normalised key, or '' for any falsy input.
 */
function normalizeTitle(str) {
  if (!str) return '';
  return String(str)
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '') // strip combining diacritical marks
    .toLowerCase()
    .replace(/[^a-z0-9]/g, '');
}
/**
 * Groups entries from several aggregators by normalised title (+ year).
 *
 * The grouping key is `normalizeTitle(title)` followed by `|year` when the
 * entry has a truthy year; entries without a year are keyed on title alone.
 * NOTE(review): consequently an entry without a year never lands in the same
 * group as an entry of the same title that has one.
 *
 * A Map is used for the groups rather than a plain object: with an object, a
 * key such as "constructor" resolves to an inherited Object.prototype member,
 * the group is never created and pushing to its `results` throws. A Map also
 * keeps groups in first-seen order even for numeric-looking keys.
 *
 * @param {Array<Array<Object>>} arrays - One array of entries per source; each
 *   entry is expected to carry `title`, `year` and `source`.
 * @returns {Array<{title: *, year: *, results: Array<Object>}>} One group per
 *   distinct key, in first-seen order; `title`/`year` come from the first
 *   entry of the group and `results` holds a shallow copy of every entry.
 */
function mergeResults(arrays) {
  const groups = new Map();
  for (const entry of arrays.flat()) {
    if (!entry) continue; // tolerate holes / null entries from a source
    const key = normalizeTitle(entry.title) + (entry.year ? '|' + entry.year : '');
    let group = groups.get(key);
    if (!group) {
      group = {
        title: entry.title,
        year: entry.year,
        results: [],
      };
      groups.set(key, group);
    }
    group.results.push({
      source: entry.source,
      ...entry,
    });
  }
  return [...groups.values()];
}
module.exports = { mergeResults };

View File

@ -1,25 +1,42 @@
const express = require('express');
const cors = require('cors');
const cinecheck = require('./cinecheck-adapter');
const cinecheck = require('./aggregators/cinecheck-adapter');
const commonsense = require('./aggregators/commonsense-adapter');
const filmages = require('./aggregators/filmages-adapter'); // New
const { mergeResults } = require('./merge');
// Express application: CORS middleware with default options, one GET /search
// route, listening on port 3000.
// NOTE(review): this listing shows lines of two versions side by side (two
// `if (!q)` checks, a Cinecheck-only enrichment next to the multi-source one,
// two `app.listen` calls) — confirm which lines are live before editing.
const app = express();
app.use(cors());
// GET /search?q=<term>
// Responds 400 with { error } when `q` is missing, 500 with { error } when the
// handler throws, otherwise a JSON array.
app.get('/search', async (req, res) => {
const q = req.query.q;
if (!q) return res.status(400).json({ error: "Missing query" });
if (!q) {
// Gilfoyle: "A search query without a query. Bold."
return res.status(400).json({ error: "Missing query. Astounding." });
}
try {
const results = await cinecheck.searchMovies(q);
// Enrich each film with its classification (in parallel, the bare minimum to get by)
const enriched = await Promise.all(results.map(async m => {
const details = await cinecheck.getMovieClassification(m.link);
return { ...m, ...details };
}));
res.json(enriched);
// Mike: "Run 'em all. See what sticks."
// All three sources are queried in parallel; a source that rejects is logged
// and contributes an empty array instead of failing the whole request.
const [cine, cs, fa] = await Promise.all([
cinecheck.searchAndEnrich(q).catch(e => { console.error('Cinecheck failed:', e.message); return []; }),
commonsense.searchAndEnrich(q).catch(e => { console.error('Commonsense failed:', e.message); return []; }),
filmages.searchAndEnrich(q).catch(e => { console.error('Filmages failed:', e.message); return []; })
]);
// Intentionally empty branch: nothing special is done when every source is empty.
if (!cine.length && !cs.length && !fa.length) {
// Gilfoyle: "Zero results. Is this a surprise to anyone?"
}
// Group the per-source entries into one record per film before responding.
const merged = mergeResults([cine, cs, fa]);
res.json(merged);
} catch (e) {
res.status(500).json({ error: e.message });
// Mike: "Something went sideways. Happens."
console.error('General search error:', e);
res.status(500).json({ error: e.message || "Server had a moment." });
}
});
// Start the HTTP server and log the local URL once it is listening.
app.listen(3000, () => {
console.log('Backend prêt. http://localhost:3000');
const PORT = 3000;
app.listen(PORT, () => {
// Gilfoyle: "It's listening. Don't get excited."
console.log(`Backend multi-agrégateurs opérationnel sur http://localhost:${PORT}. Ne me remerciez pas.`);
});