aggregate more source
This commit is contained in:
81
aggregators/cinecheck-adapter.js
Normal file
81
aggregators/cinecheck-adapter.js
Normal file
@ -0,0 +1,81 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
// Site origin: prefix of the search endpoint and of the image/link paths scraped from cinecheck pages.
const CINECHECK_BASE = 'https://www.cinecheck.be';
|
||||
|
||||
/**
 * Searches cinecheck.be for productions matching a free-text query.
 *
 * Requests the site's search-results endpoint (the query string passes
 * `producties=0` and `amount=5` verbatim, with a French-Belgian culture
 * header) and scrapes the returned HTML.
 *
 * @param {string} query - Free-text title; URL-encoded before being sent.
 * @returns {Promise<Array<{title: string, year: (string|null), img: (string|null), link: string}>>}
 *   One entry per search result that has both a title and a detail link.
 *   `title` has a trailing parenthesised suffix removed; `year` is the
 *   four-digit number found in parentheses, if any.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
async function searchMovies(query) {
  // Resolve against the site origin instead of concatenating strings, so that
  // already-absolute URLs and paths without a leading slash stay valid.
  const toAbsolute = (path) => {
    if (!path) return null;
    try {
      return new URL(path, CINECHECK_BASE).href;
    } catch (_err) {
      // Unparseable attribute value: treat it as missing.
      return null;
    }
  };

  const url = `${CINECHECK_BASE}/umbraco/surface/searchresults/search?query=${encodeURIComponent(query)}&producties=0&amount=5`;
  const res = await axios.get(url, {
    headers: {
      'x-umb-culture': 'fr-BE',
      'x-umb-key': '0a0c11a9-ece8-4dc8-8578-e5aab235d9ff',
      'x-requested-with': 'XMLHttpRequest',
      'User-Agent': 'Mozilla/5.0',
    },
  });
  const $ = cheerio.load(res.data);
  const results = [];

  $('.c-search__result').each((_, el) => {
    const item = $(el);
    // Read the title text once; both the cleaned title and the year come from it.
    const rawTitle = item.find('.c-search__title').text();
    const title = rawTitle.trim().replace(/\s*\(.+?\)\s*$/, '');
    const yearMatch = rawTitle.match(/\((\d{4})\)/);
    const year = yearMatch ? yearMatch[1] : null;
    const img = toAbsolute(item.find('img.c-search__image').attr('src'));
    const link = toAbsolute(item.find('a.c-search__hiddenlink').attr('href'));

    // A result without a title or a detail page cannot be enriched later.
    if (title && link) {
      results.push({ title, year, img, link });
    }
  });

  return results;
}
|
||||
|
||||
/**
 * Fetches a cinecheck.be production page and scrapes its classification data.
 *
 * @param {string} movieUrl - Absolute URL of the production's detail page.
 * @returns {Promise<{year: (string|null), genres: string[], img: (string|null),
 *   marks: string[], details: Array<{type: string, description: string}>, summary: string}|{}>}
 *   The scraped fields, or an empty object when `movieUrl` is falsy.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
async function getMovieClassification(movieUrl) {
  // Nothing to fetch without a URL; an empty object spreads harmlessly into a caller's result.
  if (!movieUrl) return {};

  const res = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(res.data);

  // The first label is read as the year and the second as a comma-separated genre list.
  const labels = $('.c-movie__details .c-movie__label');
  const year = labels.first().text().trim() || null;
  const genres = labels
    .eq(1)
    .text()
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean); // a missing label would otherwise produce ['']

  // Resolve the cover against the site origin so absolute or slash-less paths stay valid.
  const coverSrc = $('.c-movie__cover img').attr('src');
  let img = null;
  if (coverSrc) {
    try {
      img = new URL(coverSrc, CINECHECK_BASE).href;
    } catch (_err) {
      img = null; // unparseable attribute value: treat as missing
    }
  }

  // Text of the `span.vh` inside each header mark.
  const marks = [];
  $('.c-header__marks .c-header__mark').each((_, el) => {
    const label = $(el).find('span.vh').text().trim();
    if (label) marks.push(label);
  });

  // Each classification item is identified by the fragment (#name) of its SVG sprite reference.
  const details = [];
  $('.c-classificatie__item').each((_, el) => {
    const type = $(el).find('svg use').first().attr('xlink:href') || '';
    const typeName = type.split('#')[1] || '';
    const description = $(el).find('.js-classificatie-text').text().trim();
    if (typeName && description) {
      details.push({ type: typeName, description });
    }
  });

  const summary = $('.c-movie__introtext p').first().text().trim();

  return {
    year,
    genres,
    img,
    marks,
    details,
    summary,
  };
}
|
||||
|
||||
/**
 * Searches cinecheck.be and enriches every hit with its classification page.
 *
 * Detail pages are fetched in parallel. Because `Promise.all` is used, a
 * single failing detail request rejects the whole call.
 *
 * @param {string} query - Free-text title to search for.
 * @returns {Promise<Array<Object>>} One object per search hit, holding
 *   `title`, `year`, `img`, `link`, `source: 'cinecheck'` plus every field
 *   returned by `getMovieClassification`.
 */
async function searchAndEnrich(query) {
  const results = await searchMovies(query);
  return Promise.all(results.map(async (m) => {
    const classification = await getMovieClassification(m.link);
    return {
      title: m.title,
      year: m.year,
      img: m.img,
      link: m.link,
      source: 'cinecheck',
      ...classification,
      // The detail page wins when it has a value, but a missing year/cover
      // there must not erase what the search result already provided.
      year: classification.year ?? m.year,
      img: classification.img ?? m.img,
    };
  }));
}
|
||||
|
||||
// Only searchAndEnrich is exported; searchMovies and getMovieClassification stay internal to this module.
module.exports = { searchAndEnrich };
|
56
aggregators/commonsense-adapter.js
Normal file
56
aggregators/commonsense-adapter.js
Normal file
@ -0,0 +1,56 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
// Site origin: prefix of the search URL and of the result links scraped from search pages.
const BASE = 'https://www.commonsensemedia.org';
|
||||
|
||||
/**
 * Searches commonsensemedia.org and returns the hits whose media type is "movie".
 *
 * @param {string} query - Free-text title; URL-encoded into the search path.
 * @returns {Promise<Array<{title: string, year: null, img: (string|undefined), link: (string|null)}>>}
 *   One entry per movie row. `year` is always null; `link` is null when the
 *   row has no usable href.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
async function searchMovies(query) {
  const url = `${BASE}/search/${encodeURIComponent(query)}`;
  const res = await axios.get(url, {
    headers: { 'User-Agent': 'Mozilla/5.0', 'accept-language': 'en-US,en;q=0.9' },
  });
  const $ = cheerio.load(res.data);
  const results = [];

  $('.search-results-list__row').each((_, el) => {
    const row = $(el);
    const type = row.find('.media-type').text().trim();
    if (type.toLowerCase() !== 'movie') return; // ignore non-movies

    const title = row.find('.search-results-product-title').text().trim();
    const href = row.find('a.search-results-product-title').attr('href');

    // Resolve against the site origin so an already-absolute href is not
    // prefixed a second time.
    let link = null;
    if (href) {
      try {
        link = new URL(href, BASE).href;
      } catch (_err) {
        link = null; // unparseable href: treat as missing
      }
    }

    const img = row.find('img.search-results-product-image').attr('src');
    // CSM search results usually carry no release year.
    results.push({ title, year: null, img, link });
  });

  return results;
}
|
||||
|
||||
/**
 * Fetches a Common Sense Media review page and scrapes its rating data.
 *
 * @param {string|null} movieUrl - Absolute URL of the review page.
 * @returns {Promise<{age: (string|null), summary: string,
 *   details: Array<{type: string, score: number, description: string}>}|{}>}
 *   The scraped fields, or an empty object when `movieUrl` is falsy.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
async function getMovieClassification(movieUrl) {
  if (!movieUrl) return {};

  const res = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(res.data);

  // Strip the "age" label and the "+" suffix from the rating text. The label
  // match is case-insensitive so a capitalised "Age 10+" is reduced to "10" too.
  const age = $('[data-test="age-rating"]')
    .first()
    .text()
    .replace(/age/i, '')
    .replace('+', '')
    .trim() || null;
  const summary = $('[data-test="review-summary"]').first().text().trim();

  const details = [];
  $('[data-test="product-rating-section"]').each((_, el) => {
    const section = $(el);
    const label = section.find('[data-test="rating-section-label"]').text().trim();
    // The score is the number of icons marked active inside the section.
    const score = section.find('.icon-circle-solid.active,.icon-star-solid.active').length;
    const desc = section.find('[data-test="rating-section-description"]').text().trim();
    if (label) details.push({ type: label, score, description: desc });
  });

  return { age, summary, details };
}
|
||||
|
||||
/**
 * Runs a Common Sense Media search and merges each hit with the data scraped
 * from its review page.
 *
 * Review pages are requested concurrently; `Promise.all` makes the whole call
 * reject as soon as one of them fails.
 *
 * @param {string} query - Free-text title to search for.
 * @returns {Promise<Array<Object>>} One object per hit: `title`, `year`,
 *   `img`, `link`, `source: 'commonsense'`, followed by whatever
 *   `getMovieClassification` returned for that link.
 */
async function searchAndEnrich(query) {
  const hits = await searchMovies(query);

  // Builds the merged record for a single search hit.
  const enrich = async ({ title, year, img, link }) => {
    const classification = await getMovieClassification(link);
    return { title, year, img, link, source: 'commonsense', ...classification };
  };

  return Promise.all(hits.map((hit) => enrich(hit)));
}
|
||||
|
||||
// Only searchAndEnrich is exported; searchMovies and getMovieClassification stay internal to this module.
module.exports = { searchAndEnrich };
|
99
aggregators/filmages-adapter.js
Normal file
99
aggregators/filmages-adapter.js
Normal file
@ -0,0 +1,99 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
// Base URL (note the trailing slash) used for the search path and for the result links scraped from it.
// NOTE(review): plain http — confirm whether the site is reachable over https.
const BASE_URL = 'http://www.filmages.ch/';
|
||||
|
||||
/**
 * Searches filmages.ch and scrapes the rows of the result table.
 *
 * @param {string} query - Free-text title; URL-encoded into the search path.
 * @returns {Promise<Array<{titleFrench: string, titleOriginal: string, director: string,
 *   ageLegalSearch: string, ageSuggestedSearch: string, link: string}>>}
 *   One entry per table row that has both a French title and a detail link.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
async function searchMovies(query) {
  const searchUrl = `${BASE_URL}films/recherche/search/${encodeURIComponent(query)}.html`;
  const response = await axios.get(searchUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(response.data);
  const results = [];

  $('table.layout_simpletable tbody tr.item').each((_, el) => {
    const row = $(el);
    const titleAnchor = row.find('td.field.title_french a');
    const titleFrench = titleAnchor.text().trim();
    const href = titleAnchor.attr('href');
    const titleOriginal = row.find('td.field.title_original').text().trim();
    const director = row.find('td.field.director').text().trim();
    const ageLegal = row.find('td.field.age_legal').text().trim();
    const ageSuggested = row.find('td.field.age_suggested').text().trim();

    // BASE_URL ends with a slash, so plain concatenation would yield "//" for
    // root-relative hrefs and a doubled origin for absolute ones; resolve instead.
    let link = null;
    if (href) {
      try {
        link = new URL(href, BASE_URL).href;
      } catch (_err) {
        link = null; // unparseable href: treat the row as having no link
      }
    }

    if (titleFrench && link) {
      results.push({
        titleFrench,
        titleOriginal,
        director,
        ageLegalSearch: ageLegal,
        ageSuggestedSearch: ageSuggested,
        link,
      });
    }
  });

  return results;
}
|
||||
|
||||
/**
 * Downloads a filmages.ch detail page and extracts its classification fields.
 *
 * @param {string|null} movieUrl - Absolute URL of the detail page.
 * @returns {Promise<Object>} `{}` when `movieUrl` is falsy; otherwise an object with
 *   `titleOriginalPage`, `year`, `summary`, `synthesis`, `ageLegal`, `ageSuggested`,
 *   `indications`, `counterIndications` and `directorPage`.
 * @throws Rejects with the axios error when the HTTP request fails.
 */
async function getMovieClassification(movieUrl) {
  if (!movieUrl) {
    return {};
  }

  const { data: html } = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
  const $ = cheerio.load(html);

  // The page is read from three panels: main details, ages, and (counter-)indications.
  const mainPanel = $('#reader_main .layout_full .item');
  const agePanel = $('#reader_right_1 .layout_full .item');
  const criteriaPanel = $('#reader_right_2 .layout_full .item');

  // Trimmed text of everything matching `selector` inside `panel`.
  const textIn = (panel, selector) => panel.find(selector).text().trim();

  // Trimmed text of each anchor matching `selector` in the criteria panel.
  const anchorTexts = (selector) => {
    const collected = [];
    criteriaPanel.find(selector).each((_, anchor) => {
      collected.push($(anchor).text().trim());
    });
    return collected;
  };

  return {
    // Suffixed with "Page" to tell it apart from the value taken from the search row.
    titleOriginalPage: textIn(mainPanel, '.field.title_original .value'),
    year: textIn(mainPanel, '.field.year .value'),
    summary: textIn(mainPanel, '.field.summary .value'),
    synthesis: textIn(mainPanel, '.field.final_remark .value'),
    ageLegal: textIn(agePanel, '.field.age_legal .value'),
    ageSuggested: textIn(agePanel, '.field.age_suggested .value'),
    indications: anchorTexts('.field.indication .value a'),
    counterIndications: anchorTexts('.field.counter_indication .value a'),
    directorPage: textIn(mainPanel, '.field.director .value'),
  };
}
|
||||
|
||||
/**
 * Searches filmages.ch and enriches every hit with its detail page.
 *
 * Detail pages are fetched in parallel. Because `Promise.all` is used, a
 * single failing detail request rejects the whole call.
 *
 * @param {string} query - Free-text title to search for.
 * @returns {Promise<Array<{title: string, year: (string|null), img: null, link: string,
 *   source: string, details: Object}>>}
 *   One object per search hit. `details` merges the search-row fields with
 *   the detail-page fields (detail-page values win on key collisions).
 */
async function searchAndEnrich(query) {
  const searchResults = await searchMovies(query);
  return Promise.all(
    searchResults.map(async (movie) => {
      const classification = await getMovieClassification(movie.link);
      return {
        title: movie.titleFrench || movie.titleOriginal, // Prioritize French title for matching
        // The year only exists on the detail page; report an unknown year as
        // null rather than '' or undefined.
        year: classification.year || null,
        img: null, // No images readily available from search/detail
        link: movie.link,
        source: 'filmages',
        details: {
          ...movie, // Keep all search results fields
          ...classification, // Add all detail page fields
        },
      };
    })
  );
}
|
||||
|
||||
// Only searchAndEnrich is exported; searchMovies and getMovieClassification stay internal to this module.
module.exports = { searchAndEnrich };
|
Reference in New Issue
Block a user