Fix the Common Sense Media scraper: hack around the HTML search page by calling the AJAX search endpoint

This commit is contained in:
SansGuidon 2025-05-18 00:17:42 +02:00
parent 44be436d2d
commit 6c0142c01b

View File

@ -3,54 +3,128 @@ const cheerio = require('cheerio');
// Site origin for Common Sense Media. It is prepended to the search path and to
// the root-relative hrefs/image paths scraped from result markup.
const BASE = 'https://www.commonsensemedia.org';
/**
 * Searches Common Sense Media (CSM) for `query` and collects the movie entries
 * found in the result markup.
 *
 * NOTE(review): this span looks like a rendered diff whose removed and added
 * lines are interleaved without +/- markers: `url`, `headers` and `$` are each
 * declared twice and two `.each(` callbacks share a single closing `});`.
 * It is not runnable as shown; presumably the first line of each duplicated
 * pair is the pre-change code and the second is the post-change code — confirm
 * against the repository before relying on either variant.
 *
 * @param {string} query - Search text; it is passed through
 *   `encodeURIComponent` before being placed in the URL path.
 * @returns {Promise<Array<Object>>} Movie entries. One variant pushes
 *   `{ title, year: null, img, link }`; the other pushes
 *   `{ title, link, img, age, summary, year }`. An empty array is returned when
 *   the response is not an array or holds no matching `insert` command.
 */
async function searchMovies(query) {
// Variant that requests the HTML search page (presumably the pre-change line).
const url = `${BASE}/search/${encodeURIComponent(query)}`;
// Hit the AJAX endpoint directly, not the HTML page
const url = `${BASE}/ajax/search/${encodeURIComponent(query)}`;
console.log('Searching CSM via AJAX endpoint:', url);
const res = await axios.get(url, {
// Single-line headers variant (presumably the pre-change line).
headers: { 'User-Agent': 'Mozilla/5.0', 'accept-language': 'en-US,en;q=0.9' }
// Multi-line headers variant; note that it drops 'accept-language'.
// NOTE(review): 'X-Requested-With' presumably makes the server treat the call
// as an XHR and answer with the AJAX payload — confirm.
headers: {
'User-Agent': 'Mozilla/5.0',
'X-Requested-With': 'XMLHttpRequest'
}
});
// Variant that parses the whole response body as HTML.
const $ = cheerio.load(res.data);
// The response is JSON containing HTML chunks
console.log('Got AJAX response, status:', res.status);
// Anything other than an array of commands is treated as "no results".
if (!res.data || !Array.isArray(res.data)) {
console.log('Invalid AJAX response format');
return [];
}
// Find the HTML insert command with search results
// Only the first command matching both conditions is used.
// NOTE(review): `cmd.data.includes(...)` assumes `data` is a string — confirm.
const searchResultsCommand = res.data.find(cmd =>
cmd.command === 'insert' && cmd.data && cmd.data.includes('search-main-list-content')
);
if (!searchResultsCommand) {
console.log('No search results in AJAX response');
return [];
}
// Parse the HTML chunk
const $ = cheerio.load(searchResultsCommand.data);
const results = [];
// Row-based variant: iterates `.search-results-list__row` elements.
$('.search-results-list__row').each((_, el) => {
const type = $(el).find('.media-type').text().trim();
if (type.toLowerCase() !== 'movie') return; // ignore non-movies
const title = $(el).find('.search-results-product-title').text().trim();
const link = $(el).find('a.search-results-product-title').attr('href');
// hrefs are joined onto BASE, so they are expected to be root-relative.
const absLink = link ? BASE + link : null;
const img = $(el).find('img.search-results-product-image').attr('src');
// CSM usually does not show a release year.
results.push({ title, year: null, img, link: absLink });
// Teaser-based variant: iterates `.review-teaser` elements.
$('.review-teaser').each((_, el) => {
const typeEl = $(el).find('.review-teaser-type');
// Keep only teasers whose type label contains "MOVIE" (case-insensitive).
if (!typeEl.text().trim().toUpperCase().includes('MOVIE')) return;
const title = $(el).find('.review-teaser-title a').text().trim();
const link = $(el).find('.review-teaser-title a').attr('href');
const fullLink = link ? BASE + link : null;
// Get image (might be lazy-loaded)
const imgEl = $(el).find('.review-image img');
// Prefer `data-src` over `src` when both are present.
const imgSrc = imgEl.attr('data-src') || imgEl.attr('src');
// Sources containing 'ratio_2_3' are discarded (img becomes null);
// NOTE(review): presumably those are placeholder images — confirm.
// Root-relative sources are made absolute with BASE.
const img = imgSrc && !imgSrc.includes('ratio_2_3') ?
(imgSrc.startsWith('/') ? BASE + imgSrc : imgSrc) : null;
// Get age rating
// String-pattern `replace` removes only the first 'age' and the first '+'.
const age = $(el).find('.rating__age').text().trim().replace('age', '').replace('+', '').trim();
// Get summary
const summary = $(el).find('.review-teaser-one-liner').text().trim();
// Get year
// Matches "Release Year: YYYY" anywhere in the teaser text; the captured
// year stays a four-digit string, or null when the label is absent.
const yearMatch = $(el).text().match(/Release Year:\s*(\d{4})/);
const year = yearMatch ? yearMatch[1] : null;
// Entries without both a title and a link are dropped.
if (title && fullLink) {
results.push({
title,
link: fullLink,
img,
age,
summary,
year
});
}
});
// Logging: one variant dumps the full list, the other logs the count and
// the first entry only.
console.log('CSM search:', results);
console.log('CSM search found:', results.length, 'results');
console.log('First result:', results[0]);
return results;
}
/**
 * Fetches a movie page and extracts rating information from it.
 *
 * NOTE(review): this span looks like a rendered diff with removed and added
 * lines interleaved without +/- markers: two function headers
 * (`getMovieClassification` and `getMovieDetails`) are followed by two bodies.
 * It is not runnable as shown; presumably `getMovieClassification` and the
 * un-guarded body are the pre-change code and `getMovieDetails` with the
 * try/catch body is the post-change code — confirm against the repository.
 *
 * @param {string} movieUrl - URL of the movie page; a falsy value returns `{}`
 *   without making a request.
 * @returns {Promise<Object>} The un-guarded body returns
 *   `{ age, summary, details }`; the try/catch body returns
 *   `{ parentsNeedToKnow, details }`, or `{}` after logging when the request
 *   or parsing throws. Each `details` item is `{ type, score, description }`.
 */
async function getMovieClassification(movieUrl) {
async function getMovieDetails(movieUrl) {
if (!movieUrl) return {};
// Un-guarded variant: a failed request rejects the returned promise.
const res = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
const $ = cheerio.load(res.data);
// Only the first age-rating element is read; the first 'age' and the first '+'
// are stripped, and an empty result becomes null.
const age = $('[data-test="age-rating"]').first().text().replace('age', '').replace('+', '').trim() || null;
const summary = $('[data-test="review-summary"]').first().text().trim();
const details = [];
$('[data-test="product-rating-section"]').each((_, el) => {
const label = $(el).find('[data-test="rating-section-label"]').text().trim();
// Score is the number of active circle or star icons inside the section.
const score = $(el).find('.icon-circle-solid.active,.icon-star-solid.active').length;
const desc = $(el).find('[data-test="rating-section-description"]').text().trim();
// Sections without a label are skipped.
if (label) details.push({ type: label, score, description: desc });
});
return { age, summary, details };
// Guarded variant: any error below is logged and turned into `{}`.
try {
const res = await axios.get(movieUrl, {
headers: { 'User-Agent': 'Mozilla/5.0' }
});
const $ = cheerio.load(res.data);
// Additional details from the full page
// `.text()` is taken over every matching element, not just the first.
const parentsNeedToKnow = $('[data-test="parents-need-to-know"]').text().trim();
// Get all rating categories
const details = [];
$('[data-test="product-rating-section"]').each((_, section) => {
const type = $(section).find('[data-test="rating-section-label"]').text().trim();
// Score is the number of active circle icons; star icons are not counted
// in this variant.
const score = $(section).find('.icon-circle-solid.active').length;
const description = $(section).find('[data-test="rating-section-description"]').text().trim();
// Sections without a label are skipped.
if (type) {
details.push({ type, score, description });
}
});
return { parentsNeedToKnow, details };
} catch (error) {
// Best-effort: only the message is logged and the caller receives `{}`.
console.error('Error fetching movie details:', error.message);
return {};
}
}
/**
 * Runs a movie search and merges each hit with the data scraped from its own
 * page. This is the only function placed on `module.exports`.
 *
 * NOTE(review): this span looks like a rendered diff with removed and added
 * lines interleaved without +/- markers: `results` is declared twice and two
 * `return await Promise.all(...)` bodies follow each other. It is not runnable
 * as shown; presumably the un-guarded body calling `getMovieClassification` is
 * the pre-change code and the try/catch body calling `getMovieDetails` is the
 * post-change code — confirm against the repository.
 *
 * @param {string} query - Search text, forwarded unchanged to `searchMovies`.
 * @returns {Promise<Array<Object>>} One object per search hit carrying
 *   `title`, `year`, `img`, `link`, `source: 'commonsense'` (plus `summary` and
 *   `age` in the try/catch body) and the keys returned by the per-movie fetch.
 *   The try/catch body returns `[]` after logging when anything throws.
 */
async function searchAndEnrich(query) {
// Un-guarded variant: errors propagate to the caller.
const results = await searchMovies(query);
// Per-movie page fetches are started for every hit and awaited together.
return await Promise.all(results.map(async m => ({
title: m.title,
year: m.year,
img: m.img,
link: m.link,
source: 'commonsense',
// Spread last, so keys from the page fetch override same-named keys above.
...(await getMovieClassification(m.link))
})));
// Guarded variant: any error is logged and turned into an empty list.
try {
const results = await searchMovies(query);
// Per-movie page fetches are started for every hit and awaited together.
return await Promise.all(results.map(async movie => ({
title: movie.title,
year: movie.year,
img: movie.img,
link: movie.link,
source: 'commonsense',
summary: movie.summary,
age: movie.age,
// Spread last, so keys from the page fetch override same-named keys above.
...(await getMovieDetails(movie.link))
})));
} catch (error) {
// Best-effort: only the message is logged and the caller receives `[]`.
console.error('Error in CSM searchAndEnrich:', error.message);
return [];
}
}
// Only searchAndEnrich is exported; the other functions in this module are not
// placed on module.exports.
module.exports = { searchAndEnrich };