fix commons, hack ajax
This commit is contained in:
parent
44be436d2d
commit
6c0142c01b
@@ -3,54 +3,128 @@ const cheerio = require('cheerio');
|
||||
const BASE = 'https://www.commonsensemedia.org';
|
||||
|
||||
async function searchMovies(query) {
|
||||
const url = `${BASE}/search/${encodeURIComponent(query)}`;
|
||||
// Hit the AJAX endpoint directly, not the HTML page
|
||||
const url = `${BASE}/ajax/search/${encodeURIComponent(query)}`;
|
||||
console.log('Searching CSM via AJAX endpoint:', url);
|
||||
|
||||
const res = await axios.get(url, {
|
||||
headers: { 'User-Agent': 'Mozilla/5.0', 'accept-language': 'en-US,en;q=0.9' }
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0',
|
||||
'X-Requested-With': 'XMLHttpRequest'
|
||||
}
|
||||
});
|
||||
const $ = cheerio.load(res.data);
|
||||
|
||||
// The response is JSON containing HTML chunks
|
||||
console.log('Got AJAX response, status:', res.status);
|
||||
if (!res.data || !Array.isArray(res.data)) {
|
||||
console.log('Invalid AJAX response format');
|
||||
return [];
|
||||
}
|
||||
|
||||
// Find the HTML insert command with search results
|
||||
const searchResultsCommand = res.data.find(cmd =>
|
||||
cmd.command === 'insert' && cmd.data && cmd.data.includes('search-main-list-content')
|
||||
);
|
||||
|
||||
if (!searchResultsCommand) {
|
||||
console.log('No search results in AJAX response');
|
||||
return [];
|
||||
}
|
||||
|
||||
// Parse the HTML chunk
|
||||
const $ = cheerio.load(searchResultsCommand.data);
|
||||
const results = [];
|
||||
$('.search-results-list__row').each((_, el) => {
|
||||
const type = $(el).find('.media-type').text().trim();
|
||||
if (type.toLowerCase() !== 'movie') return; // ignore non-movies
|
||||
const title = $(el).find('.search-results-product-title').text().trim();
|
||||
const link = $(el).find('a.search-results-product-title').attr('href');
|
||||
const absLink = link ? BASE + link : null;
|
||||
const img = $(el).find('img.search-results-product-image').attr('src');
|
||||
// Most of the time CSM does not provide a year.
|
||||
results.push({ title, year: null, img, link: absLink });
|
||||
|
||||
$('.review-teaser').each((_, el) => {
|
||||
const typeEl = $(el).find('.review-teaser-type');
|
||||
if (!typeEl.text().trim().toUpperCase().includes('MOVIE')) return;
|
||||
|
||||
const title = $(el).find('.review-teaser-title a').text().trim();
|
||||
const link = $(el).find('.review-teaser-title a').attr('href');
|
||||
const fullLink = link ? BASE + link : null;
|
||||
|
||||
// Get image (might be lazy-loaded)
|
||||
const imgEl = $(el).find('.review-image img');
|
||||
const imgSrc = imgEl.attr('data-src') || imgEl.attr('src');
|
||||
const img = imgSrc && !imgSrc.includes('ratio_2_3') ?
|
||||
(imgSrc.startsWith('/') ? BASE + imgSrc : imgSrc) : null;
|
||||
|
||||
// Get age rating
|
||||
const age = $(el).find('.rating__age').text().trim().replace('age', '').replace('+', '').trim();
|
||||
|
||||
// Get summary
|
||||
const summary = $(el).find('.review-teaser-one-liner').text().trim();
|
||||
|
||||
// Get year
|
||||
const yearMatch = $(el).text().match(/Release Year:\s*(\d{4})/);
|
||||
const year = yearMatch ? yearMatch[1] : null;
|
||||
|
||||
if (title && fullLink) {
|
||||
results.push({
|
||||
title,
|
||||
link: fullLink,
|
||||
img,
|
||||
age,
|
||||
summary,
|
||||
year
|
||||
});
|
||||
}
|
||||
});
|
||||
console.log('CSM search:', results);
|
||||
|
||||
console.log('CSM search found:', results.length, 'results');
|
||||
console.log('First result:', results[0]);
|
||||
return results;
|
||||
}
|
||||
|
||||
async function getMovieClassification(movieUrl) {
|
||||
async function getMovieDetails(movieUrl) {
|
||||
if (!movieUrl) return {};
|
||||
const res = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
|
||||
const $ = cheerio.load(res.data);
|
||||
|
||||
const age = $('[data-test="age-rating"]').first().text().replace('age', '').replace('+', '').trim() || null;
|
||||
const summary = $('[data-test="review-summary"]').first().text().trim();
|
||||
const details = [];
|
||||
$('[data-test="product-rating-section"]').each((_, el) => {
|
||||
const label = $(el).find('[data-test="rating-section-label"]').text().trim();
|
||||
const score = $(el).find('.icon-circle-solid.active,.icon-star-solid.active').length;
|
||||
const desc = $(el).find('[data-test="rating-section-description"]').text().trim();
|
||||
if (label) details.push({ type: label, score, description: desc });
|
||||
});
|
||||
|
||||
return { age, summary, details };
|
||||
|
||||
try {
|
||||
const res = await axios.get(movieUrl, {
|
||||
headers: { 'User-Agent': 'Mozilla/5.0' }
|
||||
});
|
||||
|
||||
const $ = cheerio.load(res.data);
|
||||
|
||||
// Additional details from the full page
|
||||
const parentsNeedToKnow = $('[data-test="parents-need-to-know"]').text().trim();
|
||||
|
||||
// Get all rating categories
|
||||
const details = [];
|
||||
$('[data-test="product-rating-section"]').each((_, section) => {
|
||||
const type = $(section).find('[data-test="rating-section-label"]').text().trim();
|
||||
const score = $(section).find('.icon-circle-solid.active').length;
|
||||
const description = $(section).find('[data-test="rating-section-description"]').text().trim();
|
||||
|
||||
if (type) {
|
||||
details.push({ type, score, description });
|
||||
}
|
||||
});
|
||||
|
||||
return { parentsNeedToKnow, details };
|
||||
} catch (error) {
|
||||
console.error('Error fetching movie details:', error.message);
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
async function searchAndEnrich(query) {
|
||||
const results = await searchMovies(query);
|
||||
return await Promise.all(results.map(async m => ({
|
||||
title: m.title,
|
||||
year: m.year,
|
||||
img: m.img,
|
||||
link: m.link,
|
||||
source: 'commonsense',
|
||||
...(await getMovieClassification(m.link))
|
||||
})));
|
||||
try {
|
||||
const results = await searchMovies(query);
|
||||
return await Promise.all(results.map(async movie => ({
|
||||
title: movie.title,
|
||||
year: movie.year,
|
||||
img: movie.img,
|
||||
link: movie.link,
|
||||
source: 'commonsense',
|
||||
summary: movie.summary,
|
||||
age: movie.age,
|
||||
...(await getMovieDetails(movie.link))
|
||||
})));
|
||||
} catch (error) {
|
||||
console.error('Error in CSM searchAndEnrich:', error.message);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { searchAndEnrich };
|
||||
|
Loading…
x
Reference in New Issue
Block a user