Fix Common Sense Media search: query the AJAX search endpoint directly
This commit is contained in:
parent
44be436d2d
commit
6c0142c01b
@ -3,54 +3,128 @@ const cheerio = require('cheerio');
|
|||||||
const BASE = 'https://www.commonsensemedia.org';
|
const BASE = 'https://www.commonsensemedia.org';
|
||||||
|
|
||||||
async function searchMovies(query) {
|
async function searchMovies(query) {
|
||||||
const url = `${BASE}/search/${encodeURIComponent(query)}`;
|
// Hit the AJAX endpoint directly, not the HTML page
|
||||||
|
const url = `${BASE}/ajax/search/${encodeURIComponent(query)}`;
|
||||||
|
console.log('Searching CSM via AJAX endpoint:', url);
|
||||||
|
|
||||||
const res = await axios.get(url, {
|
const res = await axios.get(url, {
|
||||||
headers: { 'User-Agent': 'Mozilla/5.0', 'accept-language': 'en-US,en;q=0.9' }
|
headers: {
|
||||||
|
'User-Agent': 'Mozilla/5.0',
|
||||||
|
'X-Requested-With': 'XMLHttpRequest'
|
||||||
|
}
|
||||||
});
|
});
|
||||||
const $ = cheerio.load(res.data);
|
|
||||||
|
// The response is JSON containing HTML chunks
|
||||||
|
console.log('Got AJAX response, status:', res.status);
|
||||||
|
if (!res.data || !Array.isArray(res.data)) {
|
||||||
|
console.log('Invalid AJAX response format');
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the HTML insert command with search results
|
||||||
|
const searchResultsCommand = res.data.find(cmd =>
|
||||||
|
cmd.command === 'insert' && cmd.data && cmd.data.includes('search-main-list-content')
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!searchResultsCommand) {
|
||||||
|
console.log('No search results in AJAX response');
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the HTML chunk
|
||||||
|
const $ = cheerio.load(searchResultsCommand.data);
|
||||||
const results = [];
|
const results = [];
|
||||||
$('.search-results-list__row').each((_, el) => {
|
|
||||||
const type = $(el).find('.media-type').text().trim();
|
$('.review-teaser').each((_, el) => {
|
||||||
if (type.toLowerCase() !== 'movie') return; // ignore non-movies
|
const typeEl = $(el).find('.review-teaser-type');
|
||||||
const title = $(el).find('.search-results-product-title').text().trim();
|
if (!typeEl.text().trim().toUpperCase().includes('MOVIE')) return;
|
||||||
const link = $(el).find('a.search-results-product-title').attr('href');
|
|
||||||
const absLink = link ? BASE + link : null;
|
const title = $(el).find('.review-teaser-title a').text().trim();
|
||||||
const img = $(el).find('img.search-results-product-image').attr('src');
|
const link = $(el).find('.review-teaser-title a').attr('href');
|
||||||
// Most of the time CSM does not list a release year.
|
const fullLink = link ? BASE + link : null;
|
||||||
results.push({ title, year: null, img, link: absLink });
|
|
||||||
|
// Get image (might be lazy-loaded)
|
||||||
|
const imgEl = $(el).find('.review-image img');
|
||||||
|
const imgSrc = imgEl.attr('data-src') || imgEl.attr('src');
|
||||||
|
const img = imgSrc && !imgSrc.includes('ratio_2_3') ?
|
||||||
|
(imgSrc.startsWith('/') ? BASE + imgSrc : imgSrc) : null;
|
||||||
|
|
||||||
|
// Get age rating
|
||||||
|
const age = $(el).find('.rating__age').text().trim().replace('age', '').replace('+', '').trim();
|
||||||
|
|
||||||
|
// Get summary
|
||||||
|
const summary = $(el).find('.review-teaser-one-liner').text().trim();
|
||||||
|
|
||||||
|
// Get year
|
||||||
|
const yearMatch = $(el).text().match(/Release Year:\s*(\d{4})/);
|
||||||
|
const year = yearMatch ? yearMatch[1] : null;
|
||||||
|
|
||||||
|
if (title && fullLink) {
|
||||||
|
results.push({
|
||||||
|
title,
|
||||||
|
link: fullLink,
|
||||||
|
img,
|
||||||
|
age,
|
||||||
|
summary,
|
||||||
|
year
|
||||||
|
});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
console.log('CSM search:', results);
|
|
||||||
|
console.log('CSM search found:', results.length, 'results');
|
||||||
|
console.log('First result:', results[0]);
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getMovieClassification(movieUrl) {
|
async function getMovieDetails(movieUrl) {
|
||||||
if (!movieUrl) return {};
|
if (!movieUrl) return {};
|
||||||
const res = await axios.get(movieUrl, { headers: { 'User-Agent': 'Mozilla/5.0' } });
|
|
||||||
const $ = cheerio.load(res.data);
|
try {
|
||||||
|
const res = await axios.get(movieUrl, {
|
||||||
const age = $('[data-test="age-rating"]').first().text().replace('age', '').replace('+', '').trim() || null;
|
headers: { 'User-Agent': 'Mozilla/5.0' }
|
||||||
const summary = $('[data-test="review-summary"]').first().text().trim();
|
});
|
||||||
const details = [];
|
|
||||||
$('[data-test="product-rating-section"]').each((_, el) => {
|
const $ = cheerio.load(res.data);
|
||||||
const label = $(el).find('[data-test="rating-section-label"]').text().trim();
|
|
||||||
const score = $(el).find('.icon-circle-solid.active,.icon-star-solid.active').length;
|
// Additional details from the full page
|
||||||
const desc = $(el).find('[data-test="rating-section-description"]').text().trim();
|
const parentsNeedToKnow = $('[data-test="parents-need-to-know"]').text().trim();
|
||||||
if (label) details.push({ type: label, score, description: desc });
|
|
||||||
});
|
// Get all rating categories
|
||||||
|
const details = [];
|
||||||
return { age, summary, details };
|
$('[data-test="product-rating-section"]').each((_, section) => {
|
||||||
|
const type = $(section).find('[data-test="rating-section-label"]').text().trim();
|
||||||
|
const score = $(section).find('.icon-circle-solid.active').length;
|
||||||
|
const description = $(section).find('[data-test="rating-section-description"]').text().trim();
|
||||||
|
|
||||||
|
if (type) {
|
||||||
|
details.push({ type, score, description });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return { parentsNeedToKnow, details };
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error fetching movie details:', error.message);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function searchAndEnrich(query) {
|
async function searchAndEnrich(query) {
|
||||||
const results = await searchMovies(query);
|
try {
|
||||||
return await Promise.all(results.map(async m => ({
|
const results = await searchMovies(query);
|
||||||
title: m.title,
|
return await Promise.all(results.map(async movie => ({
|
||||||
year: m.year,
|
title: movie.title,
|
||||||
img: m.img,
|
year: movie.year,
|
||||||
link: m.link,
|
img: movie.img,
|
||||||
source: 'commonsense',
|
link: movie.link,
|
||||||
...(await getMovieClassification(m.link))
|
source: 'commonsense',
|
||||||
})));
|
summary: movie.summary,
|
||||||
|
age: movie.age,
|
||||||
|
...(await getMovieDetails(movie.link))
|
||||||
|
})));
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error in CSM searchAndEnrich:', error.message);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { searchAndEnrich };
|
module.exports = { searchAndEnrich };
|
||||||
|
Loading…
x
Reference in New Issue
Block a user