131 lines
3.9 KiB
JavaScript
131 lines
3.9 KiB
JavaScript
const axios = require('axios');
|
|
const cheerio = require('cheerio');
|
|
// Origin of the Common Sense Media site; used to build the search URL and to
// turn site-relative links from the markup into absolute ones.
const BASE = 'https://www.commonsensemedia.org';
|
|
|
|
/**
 * Resolve an href scraped from the search markup into an absolute URL.
 *
 * Relative paths are resolved against BASE. Absolute and protocol-relative
 * hrefs keep their own host instead of being glued onto BASE.
 *
 * @param {string} href - Raw attribute value taken from the markup.
 * @returns {string|null} Absolute URL, or null when the value cannot be parsed.
 */
function toAbsoluteUrl(href) {
  try {
    return new URL(href, BASE).href;
  } catch (error) {
    return null;
  }
}

/**
 * Search Common Sense Media for movie reviews matching a query.
 *
 * Calls the site's AJAX search endpoint, picks the "insert" command whose HTML
 * payload holds the result list, and scrapes every `.review-teaser` whose type
 * label contains "MOVIE".
 *
 * @param {string} query - Free-text search term; URL-encoded before use.
 * @returns {Promise<Array<{title: string, link: string, img: (string|null),
 *   age: string, summary: string, year: (string|null)}>>} Parsed results, or an
 *   empty array when the response has an unexpected shape or no result list.
 * @throws Rejects with whatever `axios.get` rejects with; the request is not
 *   wrapped in a try/catch here.
 */
async function searchMovies(query) {
  // Hit the AJAX endpoint directly, not the HTML page
  const url = `${BASE}/ajax/search/${encodeURIComponent(query)}`;
  console.log('Searching CSM via AJAX endpoint:', url);

  const res = await axios.get(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0',
      'X-Requested-With': 'XMLHttpRequest',
    },
  });

  // The response is expected to be a JSON array of commands carrying HTML chunks
  console.log('Got AJAX response, status:', res.status);
  if (!Array.isArray(res.data)) {
    console.log('Invalid AJAX response format');
    return [];
  }

  // Find the HTML insert command with search results. Entries that are null or
  // whose `data` is not a string are skipped rather than allowed to throw.
  const searchResultsCommand = res.data.find(
    (cmd) =>
      cmd &&
      cmd.command === 'insert' &&
      typeof cmd.data === 'string' &&
      cmd.data.includes('search-main-list-content')
  );

  if (!searchResultsCommand) {
    console.log('No search results in AJAX response');
    return [];
  }

  // Parse the HTML chunk
  const $ = cheerio.load(searchResultsCommand.data);
  const results = [];

  $('.review-teaser').each((_, el) => {
    const teaser = $(el);

    // Only keep teasers labelled as movies
    const typeEl = teaser.find('.review-teaser-type');
    if (!typeEl.text().trim().toUpperCase().includes('MOVIE')) return;

    const titleAnchor = teaser.find('.review-teaser-title a');
    const title = titleAnchor.text().trim();
    const link = titleAnchor.attr('href');
    const fullLink = link ? toAbsoluteUrl(link) : null;

    // Get image (might be lazy-loaded, so prefer data-src over src)
    const imgEl = teaser.find('.review-image img');
    const imgSrc = imgEl.attr('data-src') || imgEl.attr('src');
    let img = null;
    // Sources containing "ratio_2_3" are dropped, as in the original filter
    if (imgSrc && !imgSrc.includes('ratio_2_3')) {
      // Covers both "/path" and protocol-relative "//host/path" sources
      img = imgSrc.startsWith('/') ? toAbsoluteUrl(imgSrc) : imgSrc;
    }

    // Get age rating: strip the literal "age" and "+" around the value
    const age = teaser
      .find('.rating__age')
      .text()
      .trim()
      .replace('age', '')
      .replace('+', '')
      .trim();

    // Get summary
    const summary = teaser.find('.review-teaser-one-liner').text().trim();

    // Get year
    const yearMatch = teaser.text().match(/Release Year:\s*(\d{4})/);
    const year = yearMatch ? yearMatch[1] : null;

    // A result without a title or a usable link is not actionable
    if (title && fullLink) {
      results.push({
        title,
        link: fullLink,
        img,
        age,
        summary,
        year,
      });
    }
  });

  console.log('CSM search found:', results.length, 'results');
  console.log('First result:', results[0]);
  return results;
}
|
|
|
|
/**
 * Fetch a review page and scrape the extra detail fields from it.
 *
 * @param {string} movieUrl - Absolute URL of the review page.
 * @returns {Promise<Object>} `{ parentsNeedToKnow, details }` on success, where
 *   `details` is a list of `{ type, score, description }` entries. Resolves to
 *   an empty object when `movieUrl` is falsy or when anything in the
 *   fetch/parse step throws (the error message is logged).
 */
async function getMovieDetails(movieUrl) {
  if (!movieUrl) {
    return {};
  }

  try {
    const requestOptions = { headers: { 'User-Agent': 'Mozilla/5.0' } };
    const response = await axios.get(movieUrl, requestOptions);
    const page = cheerio.load(response.data);

    // Additional details from the full page
    const parentsNeedToKnow = page('[data-test="parents-need-to-know"]')
      .text()
      .trim();

    // Collect every rating category that carries a label
    const details = [];
    const sectionNodes = page('[data-test="product-rating-section"]').toArray();
    for (const node of sectionNodes) {
      const section = page(node);
      const type = section.find('[data-test="rating-section-label"]').text().trim();
      if (!type) {
        continue;
      }

      details.push({
        type,
        // Score is the count of active circle icons within the section
        score: section.find('.icon-circle-solid.active').length,
        description: section
          .find('[data-test="rating-section-description"]')
          .text()
          .trim(),
      });
    }

    return { parentsNeedToKnow, details };
  } catch (error) {
    console.error('Error fetching movie details:', error.message);
    return {};
  }
}
|
|
|
|
/**
 * Run a movie search and merge each hit with the details scraped from its
 * review page.
 *
 * Detail lookups for all hits are started together and awaited as a group.
 *
 * @param {string} query - Search term forwarded to searchMovies.
 * @returns {Promise<Array<Object>>} One object per search hit, tagged with
 *   `source: 'commonsense'` and extended with whatever getMovieDetails
 *   returned. Resolves to an empty array if any step throws (the error
 *   message is logged).
 */
async function searchAndEnrich(query) {
  // Builds the merged record for a single search hit
  const enrich = async (movie) => {
    const { title, year, img, link, summary, age } = movie;
    const extra = await getMovieDetails(link);
    return {
      title,
      year,
      img,
      link,
      source: 'commonsense',
      summary,
      age,
      // Detail fields are spread last, matching the original merge order
      ...extra,
    };
  };

  try {
    const hits = await searchMovies(query);
    const pending = hits.map((hit) => enrich(hit));
    return await Promise.all(pending);
  } catch (error) {
    console.error('Error in CSM searchAndEnrich:', error.message);
    return [];
  }
}
|
|
|
|
// Public API: only searchAndEnrich is exported; the other functions in this
// file stay private to the module.
module.exports = { searchAndEnrich };
|