const axios = require('axios');
const cheerio = require('cheerio');

const BASE = 'https://www.commonsensemedia.org';

// Upper bound (in milliseconds) for each HTTP request, so that a stalled
// connection cannot keep a search pending forever.
const REQUEST_TIMEOUT_MS = 15000;

/**
 * Resolves an href/src value taken from scraped markup against BASE.
 *
 * Root-relative ("/path"), protocol-relative ("//host/path") and already
 * absolute references are all handled by the URL parser, instead of blindly
 * concatenating strings.
 *
 * @param {string|undefined|null} ref - Raw attribute value from the markup.
 * @returns {string|null} Absolute URL, or null when `ref` is empty or cannot
 *   be parsed as a URL.
 */
function toAbsoluteUrl(ref) {
  if (!ref) return null;
  try {
    return new URL(ref, BASE).href;
  } catch (error) {
    return null;
  }
}

/**
 * Searches Common Sense Media through its AJAX search endpoint and extracts
 * the teasers whose type label contains "MOVIE".
 *
 * This function does not catch request errors: if the axios call rejects,
 * the rejection propagates to the caller.
 *
 * @param {string} query - Free-text search term; it is URI-encoded before use.
 * @returns {Promise<Array<{title: string, link: string, img: (string|null),
 *   age: string, summary: string, year: (string|null)}>>} Parsed results;
 *   an empty array when the response is not an array or contains no
 *   search-results command.
 */
async function searchMovies(query) {
  // Hit the AJAX endpoint directly, not the HTML page
  const url = `${BASE}/ajax/search/${encodeURIComponent(query)}`;
  console.log('Searching CSM via AJAX endpoint:', url);

  const res = await axios.get(url, {
    timeout: REQUEST_TIMEOUT_MS,
    headers: {
      'User-Agent': 'Mozilla/5.0',
      'X-Requested-With': 'XMLHttpRequest',
    },
  });

  // The response is JSON containing HTML chunks
  console.log('Got AJAX response, status:', res.status);
  if (!res.data || !Array.isArray(res.data)) {
    console.log('Invalid AJAX response format');
    return [];
  }

  // Find the HTML insert command with search results. The type guards keep a
  // null entry or a non-string `data` payload from throwing inside the predicate.
  const searchResultsCommand = res.data.find(
    (cmd) =>
      cmd &&
      cmd.command === 'insert' &&
      typeof cmd.data === 'string' &&
      cmd.data.includes('search-main-list-content')
  );
  if (!searchResultsCommand) {
    console.log('No search results in AJAX response');
    return [];
  }

  // Parse the HTML chunk
  const $ = cheerio.load(searchResultsCommand.data);
  const results = [];

  $('.review-teaser').each((_, el) => {
    const teaser = $(el);

    // Only keep teasers labelled as movies.
    const typeLabel = teaser.find('.review-teaser-type').text().trim().toUpperCase();
    if (!typeLabel.includes('MOVIE')) return;

    const titleAnchor = teaser.find('.review-teaser-title a');
    const title = titleAnchor.text().trim();
    const fullLink = toAbsoluteUrl(titleAnchor.attr('href'));

    // Get image (might be lazy-loaded, in which case the URL is in data-src)
    const imgEl = teaser.find('.review-image img');
    const imgSrc = imgEl.attr('data-src') || imgEl.attr('src');
    // Sources containing 'ratio_2_3' are dropped.
    // NOTE(review): presumably these are placeholder images — confirm.
    const img = imgSrc && !imgSrc.includes('ratio_2_3') ? toAbsoluteUrl(imgSrc) : null;

    // Get age rating: strip the first "age" and the first "+" from the label text
    const age = teaser
      .find('.rating__age')
      .text()
      .trim()
      .replace('age', '')
      .replace('+', '')
      .trim();

    // Get summary
    const summary = teaser.find('.review-teaser-one-liner').text().trim();

    // Get year from the teaser's full text
    const yearMatch = teaser.text().match(/Release Year:\s*(\d{4})/);
    const year = yearMatch ? yearMatch[1] : null;

    // A result without a title or a usable link is skipped.
    if (title && fullLink) {
      results.push({ title, link: fullLink, img, age, summary, year });
    }
  });

  console.log('CSM search found:', results.length, 'results');
  console.log('First result:', results[0]);
  return results;
}

/**
 * Fetches a movie's review page and extracts the "parents need to know" text
 * and the per-category rating sections.
 *
 * Errors are logged and swallowed on purpose: a failed details fetch yields
 * an empty object, so the caller can still return the basic search result.
 *
 * @param {string|undefined|null} movieUrl - Absolute URL of the review page.
 * @returns {Promise<{parentsNeedToKnow?: string,
 *   details?: Array<{type: string, score: number, description: string}>}>}
 *   Extracted details, or `{}` when `movieUrl` is falsy or the fetch/parse fails.
 */
async function getMovieDetails(movieUrl) {
  if (!movieUrl) return {};

  try {
    const res = await axios.get(movieUrl, {
      timeout: REQUEST_TIMEOUT_MS,
      headers: { 'User-Agent': 'Mozilla/5.0' },
    });
    const $ = cheerio.load(res.data);

    // Additional details from the full page
    const parentsNeedToKnow = $('[data-test="parents-need-to-know"]').text().trim();

    // Get all rating categories
    const details = [];
    $('[data-test="product-rating-section"]').each((_, section) => {
      const sectionEl = $(section);
      const type = sectionEl.find('[data-test="rating-section-label"]').text().trim();
      // The score is the number of elements matching the "active" circle selector.
      const score = sectionEl.find('.icon-circle-solid.active').length;
      const description = sectionEl
        .find('[data-test="rating-section-description"]')
        .text()
        .trim();

      // Sections without a label are skipped.
      if (type) {
        details.push({ type, score, description });
      }
    });

    return { parentsNeedToKnow, details };
  } catch (error) {
    console.error('Error fetching movie details:', error.message);
    return {};
  }
}

/**
 * Runs a movie search and enriches every result with the details scraped
 * from its review page. Detail pages are fetched concurrently.
 *
 * @param {string} query - Free-text search term.
 * @returns {Promise<Array<Object>>} One object per search result with the keys
 *   `title`, `year`, `img`, `link`, `source` (always 'commonsense'), `summary`
 *   and `age`, plus whatever keys `getMovieDetails` returned for that result.
 *   Resolves to an empty array if anything throws; the error message is logged.
 */
async function searchAndEnrich(query) {
  try {
    const results = await searchMovies(query);
    return await Promise.all(
      results.map(async (movie) => ({
        title: movie.title,
        year: movie.year,
        img: movie.img,
        link: movie.link,
        source: 'commonsense',
        summary: movie.summary,
        age: movie.age,
        ...(await getMovieDetails(movie.link)),
      }))
    );
  } catch (error) {
    console.error('Error in CSM searchAndEnrich:', error.message);
    return [];
  }
}

module.exports = { searchAndEnrich };