cine-kids/merge.js

95 lines
3.1 KiB
JavaScript

/**
 * Reduce a title to a canonical form suitable for comparisons.
 *
 * The value is Unicode-decomposed (NFD) so that accented Latin letters split
 * into a base letter plus combining marks, lowercased, and then stripped of
 * everything except `a-z`, `0-9` and spaces. The combining marks are removed
 * by that last step, so "Amélie" and "Amelie" normalize to the same string
 * instead of the accented letter being deleted outright.
 *
 * @param {*} str - Title to normalize; truthy non-strings are coerced with String().
 * @returns {string} The normalized title, or '' for any falsy input.
 */
function normalizeTitle(str) {
  if (!str) {
    return '';
  }
  return String(str)
    .normalize('NFD')
    .toLowerCase()
    .replace(/[^a-z0-9 ]/g, '');
}
/**
 * Tell whether two titles have at least one significant word in common.
 *
 * A word is significant when, after title normalization, it is longer than
 * three characters and is not one of the listed English/French stop words.
 * Every positive match is reported on the console.
 *
 * @param {string} a - First title.
 * @param {string} b - Second title.
 * @returns {boolean} true when the titles share a significant word.
 */
function hasSignificantWordOverlap(a, b) {
  const stopWords = new Set(['the','le','la','les','de','du','des','and','et','a','an','un','une','dans','en','on']);

  // Normalize, split on whitespace, keep only the words that carry meaning.
  const significantWords = (title) =>
    normalizeTitle(title)
      .split(/\s+/)
      .filter((word) => word.length > 3 && !stopWords.has(word));

  const wordsOfA = significantWords(a);
  const wordsOfB = new Set(significantWords(b));
  const shared = wordsOfA.filter((word) => wordsOfB.has(word));

  if (shared.length === 0) {
    return false;
  }

  console.log(`[MERGE] Overlap: "${a}" <-> "${b}" | Words: ${shared.join(', ')}`);
  return true;
}
/**
 * Score how well a film title matches a search query.
 *
 * Both strings are normalized first. Ranking, best to worst:
 * exact match (100), title starts with the query (80),
 * title contains the query (60), anything else (10).
 *
 * @param {{title: string}} film - Object whose `title` is scored.
 * @param {string} query - Search text.
 * @returns {number} One of 100, 80, 60 or 10.
 */
function getMatchScore(film, query) {
  const title = normalizeTitle(film.title);
  const needle = normalizeTitle(query);

  let score = 10;
  if (title === needle) {
    score = 100;
  } else if (title.startsWith(needle)) {
    score = 80;
  } else if (title.includes(needle)) {
    score = 60;
  }
  return score;
}
/**
 * Produce the base title of a series by removing season markers.
 *
 * The title is normalized, every "saison N" / "season N" occurrence
 * (optional whitespace before the number) is removed, and the result
 * is trimmed at both ends.
 *
 * @param {string} title - Title that may contain a season marker.
 * @returns {string} Normalized title without season markers.
 */
function stripSeason(title) {
  const normalized = normalizeTitle(title);
  const withoutSeason = normalized.replace(/(saison|season)\s*\d+/g, '');
  return withoutSeason.trim();
}
/**
 * Merge search results from several sources into one de-duplicated, ranked list.
 *
 * Entries are folded into groups in input order. An entry joins the first
 * existing group that satisfies either rule:
 *   1. Season regrouping: both titles mention "saison"/"season", the group's
 *      first result comes from the same source, and the titles are equal once
 *      the season marker is stripped.
 *   2. Default: same year (two missing years compare as equal) and at least
 *      one significant word in common.
 * Otherwise the entry starts a new group.
 *
 * When `query` is non-empty, groups are ordered by match score (highest
 * first; ties keep merge order) and then restricted to titles that contain
 * every significant query word.
 *
 * @param {Array} arrays - Per-source result lists, flattened one level. Each
 *   entry is read for `title`, `year` and `source`. Null/undefined entries are
 *   skipped, and a non-array value is treated as "no results".
 * @param {string} [query=''] - Search text used for ranking and filtering.
 * @param {number} [limit=5] - Maximum number of groups returned.
 * @returns {Array<{title: *, year: *, results: Array<Object>}>} Merged groups;
 *   `results` holds a shallow copy of every entry folded into the group.
 */
function mergeResults(arrays, query = '', limit = 5) {
  const seasonPattern = /saison|season/i;
  const yearKey = (year) => (year ? year.toString() : '');

  const merged = [];
  const entries = Array.isArray(arrays) ? arrays.flat() : [];

  for (const entry of entries) {
    // A source that failed may contribute null/undefined; skip it rather than throw.
    if (entry == null) {
      continue;
    }

    const entryYear = yearKey(entry.year);
    const entryIsSeason = seasonPattern.test(entry.title);

    const target = merged.find((group) => {
      // Rule 1: regroup seasons of one series coming from the same source.
      const first = group.results[0];
      const isSameSeries =
        Boolean(first) &&
        first.source === entry.source &&
        entryIsSeason &&
        seasonPattern.test(group.title) &&
        stripSeason(group.title) === stripSeason(entry.title);
      if (isSameSeries) {
        return true;
      }
      // Rule 2: same year AND at least one significant word in common.
      return (
        yearKey(group.year) === entryYear &&
        hasSignificantWordOverlap(entry.title, group.title)
      );
    });

    const result = { source: entry.source, ...entry };
    if (target) {
      target.results.push(result);
    } else {
      merged.push({ title: entry.title, year: entry.year, results: [result] });
    }
  }

  let ranked = merged;
  if (query) {
    // Scores live in a side array so the returned groups never carry internal fields.
    ranked = merged
      .map((film) => ({ film, score: getMatchScore(film, query) }))
      .sort((a, b) => b.score - a.score)
      .map((scored) => scored.film);

    // Keep only films whose title contains ALL significant query words.
    // Query words and titles go through the same normalization as scoring,
    // so punctuation in the query ("Toy Story!") cannot reject a real match.
    const skipWords = new Set(['the','le','la','les','de','du','des','and','et','a','an','un','une','dans','en','on']);
    const queryWords = query
      .split(/\s+/)
      .map((word) => normalizeTitle(word))
      .filter((word) => word.length > 3 && !skipWords.has(word));

    if (queryWords.length > 0) {
      ranked = ranked.filter((film) => {
        const title = normalizeTitle(film.title);
        return queryWords.every((word) => title.includes(word));
      });
    }
  }

  return ranked.slice(0, limit);
}
// Public API (CommonJS): only mergeResults is exported; the helper functions
// defined above are not part of the exported object.
module.exports = { mergeResults };