// cine-kids/merge.js
//
// 69 lines
// 2.2 KiB
// JavaScript

/**
 * Reduce a title to a lowercase ASCII key used for fuzzy comparison.
 *
 * Accented letters are folded to their base letter ("Amélie" -> "amelie")
 * rather than dropped, so accented and unaccented spellings of the same title
 * produce the same key. Every character other than a-z, 0-9 and the space
 * character is removed.
 *
 * @param {*} str - Title to normalize. Falsy values yield ''; other
 *   non-string values are converted with String().
 * @returns {string} The normalized title, or '' for falsy input.
 */
function normalizeTitle(str) {
  if (!str) return '';
  return String(str)
    .toLowerCase()
    // NFD splits "é" into "e" + a combining accent; the filter below then
    // removes the accent and keeps the base letter.
    .normalize('NFD')
    .replace(/[^a-z0-9 ]/g, '');
}
// Short function words ignored when comparing titles. Built once instead of
// on every call.
const TITLE_SKIP_WORDS = new Set(['the','le','la','les','de','du','des','and','et','a','an','un','une','dans','en','on']);

/**
 * Decide whether two titles plausibly name the same film.
 *
 * Both titles are passed through normalizeTitle() and split on whitespace.
 * They match when they share at least one "significant" word (longer than
 * three characters and not in TITLE_SKIP_WORDS), or when the normalized titles
 * are identical and non-empty. Each match is logged to the console.
 *
 * @param {string} a - First title.
 * @param {string} b - Second title.
 * @returns {boolean} true when the titles match, false otherwise.
 */
function hasSignificantWordOverlap(a, b) {
  const isSignificant = (w) => w.length > 3 && !TITLE_SKIP_WORDS.has(w);
  const tokensA = normalizeTitle(a).split(/\s+/).filter(Boolean);
  const tokensB = normalizeTitle(b).split(/\s+/).filter(Boolean);

  const significantB = new Set(tokensB.filter(isSignificant));
  const overlap = tokensA.filter((w) => isSignificant(w) && significantB.has(w));
  if (overlap.length > 0) {
    console.log(`[MERGE] Overlap: "${a}" <-> "${b}" | Words: ${overlap.join(', ')}`);
    return true;
  }

  // Titles made only of short words ("Up", "Elf", "Rio") contain no
  // significant word, so without this fallback two identical titles would
  // never match.
  if (tokensA.length > 0 && tokensA.join(' ') === tokensB.join(' ')) {
    console.log(`[MERGE] Exact title: "${a}" <-> "${b}"`);
    return true;
  }
  return false;
}
/**
 * Score how well a film's title matches a search query.
 *
 * Both strings are compared after normalizeTitle(). Ranking:
 * exact (100) > title starts with query (80) > title contains query (60) >
 * anything else (10).
 *
 * @param {{title: string}} film - Object whose `title` is scored.
 * @param {string} query - Search text.
 * @returns {number} 100, 80, 60 or 10.
 */
function getMatchScore(film, query) {
  const nTitle = normalizeTitle(film.title);
  const nQuery = normalizeTitle(query);
  // A query that normalizes to '' (e.g. only punctuation) is a prefix of every
  // string and would otherwise score 80/100 for every film; it carries no
  // ranking signal, so give the baseline score.
  if (nQuery === '') return 10;
  if (nTitle === nQuery) return 100;
  if (nTitle.startsWith(nQuery)) return 80;
  if (nTitle.includes(nQuery)) return 60;
  return 10;
}
/**
 * Merge film entries coming from several sources into one list of films.
 *
 * `arrays` is flattened one level. Two entries are treated as the same film
 * when their years are equal (compared as strings, a missing year counting as
 * '') and hasSignificantWordOverlap() accepts their titles. The first entry
 * seen for a film supplies the film's `title` and `year`; every entry,
 * including the first, is appended to the film's `results`.
 *
 * When `query` is non-empty, films are ordered by getMatchScore() descending;
 * films with equal scores keep first-seen order. The list is then cut to
 * `limit` films.
 *
 * @param {Array} arrays - Entry lists (one per source). Each entry's `title`,
 *   `year` and `source` are read. Null/undefined lists or entries are skipped.
 * @param {string} [query=''] - Search text used for ranking; '' disables ranking.
 * @param {number} [limit=5] - Maximum number of films returned.
 * @returns {Array<{title: *, year: *, results: Array<Object>}>} Merged films.
 */
function mergeResults(arrays, query = '', limit = 5) {
  // A source that failed may contribute null/undefined instead of a list or
  // an entry; skip those instead of crashing on property access.
  const entries = (Array.isArray(arrays) ? arrays.flat() : []).filter(
    (entry) => entry !== null && typeof entry === 'object'
  );
  const yearKey = (year) => (year ? year.toString() : '');

  const merged = [];
  for (const entry of entries) {
    const entryYear = yearKey(entry.year);
    // Same film = same year AND a title match.
    const match = merged.find(
      (m) => yearKey(m.year) === entryYear && hasSignificantWordOverlap(entry.title, m.title)
    );
    const result = { source: entry.source, ...entry };
    if (match) {
      console.log(`[MERGE] ${entry.title} (${entryYear}) merged with ${match.title} (${entryYear})`);
      match.results.push(result);
    } else {
      merged.push({ title: entry.title, year: entry.year, results: [result] });
    }
  }

  if (!query) {
    return merged.slice(0, limit);
  }

  // Keep scores in a side array so the returned objects never carry internal
  // fields. Array.prototype.sort is stable, so ties keep first-seen order.
  return merged
    .map((film) => ({ film, score: getMatchScore(film, query) }))
    .sort((x, y) => y.score - x.score)
    .slice(0, limit)
    .map(({ film }) => film);
}
// Public API: only mergeResults is exported; the other functions in this file
// are not exposed to importers.
module.exports = { mergeResults };