improve merging by year, support query url
This commit is contained in:
parent
36e52afc93
commit
224446313a
49
merge.js
49
merge.js
@ -1,5 +1,17 @@
|
||||
/**
 * Normalize a title for loose comparison.
 *
 * Lower-cases the input, folds accented letters to their base letter
 * (e.g. "é" -> "e") and removes every character that is not a-z, 0-9 or a
 * space. Spaces are kept on purpose so the result can still be split into
 * words.
 *
 * @param {*} str - Title to normalize; any falsy value yields ''.
 * @returns {string} The normalized title.
 */
function normalizeTitle(str) {
  if (!str) return '';
  return String(str)
    // Decompose accented letters, then drop the combining marks, so that
    // "Misérables" and "Miserables" normalize to the same text instead of
    // the accented letter being deleted outright.
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^a-z0-9 ]/g, '');
}
|
||||
|
||||
/**
 * Tell whether two titles share at least one significant word.
 *
 * A word is significant when, after normalizeTitle(), it is longer than
 * three characters and is not one of the short English/French connecting
 * words in the skip list. When an overlap is found, the shared words are
 * logged with a "[MERGE]" prefix.
 *
 * @param {string} a - First title.
 * @param {string} b - Second title.
 * @returns {boolean} true if at least one significant word appears in both.
 */
function hasSignificantWordOverlap(a, b) {
  const skipWords = new Set(['the','le','la','les','de','du','des','and','et','a','an','un','une','dans','en','on']);

  // Same pipeline for both titles: normalize, split on whitespace, keep
  // only the words that carry meaning.
  const significantWords = (title) =>
    normalizeTitle(title)
      .split(/\s+/)
      .filter((w) => w.length > 3 && !skipWords.has(w));

  // A Set gives constant-time membership checks instead of rescanning b's
  // word list for every word of a.
  const wordsInB = new Set(significantWords(b));
  const overlap = significantWords(a).filter((w) => wordsInB.has(w));

  if (overlap.length === 0) {
    return false;
  }

  console.log(`[MERGE] Overlap: "${a}" <-> "${b}" | Words: ${overlap.join(', ')}`);
  return true;
}
|
||||
|
||||
// Compute match score: exact > startsWith > includes > other
|
||||
@ -12,25 +24,36 @@ function getMatchScore(film, query) {
|
||||
return 10;
|
||||
}
|
||||
|
||||
// Merge and rank by match
|
||||
function mergeResults(arrays, query = '', limit = 5) {
|
||||
const map = {};
|
||||
const merged = [];
|
||||
arrays.flat().forEach(entry => {
|
||||
const key = normalizeTitle(entry.title) + (entry.year ? '|' + entry.year : '');
|
||||
if (!map[key]) {
|
||||
map[key] = {
|
||||
const entryYear = entry.year ? entry.year.toString() : '';
|
||||
let foundIdx = -1;
|
||||
for (let i = 0; i < merged.length; i++) {
|
||||
const m = merged[i];
|
||||
// Match same year AND at least one significant word in common
|
||||
if (
|
||||
(m.year ? m.year.toString() : '') === entryYear &&
|
||||
hasSignificantWordOverlap(entry.title, m.title)
|
||||
) {
|
||||
foundIdx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (foundIdx >= 0) {
|
||||
console.log(`[MERGE] ${entry.title} (${entryYear}) merged with ${merged[foundIdx].title} (${entryYear})`);
|
||||
merged[foundIdx].results.push({ source: entry.source, ...entry });
|
||||
} else {
|
||||
merged.push({
|
||||
title: entry.title,
|
||||
year: entry.year,
|
||||
results: [],
|
||||
results: [{ source: entry.source, ...entry }],
|
||||
__raw: entry // For tie-break
|
||||
};
|
||||
});
|
||||
}
|
||||
map[key].results.push({
|
||||
source: entry.source,
|
||||
...entry
|
||||
});
|
||||
});
|
||||
let out = Object.values(map);
|
||||
|
||||
let out = merged;
|
||||
if (query) {
|
||||
out.forEach(f => f.__score = getMatchScore(f, query));
|
||||
out = out.sort((a, b) => b.__score - a.__score);
|
||||
|
@ -82,6 +82,7 @@
|
||||
async function search() {
|
||||
const query = document.getElementById('q').value.trim();
|
||||
if (!query) return;
|
||||
window.history.replaceState({}, '', '?q=' + encodeURIComponent(query));
|
||||
const filmsDiv = document.getElementById('films');
|
||||
filmsDiv.innerHTML = '<p class="loader">Searching...</p>';
|
||||
|
||||
@ -189,6 +190,14 @@
|
||||
filmsDiv.innerHTML = `<p class="no-results">Search failed. Check the console.</p>`;
|
||||
}
|
||||
}
|
||||
// On page load, restore a search from the "?q=" URL parameter: put the
// value back into the #q input and run the search right away.
window.addEventListener('DOMContentLoaded', () => {
  const initialQuery = new URLSearchParams(window.location.search).get('q');
  if (!initialQuery) {
    return;
  }
  document.getElementById('q').value = initialQuery;
  search();
});

// Pressing Enter inside the #q input starts a search.
document.getElementById('q').addEventListener('keydown', (event) => {
  if (event.key === 'Enter') {
    search();
  }
});
|
||||
</script>
|
||||
</body>
|
||||
|
Loading…
x
Reference in New Issue
Block a user