improve merging by year, support query url

This commit is contained in:
SansGuidon 2025-05-25 01:35:47 +02:00
parent 36e52afc93
commit 224446313a
3 changed files with 46 additions and 14 deletions

View File

@ -1 +1 @@
0.0.4 0.0.5

View File

@ -1,5 +1,17 @@
/**
 * Normalize a title for comparison: lowercase it and drop every character
 * that is not an ASCII letter, digit or space. Spaces are kept so the result
 * can still be split into words; accented letters and punctuation are removed.
 *
 * @param {string} str - Raw title. Any falsy value yields ''.
 * @returns {string} The normalized title.
 */
function normalizeTitle(str) {
  return str ? str.toLowerCase().replace(/[^a-z0-9 ]/g, '') : '';
}

// Common articles/prepositions that never count as a shared word.
// Built once instead of on every comparison.
const TITLE_SKIP_WORDS = new Set(['the','le','la','les','de','du','des','and','et','a','an','un','une','dans','en','on']);

// A word is significant when it is longer than 3 characters and not a skip word.
function isSignificantTitleWord(word) {
  return word.length > 3 && !TITLE_SKIP_WORDS.has(word);
}

/**
 * Decide whether two titles plausibly name the same film.
 *
 * They match when they share at least one significant word (see
 * isSignificantTitleWord), or when their normalized words are identical.
 * The second rule lets titles made only of short words ("Up", "Her") match
 * themselves; without it they would have no significant word at all.
 *
 * @param {string} a - First title.
 * @param {string} b - Second title.
 * @returns {boolean} true when the titles are considered a match.
 */
function hasSignificantWordOverlap(a, b) {
  const wordsA = normalizeTitle(a).split(/\s+/).filter(Boolean);
  const wordsB = normalizeTitle(b).split(/\s+/).filter(Boolean);

  // Set lookup replaces a linear scan of b's words for every word of a.
  const significantB = new Set(wordsB.filter(isSignificantTitleWord));
  const overlap = wordsA.filter(w => isSignificantTitleWord(w) && significantB.has(w));
  if (overlap.length > 0) {
    console.log(`[MERGE] Overlap: "${a}" <-> "${b}" | Words: ${overlap.join(', ')}`);
    return true;
  }

  // Fallback for short-word titles: identical, non-empty normalized titles.
  return wordsA.length > 0 && wordsA.join(' ') === wordsB.join(' ');
}
// Compute match score: exact > startsWith > includes > other
@ -12,25 +24,36 @@ function getMatchScore(film, query) {
return 10; return 10;
} }
// Merge and rank by match
function mergeResults(arrays, query = '', limit = 5) { function mergeResults(arrays, query = '', limit = 5) {
const map = {}; const merged = [];
arrays.flat().forEach(entry => { arrays.flat().forEach(entry => {
const key = normalizeTitle(entry.title) + (entry.year ? '|' + entry.year : ''); const entryYear = entry.year ? entry.year.toString() : '';
if (!map[key]) { let foundIdx = -1;
map[key] = { for (let i = 0; i < merged.length; i++) {
const m = merged[i];
// Match same year AND at least one significant word in common
if (
(m.year ? m.year.toString() : '') === entryYear &&
hasSignificantWordOverlap(entry.title, m.title)
) {
foundIdx = i;
break;
}
}
if (foundIdx >= 0) {
console.log(`[MERGE] ${entry.title} (${entryYear}) merged with ${merged[foundIdx].title} (${entryYear})`);
merged[foundIdx].results.push({ source: entry.source, ...entry });
} else {
merged.push({
title: entry.title, title: entry.title,
year: entry.year, year: entry.year,
results: [], results: [{ source: entry.source, ...entry }],
__raw: entry // For tie-break __raw: entry // For tie-break
}; });
} }
map[key].results.push({
source: entry.source,
...entry
});
}); });
let out = Object.values(map);
let out = merged;
if (query) { if (query) {
out.forEach(f => f.__score = getMatchScore(f, query)); out.forEach(f => f.__score = getMatchScore(f, query));
out = out.sort((a, b) => b.__score - a.__score); out = out.sort((a, b) => b.__score - a.__score);

View File

@ -82,6 +82,7 @@
async function search() { async function search() {
const query = document.getElementById('q').value.trim(); const query = document.getElementById('q').value.trim();
if (!query) return; if (!query) return;
window.history.replaceState({}, '', '?q=' + encodeURIComponent(query));
const filmsDiv = document.getElementById('films'); const filmsDiv = document.getElementById('films');
filmsDiv.innerHTML = '<p class="loader">Searching...</p>'; filmsDiv.innerHTML = '<p class="loader">Searching...</p>';
@ -189,6 +190,14 @@
filmsDiv.innerHTML = `<p class="no-results">Search failed. Check the console.</p>`; filmsDiv.innerHTML = `<p class="no-results">Search failed. Check the console.</p>`;
} }
} }
// On page load, restore a search from the URL: when a non-empty `q` query
// parameter is present, copy it into the search box and run the search.
window.addEventListener('DOMContentLoaded', () => {
  const initialQuery = new URLSearchParams(window.location.search).get('q');
  if (!initialQuery) return;
  document.getElementById('q').value = initialQuery;
  search();
});
// Run the search when Enter is pressed in the query box. The handler is
// registered exactly once; registering it twice would fire search() twice
// per key press.
document.getElementById('q').addEventListener('keydown', (e) => {
  if (e.key === 'Enter') search();
});
</script> </script>
</body> </body>