feat(autotag) improve selection of relevant tags

This commit is contained in:
SansGuidon 2025-01-17 10:24:13 +00:00
parent 9b78c52a5a
commit 304d3ebd31

View File

@ -70,7 +70,7 @@ function auto_tag_plugin_init(ConfigManager $conf)
'gratuit,free' => 'free',
'gitops,gitlab,github actions,devops,SRE,ci/cd,platform-engineering,ci pipeline,application deployment,dagger,renovatebot,dependabot,continuous integration,site reliability eng' => 'devops,automation',
'git,gitlab,jujutsu,pijul,mercurial,svn,version control,contrôle de version' => 'version-control',
'gpt,chatgpt,llm,llms,artificial intelligence,intelligence artificielle,IA' => 'ai',
'gpt,chatgpt,llm,llms,artificial intelligence,intelligence artificielle,IA,l\'ia' => 'ai',
'hacking,piratage' => 'hacking',
'health,healthy,nutrition,food,alimentation,nourriture,santé' => 'health-and-wellness,food',
'humans,humains' => 'humans',
@ -112,7 +112,7 @@ function auto_tag_plugin_init(ConfigManager $conf)
'scripting,jq,curl,wget,script,bash' => 'scripting',
'search engine,moteur de recherche' => 'search-engines',
'security,permission,sécurité,secure,privacy,private,degoogl,gdpr,data protection,online tracking,user profiling,anonymo,anonymi,surveillance,malware,spyware,decentrali,secrets,privacy matters,vpn,passkey,protéger' => 'privacy-and-security',
'simplicity,minimal,less,declutter,stopped using,simple,simplification,simplifier,reduction in,fewer,no longer needed,minimalisme,simplicité,réduction' => 'minimalism',
'simplicity,minimal,declutter,stopped using,simple,simplification,simplifier,reduction in,no longer needed,minimalisme,simplicité,réduction' => 'minimalism',
'small web,indie web,indieweb,petit web' => 'small-web',
'smartphone,android,mobile,phone,téléphone,portables' => 'mobile',
'snippet,extrait de code' => 'code',
@ -147,11 +147,11 @@ function fetch_page_content($url)
return '';
}
// Remove unnecessary tags and strip content to plain text
$htmlContent = preg_replace('/<(script|style|head|noscript)[^>]*>.*?<\/\1>/is', '', $htmlContent);
return strip_tags($htmlContent);
}
function calculate_tags(array $keywordsToTags, array $searchContents): array
{
$contextWeights = [
@ -175,10 +175,7 @@ function calculate_tags(array $keywordsToTags, array $searchContents): array
$tagList = explode(',', $tags);
foreach ($tagList as $tag) {
$tag = trim($tag);
if (!isset($tagScores[$tag])) {
$tagScores[$tag] = 0;
}
$tagScores[$tag] += $contextWeights[$context];
$tagScores[$tag] = ($tagScores[$tag] ?? 0) + $contextWeights[$context];
}
}
}
@ -188,13 +185,21 @@ function calculate_tags(array $keywordsToTags, array $searchContents): array
return $tagScores;
}
function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags = 3): array
/**
 * Pick the strongest tags from a score map.
 *
 * Selection steps:
 *  1. keep tags whose score is at least $minScore;
 *  2. keep only tags scoring strictly more than half of the best remaining score;
 *  3. order by descending score and return at most $maxTags tag names.
 *
 * @param array $tagScores Map of tag name => numeric score.
 * @param int   $minScore  Minimum score a tag needs to be considered.
 * @param int   $maxTags   Maximum number of tag names to return.
 *
 * @return array List of tag names, best score first; empty when no tag qualifies.
 */
function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags = 4): array
{
    // Filter tags with a score greater than or equal to the minimum required
    $filteredTags = array_filter($tagScores, static function ($score) use ($minScore) {
        return $score >= $minScore;
    });

    // Nothing qualified: bail out before max(), which throws a ValueError on an
    // empty array in PHP 8 (and returns false with a warning in PHP 7).
    if ($filteredTags === []) {
        return [];
    }

    // Determine the maximum score
    $maxScore = max($filteredTags);

    // Keep only tags scoring strictly above half of the maximum score
    // (a score of exactly half the maximum is dropped as well).
    $filteredTags = array_filter($filteredTags, static function ($score) use ($maxScore) {
        return $score > ($maxScore / 2);
    });

    // Sort tags by descending score
    arsort($filteredTags);

    // Limit the result to the requested number of tag names
    return array_slice(array_keys($filteredTags), 0, $maxTags);
}
/**
 * Write a "Tag Stats" section listing every tag score at the end of the link description.
 *
 * The section has the form "\n\nTag Stats:\n" followed by one "tag: score=N" line
 * per entry of $tagScores. A stats section already sitting at the very end of the
 * description is removed first, so calling this function repeatedly on the same
 * link replaces the section instead of stacking duplicates. When $tagScores is
 * empty no section is written.
 *
 * @param array $data      Payload whose $data['link']['description'] is read and rewritten.
 * @param array $tagScores Map of tag name => score.
 *
 * @return array The payload with the updated description.
 */
function append_tag_stats(array $data, array $tagScores): array
{
    // A missing description is treated as empty instead of being read undefined.
    $description = (string) ($data['link']['description'] ?? '');

    // Drop a previously appended trailing stats section. Line breaks may have been
    // turned into CRLF if the description went through an HTML form in between.
    $staleSection = '/(?:\r?\n){2}Tag Stats:\r?\n(?:[^\r\n]*: score=[^\r\n]*(?:\r?\n|\z))*\z/';
    $description = preg_replace($staleSection, '', $description) ?? $description;

    if ($tagScores !== []) {
        $stats = [];
        foreach ($tagScores as $tag => $score) {
            $stats[] = "$tag: score=$score";
        }
        $description .= "\n\nTag Stats:\n" . implode("\n", $stats);
    }

    $data['link']['description'] = $description;

    return $data;
}
function apply_auto_tags(array $data, ConfigManager $conf): array
{
if (empty($data['link']['url'])) {
@ -222,6 +238,9 @@ function apply_auto_tags(array $data, ConfigManager $conf): array
// Calculate scores for each tag
$tagScores = calculate_tags($keywordsToTags, $searchContents);
// Append tag stats to the description
$data = append_tag_stats($data, $tagScores);
// Filter and limit tags
$tagsToAdd = filter_and_limit_tags($tagScores);
@ -237,10 +256,7 @@ function apply_auto_tags(array $data, ConfigManager $conf): array
/**
 * Edit-link hook of the auto-tag plugin: runs auto-tagging on the given payload.
 *
 * NOTE(review): presumably invoked by the host application while rendering the
 * link edit form (inferred from the hook name) - confirm against the plugin API.
 *
 * Auto-tagging is applied to every link, new or existing; the payload's
 * 'link_is_new' flag is intentionally not consulted.
 *
 * @param array         $data Hook payload, passed through to apply_auto_tags().
 * @param ConfigManager $conf Configuration object, passed through to apply_auto_tags().
 *
 * @return array The payload returned by apply_auto_tags().
 */
function hook_auto_tag_plugin_render_editlink(array $data, ConfigManager $conf): array
{
    // Always apply auto-tagging, even for existing links
    return apply_auto_tags($data, $conf);
}
@ -248,3 +264,4 @@ function hook_auto_tag_plugin_save_link(array $data, ConfigManager $conf): array
{
return apply_auto_tags($data, $conf);
}