feat(autotag) improve selection of relevant tags
This commit is contained in:
parent
9b78c52a5a
commit
304d3ebd31
@ -70,7 +70,7 @@ function auto_tag_plugin_init(ConfigManager $conf)
|
|||||||
'gratuit,free' => 'free',
|
'gratuit,free' => 'free',
|
||||||
'gitops,gitlab,github actions,devops,SRE,ci/cd,platform-engineering,ci pipeline,application deployment,dagger,renovatebot,dependabot,continuous integration,site reliability eng' => 'devops,automation',
|
'gitops,gitlab,github actions,devops,SRE,ci/cd,platform-engineering,ci pipeline,application deployment,dagger,renovatebot,dependabot,continuous integration,site reliability eng' => 'devops,automation',
|
||||||
'git,gitlab,jujutsu,pijul,mercurial,svn,version control,contrôle de version' => 'version-control',
|
'git,gitlab,jujutsu,pijul,mercurial,svn,version control,contrôle de version' => 'version-control',
|
||||||
'gpt,chatgpt,llm,llms,artificial intelligence,intelligence artificielle,IA' => 'ai',
|
'gpt,chatgpt,llm,llms,artificial intelligence,intelligence artificielle,IA,l\'ia' => 'ai',
|
||||||
'hacking,piratage' => 'hacking',
|
'hacking,piratage' => 'hacking',
|
||||||
'health,healthy,nutrition,food,alimentation,nourriture,santé' => 'health-and-wellness,food',
|
'health,healthy,nutrition,food,alimentation,nourriture,santé' => 'health-and-wellness,food',
|
||||||
'humans,humains' => 'humans',
|
'humans,humains' => 'humans',
|
||||||
@ -112,7 +112,7 @@ function auto_tag_plugin_init(ConfigManager $conf)
|
|||||||
'scripting,jq,curl,wget,script,bash' => 'scripting',
|
'scripting,jq,curl,wget,script,bash' => 'scripting',
|
||||||
'search engine,moteur de recherche' => 'search-engines',
|
'search engine,moteur de recherche' => 'search-engines',
|
||||||
'security,permission,sécurité,secure,privacy,private,degoogl,gdpr,data protection,online tracking,user profiling,anonymo,anonymi,surveillance,malware,spyware,decentrali,secrets,privacy matters,vpn,passkey,protéger' => 'privacy-and-security',
|
'security,permission,sécurité,secure,privacy,private,degoogl,gdpr,data protection,online tracking,user profiling,anonymo,anonymi,surveillance,malware,spyware,decentrali,secrets,privacy matters,vpn,passkey,protéger' => 'privacy-and-security',
|
||||||
'simplicity,minimal,less,declutter,stopped using,simple,simplification,simplifier,reduction in,fewer,no longer needed,minimalisme,simplicité,réduction' => 'minimalism',
|
'simplicity,minimal,declutter,stopped using,simple,simplification,simplifier,reduction in,no longer needed,minimalisme,simplicité,réduction' => 'minimalism',
|
||||||
'small web,indie web,indieweb,petit web' => 'small-web',
|
'small web,indie web,indieweb,petit web' => 'small-web',
|
||||||
'smartphone,android,mobile,phone,téléphone,portables' => 'mobile',
|
'smartphone,android,mobile,phone,téléphone,portables' => 'mobile',
|
||||||
'snippet,extrait de code' => 'code',
|
'snippet,extrait de code' => 'code',
|
||||||
@ -147,11 +147,11 @@ function fetch_page_content($url)
|
|||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remove unnecessary tags and strip content to plain text
|
||||||
$htmlContent = preg_replace('/<(script|style|head|noscript)[^>]*>.*?<\/\1>/is', '', $htmlContent);
|
$htmlContent = preg_replace('/<(script|style|head|noscript)[^>]*>.*?<\/\1>/is', '', $htmlContent);
|
||||||
return strip_tags($htmlContent);
|
return strip_tags($htmlContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function calculate_tags(array $keywordsToTags, array $searchContents): array
|
function calculate_tags(array $keywordsToTags, array $searchContents): array
|
||||||
{
|
{
|
||||||
$contextWeights = [
|
$contextWeights = [
|
||||||
@ -175,10 +175,7 @@ function calculate_tags(array $keywordsToTags, array $searchContents): array
|
|||||||
$tagList = explode(',', $tags);
|
$tagList = explode(',', $tags);
|
||||||
foreach ($tagList as $tag) {
|
foreach ($tagList as $tag) {
|
||||||
$tag = trim($tag);
|
$tag = trim($tag);
|
||||||
if (!isset($tagScores[$tag])) {
|
$tagScores[$tag] = ($tagScores[$tag] ?? 0) + $contextWeights[$context];
|
||||||
$tagScores[$tag] = 0;
|
|
||||||
}
|
|
||||||
$tagScores[$tag] += $contextWeights[$context];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -188,13 +185,21 @@ function calculate_tags(array $keywordsToTags, array $searchContents): array
|
|||||||
return $tagScores;
|
return $tagScores;
|
||||||
}
|
}
|
||||||
|
|
||||||
function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags = 3): array
|
function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags = 4): array
|
||||||
{
|
{
|
||||||
// Filter tags with a score greater than or equal to the minimum required
|
// Filter tags with a score greater than or equal to the minimum required
|
||||||
$filteredTags = array_filter($tagScores, function ($score) use ($minScore) {
|
$filteredTags = array_filter($tagScores, function ($score) use ($minScore) {
|
||||||
return $score >= $minScore;
|
return $score >= $minScore;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Determine the maximum score
|
||||||
|
$maxScore = max($filteredTags);
|
||||||
|
|
||||||
|
// Remove tags with a score less than half of the maximum score
|
||||||
|
$filteredTags = array_filter($filteredTags, function ($score) use ($maxScore) {
|
||||||
|
return $score > ($maxScore / 2);
|
||||||
|
});
|
||||||
|
|
||||||
// Sort tags by descending score
|
// Sort tags by descending score
|
||||||
arsort($filteredTags);
|
arsort($filteredTags);
|
||||||
|
|
||||||
@ -202,6 +207,17 @@ function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags
|
|||||||
return array_slice(array_keys($filteredTags), 0, $maxTags);
|
return array_slice(array_keys($filteredTags), 0, $maxTags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Appends a "Tag Stats" section listing every computed tag score to the
 * link description.
 *
 * A stats section already present at the very end of the description is
 * replaced instead of duplicated, so calling this function several times on
 * the same link data does not make the description grow on each call.
 *
 * @param array $data      Plugin data; the description is read from and
 *                         written to $data['link']['description'].
 * @param array $tagScores Map of tag name => score.
 *
 * @return array The data array with the updated description.
 */
function append_tag_stats(array $data, array $tagScores): array
{
    // A missing description is treated as empty rather than triggering an
    // undefined-index warning on concatenation.
    $description = (string) ($data['link']['description'] ?? '');

    // Strip a previously appended stats section, but only when it sits at the
    // end of the description and every line after the header looks like
    // "<tag>: score=<n>". CRLF is tolerated as well as LF.
    $stripped = preg_replace(
        '/(?:\r?\n){2}Tag Stats:\r?\n(?:[^\r\n]*: score=[^\r\n]*(?:\r?\n)?)*\z/',
        '',
        $description
    );
    if ($stripped !== null) {
        $description = $stripped;
    }

    $stats = [];
    foreach ($tagScores as $tag => $score) {
        $stats[] = "$tag: score=$score";
    }

    $data['link']['description'] = $description . "\n\nTag Stats:\n" . implode("\n", $stats);

    return $data;
}
|
||||||
|
|
||||||
function apply_auto_tags(array $data, ConfigManager $conf): array
|
function apply_auto_tags(array $data, ConfigManager $conf): array
|
||||||
{
|
{
|
||||||
if (empty($data['link']['url'])) {
|
if (empty($data['link']['url'])) {
|
||||||
@ -222,6 +238,9 @@ function apply_auto_tags(array $data, ConfigManager $conf): array
|
|||||||
// Calculate scores for each tag
|
// Calculate scores for each tag
|
||||||
$tagScores = calculate_tags($keywordsToTags, $searchContents);
|
$tagScores = calculate_tags($keywordsToTags, $searchContents);
|
||||||
|
|
||||||
|
// Append tag stats to the description
|
||||||
|
$data = append_tag_stats($data, $tagScores);
|
||||||
|
|
||||||
// Filter and limit tags
|
// Filter and limit tags
|
||||||
$tagsToAdd = filter_and_limit_tags($tagScores);
|
$tagsToAdd = filter_and_limit_tags($tagScores);
|
||||||
|
|
||||||
@ -237,10 +256,7 @@ function apply_auto_tags(array $data, ConfigManager $conf): array
|
|||||||
|
|
||||||
function hook_auto_tag_plugin_render_editlink(array $data, ConfigManager $conf): array
|
function hook_auto_tag_plugin_render_editlink(array $data, ConfigManager $conf): array
|
||||||
{
|
{
|
||||||
if (!$data['link_is_new']) {
|
// Always apply auto-tagging, even for existing links
|
||||||
// return $data; // Only auto-tag new links
|
|
||||||
}
|
|
||||||
|
|
||||||
return apply_auto_tags($data, $conf);
|
return apply_auto_tags($data, $conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -248,3 +264,4 @@ function hook_auto_tag_plugin_save_link(array $data, ConfigManager $conf): array
|
|||||||
{
|
{
|
||||||
return apply_auto_tags($data, $conf);
|
return apply_auto_tags($data, $conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user