From c82457305db6f6ff95a18e3e4cad898171a2da4b Mon Sep 17 00:00:00 2001
From: SansGuidon
Date: Fri, 2 May 2025 14:55:09 +0000
Subject: [PATCH] fix(shaarli/autotag): fall back to dumb keyword (no AI)
 tagging when AI API key is not set

---
Review notes (this section is ignored by git-am):
- The fallback now also triggers when the API key is empty or missing, not
  only when it still holds the placeholder value.
- calculate_tags() skips empty keywords (they would compile to /\b\b/ and
  match any text), matches in Unicode mode so accented keywords get proper
  word boundaries, and ignores empty or non-string contexts.
- Existing tags are split with PREG_SPLIT_NO_EMPTY so repeated spaces do not
  leak into the merged tag string.
- The line structure and indentation of this patch were reconstructed
  (4-space PSR-12 style assumed); verify the context whitespace against the
  target file before applying.

 .../auto_tag_plugin_with_ai.php               | 125 ++++++++++++++++++-
 1 file changed, 122 insertions(+), 3 deletions(-)

diff --git a/auto_tag_plugin_with_ai/auto_tag_plugin_with_ai.php b/auto_tag_plugin_with_ai/auto_tag_plugin_with_ai.php
index 6a10876..5509444 100644
--- a/auto_tag_plugin_with_ai/auto_tag_plugin_with_ai.php
+++ b/auto_tag_plugin_with_ai/auto_tag_plugin_with_ai.php
@@ -91,7 +91,7 @@ function auto_tag_plugin_init(ConfigManager $conf)
         'music,spotify,radios,webradios,soundtrack,bande originale,musique' => 'music',
         'newsletter,news' => 'newsletter,news',
         'nostalgia,nostalgie,things used to be better,internet archive' => 'nostalgia',
-        'obsidian,note taking,note-taking,takings notes,note-geek,capturing knowledge,knowledge management,prise de notes,gestion de connaissances,knowledge transfer,transferring knowledge,your notes,my notes' => 'knowledge-management,note-taking',
+        'obsidian,note taking,note-taking,taking notes,note-geek,capturing knowledge,knowledge management,prise de notes,gestion de connaissances,knowledge transfer,transferring knowledge,your notes,my notes' => 'knowledge-management,note-taking',
         'ocr' => 'ocr',
         'open-source,open source,code source libre' => 'open-source,free',
         'optimize,optimization,speed up,an efficient,optimiser,optimisation' => 'optimization',
@@ -245,12 +245,38 @@ function apply_auto_tags(array $data, ConfigManager $conf): array
         return $data;
     }

+    // Fall back to keyword tagging (no AI) when the Mistral API key is unset,
+    // empty, or still the placeholder value.
+    $apiKey = $conf->get('plugins.MISTRAL_API_KEY');
+    if (in_array($apiKey, [null, '', 'YOUR_MISTRAL_API_KEY'], true)) {
+        $existingTags = (string) ($data['link']['tags'] ?? '');
+        $keywordsToTags = $conf->get('plugins.AUTO_TAG_KEYWORDS', []);
+        $searchContents = [
+            'title' => $title,
+            'url' => $url,
+            'description' => $desc,
+            'existing' => $existingTags,
+            'content' => $body,
+        ];
+
+        // Score every configured tag, then keep only the strongest ones.
+        $tagScores = calculate_tags($keywordsToTags, $searchContents);
+        $tagsToAdd = filter_and_limit_tags($tagScores);
+
+        // Merge with existing tags; PREG_SPLIT_NO_EMPTY drops the empty
+        // entries that explode() yields for '' or repeated spaces.
+        $currentTags = preg_split('/\s+/', $existingTags, -1, PREG_SPLIT_NO_EMPTY);
+        $data['link']['tags'] = implode(' ', array_unique(array_merge($currentTags, $tagsToAdd, ['auto-tagged'])));
+
+        return $data;
+    }
+
     $text = "$title\n$url\n\n$body";

-    $prompt = "TLDr de l'article, style gilfoyle/critique sans blabla et sans markdown/formating, max une ou deux phrases... ";
+    $prompt = "TLDr de l'article, style Gilfoyle sans blabla et sans markdown/formating, max une ou deux phrases... ";
     $prompt .= "EN FRANÇAIS et si possible avec une mini conclusion crue/honnête, pas de language prétentieux ici, soyons bruts... ";
     $prompt .= "et drôles quand c le cas, sinon francs, éducatifs/informatifs, optimistes mais pas dupes. ";
-    $prompt .= "Si possible en français, pas franglais, et si possible pas de \"cela\" mais des \"ça\", pas de zut mais des \"merde\" etc, sans être vulgaire ni raciste ).\n\n";
+    $prompt .= "Si possible en français, pas franglais, et si possible pas de 'cela' mais des 'ça', pas de zut mais des 'merde' etc.\n\n";
     $prompt .= $text . "\n\n";
     $prompt .= "Ensuite, parmi ces tags (" . implode(', ', $conf->get('plugins.AUTO_TAG_WHITELIST')) . "), ";
     $prompt .= "propose jusqu'à max " . $conf->get('plugins.AUTO_TAG_MAX_TAGS') . " tags pertinents (si possible moins) ";
@@ -283,6 +309,99 @@ function apply_auto_tags(array $data, ConfigManager $conf): array
     return $data;
 }

+/**
+ * Score tags by matching configured keywords against the link's text fields.
+ *
+ * @param array $keywordsToTags Map of comma-separated keywords to comma-separated tags.
+ * @param array $searchContents Map of context name (title, url, ...) to the text to search.
+ *
+ * @return array Map of tag to accumulated score.
+ */
+function calculate_tags(array $keywordsToTags, array $searchContents): array
+{
+    $contextWeights = [
+        'title' => 3,
+        'url' => 3,
+        'description' => 3,
+        'existing' => 3,
+        'content' => 1,
+    ];
+
+    // Drop empty/non-string contexts once so they are not re-checked per keyword.
+    $searchContents = array_filter($searchContents, function ($content) {
+        return is_string($content) && $content !== '';
+    });
+
+    $tagScores = [];
+
+    foreach ($keywordsToTags as $keywords => $tags) {
+        $tagList = array_filter(array_map('trim', explode(',', (string) $tags)), 'strlen');
+
+        foreach (explode(',', (string) $keywords) as $keyword) {
+            $keyword = trim($keyword);
+            if ($keyword === '') {
+                // An empty keyword would compile to /\b\b/ and match any text.
+                continue;
+            }
+
+            $pattern = '/\b' . preg_quote($keyword, '/') . '\b/i';
+
+            foreach ($searchContents as $context => $content) {
+                // Prefer Unicode mode so accented keywords get correct word
+                // boundaries; preg_match() returns false on invalid UTF-8, in
+                // which case retry in byte mode.
+                $matched = preg_match($pattern . 'u', $content);
+                if ($matched === false) {
+                    $matched = preg_match($pattern, $content);
+                }
+                if ($matched !== 1) {
+                    continue;
+                }
+
+                foreach ($tagList as $tag) {
+                    $tagScores[$tag] = ($tagScores[$tag] ?? 0) + ($contextWeights[$context] ?? 1);
+                }
+            }
+        }
+    }
+
+    return $tagScores;
+}
+
+/**
+ * Keep the best-scoring tags.
+ *
+ * A tag is kept when its score is at least $minScore and strictly greater
+ * than half of the best score; results are ordered by descending score.
+ *
+ * @param array $tagScores Map of tag to score.
+ * @param int   $minScore  Minimum score a tag needs to be considered.
+ * @param int   $maxTags   Maximum number of tags to return.
+ *
+ * @return array List of tag names.
+ */
+function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags = 10): array
+{
+    $filteredTags = array_filter($tagScores, function ($score) use ($minScore) {
+        return $score >= $minScore;
+    });
+
+    if (empty($filteredTags) || $maxTags <= 0) {
+        return [];
+    }
+
+    // Drop tags scoring no more than half of the best score.
+    $threshold = max($filteredTags) / 2;
+    $filteredTags = array_filter($filteredTags, function ($score) use ($threshold) {
+        return $score > $threshold;
+    });
+
+    // Highest score first.
+    arsort($filteredTags);
+
+    return array_slice(array_keys($filteredTags), 0, $maxTags);
+}
+
 function hook_auto_tag_plugin_render_editlink(array $data, ConfigManager $conf): array
 {
     return apply_auto_tags($data, $conf);