<?php

/**
 * Auto Tag plugin: derives tags for a link from keywords found in its title,
 * URL, description, existing tags and fetched page content.
 *
 * Provenance: recovered from git patch 0f966a7bade5d72b9fecf3462cee8eb469e8e580
 * ("add(autotag): here is the backend", SansGuidon, Fri, 22 Nov 2024 11:52:51 +0000),
 * which created auto_tag_plugin/auto_tag_plugin.php.
 *
 * NOTE(review): the beginning of the original file (everything from the PHP open
 * tag up to the `->setEmpty(` call) was lost in extraction. The `use` statement
 * and the header of the init function below are reconstructed from the visible
 * `hook_auto_tag_plugin_*` naming and the `ConfigManager` type hints; confirm
 * them against the original file.
 */

use Shaarli\Config\ConfigManager;

/**
 * Registers the default keyword => tags map under `plugins.AUTO_TAG_KEYWORDS`.
 *
 * Each key is a comma-separated list of keywords (matched as case-insensitive
 * substrings); each value is a comma-separated list of tags credited whenever
 * one of those keywords is found.
 *
 * NOTE(review): presumably `setEmpty` only writes the value when the setting
 * has not been configured yet; verify against ConfigManager.
 *
 * @param ConfigManager $conf Configuration manager receiving the default map.
 */
function auto_tag_plugin_init(ConfigManager $conf)
{
    $conf->setEmpty('plugins.AUTO_TAG_KEYWORDS', [
        'accessibility,web development,web design,html,css' => 'web-development',
        'adhd,tdah' => 'adhd',
        'alternative,compatible' => 'alternatives',
        'anxi' => 'health-and-wellness',
        'art' => 'culture',
        'autism,autist' => 'autism',
        'automation,automate,automatic,scripting,terminal' => 'scripting,automation',
        'beauty,imperfection' => 'beauty,imperfection',
        'belgian,belgium' => 'belgium',
        'best practices,checklist,guide,how-to,how to,deep dive,dive into,tips,optimize,optimization,comment,étapes,tutorial,tutoriel,how do I' => 'guides-and-tips',
        'books,livre,livre,quatrième de couverture,roman,novel' => 'reading-and-literature,may-read,inspiration,culture',
        'bruxelles,brussels' => 'brussels',
        'calm tech,calmness,calm' => 'calm-tech',
        'cheatsheet,cheat sheet,cheat-sheet' => 'cheatsheet,guides-and-tips',
        'cloud,aws,amazon' => 'cloud',
        'comics' => 'comics,reading-and-literature,may-read,culture',
        'communicate,communication,messaging,messenger,gmail' => 'communication',
        'complex' => 'complexity',
        'data collection' => 'data-collection',
        'data transfer' => 'data-portability',
        'debug,troubleshoot,diagnose,resolution,solution,problem,solv,trouble' => 'problem-solving,guides-and-tips',
        'design' => 'design',
        'docker,docker-compose,docker compose,container,containers,k8s,kubernetes,minikube,k3s,helm,openshift' => 'container-technology',
        'documentation,docs' => 'documentation',
        'elixir,python,pip,rust,golang,programming,developer,software development' => 'software-development',
        'emulator,emulation' => 'emulation',
        'entrepreneurship,entrepreneurs' => 'business',
        'espresso,coffee' => 'coffee',
        'ethic,ethique' => 'ethics',
        'explor,going deep' => 'discovery',
        'fediverse' => 'privacy-and-security,freedom,social-media',
        'libre' => 'freedom',
        'libre,software,logiciel' => 'free-software',
        'from home,remote work,work remote' => 'remote-work',
        'gafam' => 'big-tech',
        'game,jeu vidéo,game dev' => 'games',
        'gratuit,free' => 'free',
        'gitops,gitlab,devops,SRE,ci/cd,platform-engineering,ci pipeline,application deployment,dagger,continuous integration,site reliability eng' => 'devops',
        'git,gitlab' => 'version-control',
        'gpt,chatgpt,llm,artificial intelligence,intelligence artificielle,l\'ia' => 'ai',
        'health,healthy,nutrition,food,alimentation,nourriture' => 'health-and-wellness,food',
        'humor,humour' => 'humor',
        'inspiration,creativity,creative' => 'inspiration',
        'lambic,gueuze,beer,bière' => 'beer-and-brewing',
        'linux,ubuntu,debian,linux windows macos' => 'os',
        'list,index of,awesome' => 'list,discovery,tools-and-resources',
        'low-tech,low tech' => 'low-tech',
        'monitoring,metrics' => 'monitoring,metrics',
        'music,spotify' => 'music',
        'newsletter' => 'newsletter,news,may-subscribe',
        'nostalg' => 'nostalgia',
        'obsidian,note taking,note-taking,knowledge manag' => 'knowledge-management',
        'open-source,open source' => 'open-source,free',
        'organize,organise,planning,prioritize,priorities,priority,tasks,project,focus,productivity,productive' => 'organizing,productivity-and-management',
        'philosoph' => 'philosophy',
        'photography' => 'photography,photos',
        'podcast' => 'podcast',
        'python' => 'python,software-development,code',
        'problem,solv,trouble' => 'problem-solving',
        'recycling,sustainab,green web' => 'ecology',
        'reviews,critique' => 'reviews',
        'rss,rss feed,miniflux' => 'content-aggregation,content-curation',
        'ruby' => 'ruby,software-development,code',
        'scripting,jq' => 'scripting',
        'search engine' => 'search-engines',
        'security,permission,sécurité,secure,privacy,private,degoogl,gdpr,data protection,online tracking,user profiling,anonymo,anonymi,surveillance,malware,spyware,decentrali,secrets,privacy matters,vpn' => 'privacy-and-security',
        'simplicity,minimal,less,declutter' => 'minimalism',
        'small web,indie web,indieweb' => 'small-web',
        'smartphone,android,mobile,phone' => 'mobile',
        'teamwork,collaborat,équipe' => 'collaboration',
        'technology' => 'technology',
        'test' => 'testing',
        'to do,to-do' => 'todo',
        'tool,resources,a script,outil,a collection,a catalog,awesome list' => 'tools-and-resources',
        'training,course,conference talk,learning,homeschool,expert,specializ,tacit knowledge,tribal knowledge' => 'education',
        '.txt' => 'text-files',
        'vpn' => 'privacy-and-security',
        'wordpress,personal website,blog roll,blogroll,blogosphere,webring,digital garden' => 'blogging',
        'youtube.com,invidious,peertube,watch?v' => 'to-watch,video',
    ]);
}

/**
 * Lower-cases text for case-insensitive matching.
 *
 * Uses the multibyte implementation when the mbstring extension is loaded so
 * accented letters fold too ("É" -> "é"); otherwise falls back to the
 * byte-wise strtolower().
 *
 * @param string $text Text to fold.
 *
 * @return string Lower-cased text.
 */
function auto_tag_plugin_lowercase(string $text): string
{
    return function_exists('mb_strtolower')
        ? mb_strtolower($text, 'UTF-8')
        : strtolower($text);
}

/**
 * Splits a comma-separated list into trimmed, non-empty items.
 *
 * @param mixed $list Comma-separated string; integer array keys are accepted
 *                    and cast. Non-scalar values yield an empty list.
 *
 * @return string[] Items in their original order.
 */
function auto_tag_plugin_split_list($list): array
{
    if (!is_scalar($list)) {
        return [];
    }

    $items = array_map('trim', explode(',', (string) $list));

    // Strict comparison so that an item such as "0" is kept.
    return array_values(array_filter($items, static function ($item) {
        return $item !== '';
    }));
}

/**
 * Reduces an HTML document to its visible text.
 *
 * Removes <script>, <style>, <head> and <noscript> elements together with
 * their contents, then strips the remaining tags.
 *
 * @param string $html Raw HTML.
 *
 * @return string Text with all tags removed.
 */
function auto_tag_plugin_html_to_text(string $html): string
{
    // \b keeps "<head" from also matching elements such as <header>.
    $cleaned = preg_replace('/<(script|style|head|noscript)\b[^>]*>.*?<\/\1>/is', '', $html);

    // preg_replace() returns null on a PCRE error (e.g. backtrack limit on a
    // very large page); fall back to the unfiltered markup in that case.
    return strip_tags($cleaned ?? $html);
}

/**
 * Merges new tags into an existing tag list.
 *
 * @param mixed    $existingTags Space-separated tag string, or a list of tags.
 * @param string[] $tagsToAdd    Tags to append.
 *
 * @return string Space-separated tags without duplicates or empty entries;
 *                existing tags come first.
 */
function auto_tag_plugin_merge_tags($existingTags, array $tagsToAdd): string
{
    if (is_array($existingTags)) {
        $current = array_map('strval', array_filter($existingTags, 'is_scalar'));
    } else {
        $current = explode(' ', is_scalar($existingTags) ? (string) $existingTags : '');
    }

    // explode(' ', '') yields [''], which would otherwise leave a leading
    // space in the merged string.
    $current = array_filter(array_map('trim', $current), static function ($tag) {
        return $tag !== '';
    });

    return implode(' ', array_unique(array_merge($current, $tagsToAdd)));
}

/**
 * Downloads a page and returns its visible text, best effort.
 *
 * Only http:// and https:// URLs are fetched, so a crafted link cannot make
 * the server read local files or other stream wrappers. Any failure yields an
 * empty string rather than an error.
 *
 * @param mixed $url     URL of the page to fetch.
 * @param float $timeout Maximum time in seconds to wait for the remote server.
 *
 * @return string Page text without markup, or '' when nothing could be fetched.
 */
function fetch_page_content($url, float $timeout = 10.0): string
{
    if (!is_string($url) || $url === '') {
        return '';
    }

    $scheme = parse_url($url, PHP_URL_SCHEME);
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return '';
    }

    // The "http" context options are also used by the https wrapper.
    $context = stream_context_create(['http' => ['timeout' => $timeout]]);

    // Deliberate best effort: an unreachable page must not break link editing,
    // so the warning is suppressed and reported as "no content".
    $htmlContent = @file_get_contents($url, false, $context);
    if ($htmlContent === false) {
        return '';
    }

    return auto_tag_plugin_html_to_text($htmlContent);
}

/**
 * Scores every tag whose keywords occur in the searchable texts.
 *
 * A keyword found in a context credits each of its tags with that context's
 * weight; matching is a case-insensitive substring search. Empty keywords,
 * empty tags and non-scalar or empty contents are ignored.
 *
 * @param array $keywordsToTags Map of comma-separated keywords to comma-separated tags.
 * @param array $searchContents Map of context name to text. Known contexts are
 *                              title, url, description and existing (weight 3)
 *                              and content (weight 1); any other context
 *                              counts with weight 1.
 *
 * @return array Map of tag to accumulated score, in order of first match.
 */
function calculate_tags(array $keywordsToTags, array $searchContents): array
{
    // Weight of a match, per context.
    $contextWeights = [
        'title' => 3,
        'url' => 3,
        'description' => 3,
        'existing' => 3,
        'content' => 1,
    ];

    // Fold each text once up front instead of once per keyword.
    $haystacks = [];
    foreach ($searchContents as $context => $content) {
        $text = is_scalar($content) ? (string) $content : '';
        if ($text === '') {
            continue;
        }
        $haystacks[] = [
            'text' => auto_tag_plugin_lowercase($text),
            'weight' => $contextWeights[$context] ?? 1,
        ];
    }

    $tagScores = [];

    foreach ($keywordsToTags as $keywords => $tags) {
        $tagList = auto_tag_plugin_split_list($tags);
        if ($tagList === []) {
            continue;
        }

        foreach (auto_tag_plugin_split_list($keywords) as $keyword) {
            $needle = auto_tag_plugin_lowercase($keyword);

            foreach ($haystacks as $haystack) {
                if (strpos($haystack['text'], $needle) === false) {
                    continue;
                }
                foreach ($tagList as $tag) {
                    $tagScores[$tag] = ($tagScores[$tag] ?? 0) + $haystack['weight'];
                }
            }
        }
    }

    return $tagScores;
}

/**
 * Keeps the best-scoring tags.
 *
 * @param array $tagScores Map of tag to score.
 * @param int   $minScore  Minimum score (inclusive) a tag needs to be kept.
 * @param int   $maxTags   Maximum number of tags returned.
 *
 * @return array Tag names ordered by descending score.
 */
function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags = 7): array
{
    $kept = array_filter($tagScores, static function ($score) use ($minScore) {
        return $score >= $minScore;
    });

    // Highest score first; keys (the tag names) are preserved.
    arsort($kept);

    return array_slice(array_keys($kept), 0, max(0, $maxTags));
}

/**
 * Computes tags for `$data['link']` and merges them into its tag string.
 *
 * The link's title, URL, description, current tags and fetched page text are
 * searched for the configured keywords. The fixed tag "auto-tagged" is always
 * appended. Missing link fields are treated as empty.
 *
 * @param array         $data Hook payload holding the link under the 'link' key.
 * @param ConfigManager $conf Source of the `plugins.AUTO_TAG_KEYWORDS` map.
 *
 * @return array The payload with `$data['link']['tags']` updated.
 */
function apply_auto_tags(array $data, ConfigManager $conf): array
{
    if (isset($data['link']) && !is_array($data['link'])) {
        // Nothing sensible can be read from or written to a non-array link.
        return $data;
    }

    $link = $data['link'] ?? [];

    $keywordsToTags = $conf->get('plugins.AUTO_TAG_KEYWORDS', []);
    if (!is_array($keywordsToTags)) {
        $keywordsToTags = [];
    }

    $url = $link['url'] ?? '';
    $existingTags = auto_tag_plugin_merge_tags($link['tags'] ?? '', []);

    $searchContents = [
        'title' => $link['title'] ?? '',
        'url' => $url,
        'description' => $link['description'] ?? '',
        'existing' => $existingTags,
        'content' => fetch_page_content($url),
    ];

    // Score every candidate tag, then keep only the strongest ones.
    $tagsToAdd = filter_and_limit_tags(calculate_tags($keywordsToTags, $searchContents));

    // Fixed marker tag, added regardless of the scoring result.
    $tagsToAdd[] = 'auto-tagged';

    $data['link']['tags'] = auto_tag_plugin_merge_tags($existingTags, $tagsToAdd);

    return $data;
}

/**
 * Hook: applies auto-tagging when the link edit form is rendered.
 *
 * Tags are computed for both new and existing links. The original code had a
 * guard on `$data['link_is_new']` whose early return was commented out, so it
 * had no effect; it is removed here.
 *
 * @param array         $data Hook payload holding the link under the 'link' key.
 * @param ConfigManager $conf Plugin configuration.
 *
 * @return array The payload with auto-tags merged in.
 */
function hook_auto_tag_plugin_render_editlink(array $data, ConfigManager $conf): array
{
    return apply_auto_tags($data, $conf);
}

/**
 * Hook: applies auto-tagging when a link is saved.
 *
 * NOTE(review): Shaarli's plugin documentation describes the save_link payload
 * as the link itself rather than an array wrapping it under 'link'; confirm.
 * Both shapes are handled: a wrapped payload is processed as before, and a
 * flat one is wrapped for processing and unwrapped again.
 *
 * @param array         $data Hook payload (wrapped or flat link data).
 * @param ConfigManager $conf Plugin configuration.
 *
 * @return array The payload, in the shape it was received, with auto-tags merged in.
 */
function hook_auto_tag_plugin_save_link(array $data, ConfigManager $conf): array
{
    if (isset($data['link']) && is_array($data['link'])) {
        return apply_auto_tags($data, $conf);
    }

    $wrapped = apply_auto_tags(['link' => $data], $conf);

    return $wrapped['link'];
}