2024-11-22 11:52:51 +00:00
< ? php
use Shaarli\Config\ConfigManager ;
/**
 * Registers the default keyword-to-tag rules for the auto-tag plugin.
 *
 * Each array key is a comma-separated list of keywords and each value is a
 * comma-separated list of tags to suggest when one of those keywords is found.
 * The rules are stored under the "plugins.AUTO_TAG_KEYWORDS" setting through
 * ConfigManager::setEmpty() (presumably a no-op when the setting already has a
 * value; confirm against ConfigManager).
 *
 * Maintenance notes:
 * - calculate_tags() scores every entry of a keyword list independently, so a
 *   keyword repeated inside one rule counts twice. Keep each list free of
 *   duplicates.
 * - Leading/trailing spaces around a keyword are trimmed before matching.
 *
 * @param ConfigManager $conf Configuration store receiving the default rules.
 */
function auto_tag_plugin_init(ConfigManager $conf)
{
    // Configure keywords and tags based on provided rules
    $conf->setEmpty('plugins.AUTO_TAG_KEYWORDS', [
        'accessibility,accessibilité,web development,web design,html,css,websites' => 'web',
        'accounting,comptabilité' => 'accounting',
        'addict,addiction,addicted,drugs,drogues,cigarettes,bonbons,dopamine,sucre,nicotine,hooked' => 'addiction',
        'adhd,tdah' => 'adhd',
        'alternative,alternatives,compatible,compatibles,migrated,migration' => 'alternatives',
        'ads,advertisements,publicités,spam' => 'ads',
        'anxiété,anxieux,anxieuse,burnout,méditation,cardio,santé,health,healthy' => 'health-and-wellness',
        'architecture,architectures' => 'architecture',
        'archive,archiving,archives,archivage' => 'archiving,archives',
        'art,arts,folklore' => 'culture',
        'ask hn,news.ycombinator.com/item,reddit.com/r/' => 'debate',
        'autism,autisme,autist,autiste' => 'autism',
        'belgian,belgium,belge,belgique' => 'belgium',
        'board game,board games,boardgame,jeu de société,jeux de société' => 'board-games,geek',
        'bookmarks,favoris,signets' => 'bookmarks-management',
        'browsers,navigateurs,web browsers' => 'web-browsers',
        'books,livre,livres,quatrième de couverture,roman,novel,reading-list' => 'reading-and-literature,inspiration,culture',
        'robots.txt,bots,spam,crawling,ddos' => 'spam',
        'bruxelles,brussels' => 'brussels',
        'calm tech,calmness,calm,technologie calme' => 'calm-tech',
        'cars,car bloat,vehicles,véhicule,véhicules,vehicle,automobile,automotive' => 'transport',
        'cheatsheet,cheat sheet,cheat-sheet,antisèche' => 'guides-and-tips',
        'cleaning,nettoyage,deep clean,cleanse,deep cleans' => 'cleaning',
        'cloud,aws,amazon,cloudron' => 'cloud',
        'cloudron' => 'cloudron,hosting',
        '(comic),-comic-,comics,bandes dessinées' => 'comics,reading-and-literature,culture,humor',
        'comparison,comparer, vs ,versus,comparatif,comparaison' => 'comparison',
        'communicate,communication,messaging,messenger,gmail,communiquer,écriture inclusive,la langue,académie française,language' => 'communication',
        'complex,impossible to solve,complicated,very difficult,complexe,compliqué,complexity' => 'complexity,its-complicated',
        'computers,personal computer,the pc' => 'pc',
        'configure,configuration,paramétrer,paramétrage' => 'configuration',
        'database,databases,RDS,base de données' => 'databases',
        'data collection,collecte de données' => 'data-collection',
        'data transfer,transfert de données' => 'data-portability',
        'debug,troubleshoot,diagnose,résoudre,diagnostiquer,troubleshooting,find a solution,fix bugs' => 'problem-solving,guides-and-tips,debugging',
        'design,designs' => 'design',
        'development workflow,devex,flux de développement' => 'devex',
        'digital garden' => 'digital-garden',
        'disk,disque,disques' => 'storage',
        'distraction,procrastination,procrastine,procrastiner,glander' => 'procrastination',
        'diy,self-host,héberger soi-même,my personal,fait maison' => 'diy',
        'dns,network,tcp,wireshark,réseau' => 'network',
        'docker,docker-compose,docker compose,container,containers,k8s,eks,kubernetes,minikube,k3s,helm,openshift' => 'container-technology,devops',
        'documentation,docs,document,documentation technique' => 'documentation',
        'drawing' => 'drawing',
        'drinks,soda,beer,coffee' => 'drinks',
        'ses droits,legally,legalement,légal' => 'legal',
        'elixir,python,pip,php,rust,golang,programming,developer,software development,developers,développeurs' => 'software-development',
        'emulator,emulation,émulateur,émulation' => 'emulation',
        'entrepreneurship,entrepreneurs,entrepreneuriat' => 'business',
        'espresso,coffee,café' => 'coffee,drinks',
        'ethic,ethique,ethics' => 'ethics',
        'explor,going deep,exploration' => 'discovery',
        'en sécurité,enfin libre,libéré,unsafe,liberté' => 'privacy-and-security,freedom',
        'libre,free,liberté,degoogling,degoogle,degoogler,dégooglisation' => 'freedom',
        'logiciel libre,free software,logiciel gratuit,free to use' => 'free-software',
        'from home,remote work,work remote,travail à distance,télétravail' => 'remote-work',
        'frustrated,frustration,am pissed,I hate' => 'rant',
        'big-tech,gafam,degoogling,google,degoogle,grandes entreprises technologiques,meta quest,meta ai' => 'big-tech',
        'game,jeu vidéo,game dev,jeux,jeux vidéo,games,gameplay' => 'games,geek,culture',
        'gamedev,building game,développement de jeux' => 'gamedev,games,geek,culture',
        'gratuit,free' => 'free',
        'gitops,gitlab,github actions,devops,SRE,ci/cd,platform-engineering,ci pipeline,application deployment,dagger,renovatebot,dependabot,continuous integration,site reliability eng' => 'devops',
        'git,gitlab,jujutsu,pijul,mercurial,svn,version control,contrôle de version' => 'version-control',
        'gpt,chatgpt,llm,llms,artificial intelligence,intelligence artificielle,IA,l\'ia,ai,ai model,an ai,metal ai,auto-coder,autonomous AI' => 'ai',
        'hacking,piratage' => 'hacking',
        'nutrition,food,alimentation,nourriture,recette,recette de cuisine,recettes de cuisine,ingrédients,cette recette,beurre,cuisine' => 'food',
        'history,histoire,documentaire,documentary' => 'history',
        'humans,humains' => 'humans',
        'humor,humour' => 'humor',
        'idiocracy' => 'idiocracy',
        'inspiration,creativity,creative,créativité' => 'inspiration',
        'leadership,staff engineering,gestion' => 'leadership',
        'lambic,gueuze,beer,bière,bières' => 'beer-and-brewing,drinks',
        'linux,ubuntu,debian,linux windows macos,sur mac,sur windows' => 'os',
        'list,index of,awesome,installation,GitHub - ,liste' => 'discovery',
        'low-tech,low tech,technologie simple' => 'low-tech',
        'merdification,enshittif,AI-generated,crapification,decline in quality,déclin de qualité' => 'enshittification',
        'misinformation,fact-checking,fact checking' => 'misinformation',
        'monitoring,metrics,to monitor,surveillance,métriques' => 'monitoring,metrics',
        'movie,cinéma,cinema,film,films' => 'movies,culture,geek',
        'music,spotify,radios,webradios,soundtrack,bande originale,musique' => 'music',
        'newsletter,news' => 'newsletter,news',
        'nostalgia,nostalgie,things used to be better,internet archive' => 'nostalgia',
        'obsidian,note taking,note-taking,taking notes,note-geek,capturing knowledge,knowledge management,prise de notes,gestion de connaissances,knowledge transfer,transferring knowledge,your notes,my notes' => 'knowledge-management,note-taking',
        'ocr' => 'ocr',
        'open-source,open source,code source libre' => 'open-source,free',
        'optimize,optimization,speed up,an efficient,optimiser,optimisation' => 'optimization',
        'philosophe,philosophia,philosophy,lifestyle,philosophie' => 'philosophy',
        'photography,photos,photographie' => 'photos',
        'podcast,podcasts' => 'podcast',
        'ego,narcissism,narcissist,narcissisme,psycholog,psychologie' => 'psychology',
        'voting,politic,politique,vote,multiculturalism,culturalism,cultural integration,political' => 'politics',
        'python,logiciels en python' => 'python,software-development,code',
        'privatebin' => 'secrets',
        'productivity,time management' => 'productivity-and-management',
        'programming languages,langages de programmation' => 'code,computer-languages',
        'publishing,publier,publication' => 'publishing',
        'quality,qualité' => 'quality',
        'recommandations,recommendation,conseils' => 'recommendations',
        'recycling,sustainable,green web,climate,recyclage,web écologique,climat,ai emissions,water use,emissions produced,amount of co2,global co2,co2 emissions,car bloat,environnement,environment,environmental,environmentally,plastic waste' => 'ecology',
        'relationship,relationships,de rencontre,meaningful connections' => 'relationships',
        'religion,chretiens,bible,coran,islam,musulmans,croyants,athée,la foi,église' => 'philosophy',
        'reviews,critique,avis' => 'reviews',
        'rss,rss feed,miniflux,web reader,lecteur web' => 'content-aggregation,content-curation',
        'ruby,rails app,applications ruby' => 'ruby,software-development,code',
        'science,sciences,scientifique,scientist' => 'science',
        'scripting,jq,curl,wget,script,bash,terminal,bash script,#!/bin/bash,script python,python script,lua,script shell,script bash,shell script,shell' => 'scripting',
        'search engine,moteur de recherche' => 'search-engines',
        'security,permission,sécurité,anti vol,anti-vol,secure,data privacy,privacy,private,degoogl,gdpr,data protection,online tracking,user profiling,anonymo,anonymi,surveillance,malware,spyware,decentrali,secrets,privacy matters,vpn,passkey,protéger,password manager,vie privée' => 'privacy-and-security',
        'simplicity,minimal,declutter,stopped using,simple,simplification,simplifier,reduction in,no longer needed,minimalisme,simplicité,réduction' => 'minimalism',
        'small web,indie web,indieweb,petit web' => 'small-web',
        'smartphone,android,mobile,phone,téléphone,sms' => 'mobile',
        'snippet,extrait de code' => 'code',
        'static site,static-site,site statique' => 'static-site',
        'social media,réseau social,fediverse,fédiverse,réseaux sociaux,social networks,social network,meta quest' => 'social-media',
        'society,societies' => 'society',
        'teamwork,collaborat,équipe,cooperat,coordinat,travail équipe' => 'collaboration',
        'technology,technologie' => 'technology',
        'template,modèle' => 'template',
        'terminal,terminaltrove' => 'terminal,tools-and-resources',
        'test,tester' => 'testing',
        'markdown,text files,fichiers texte,formats,text-based,plaintext,markup language,plain text,basé sur du texte,langage balisé' => 'plaintext,formats,text-files',
        'time to update,maintainers,tech-debt,legacy code,long term software,temps pour mettre à jour' => 'maintenance,tech-debt',
        'to do,to-do,à faire' => 'todo',
        'tool,tools-and-resources,resources,a script,outil,a collection,a catalog,awesome list,links,outils,ressources,password manager,logiciels' => 'tools-and-resources',
        'training,course,conference talk,learning,homeschool,expert,specializ,tacit knowledge,tribal knowledge,formation,cours,conférence,apprentissage' => 'education',
        '.txt,text-based,fichiers txt' => 'text-files',
        'ui,interfaces utilisateur' => 'ui',
        'ux,the experience of,usable,uxer,user experience,expérience utilisateur' => 'ux',
        'vps,serveur privé virtuel' => 'cloud,privacy-and-security,hosting',
        'web archive,web archiving,save any website,bookmarks,wayback machine,archive.org,archivebox,archive web' => 'web-archiving',
        'webring' => 'discovery,small-web,webring',
        'wordpress,personal website,personal websites,blog roll,blogroll,blogosphere,webring,digital garden,to blog,blogs' => 'blogging,writing,discovery,small-web',
        'work,travail,contract work,previous job,my work,coworkers,coworker,the job,workgroup' => 'work',
        'youtube.com,invidious,peertube,watch?v' => 'video',
        'zoemp,zoemp.be' => 'zoemp',
    ]);
}
/**
 * Downloads a web page and reduces it to plain text for keyword matching.
 *
 * Only http:// and https:// URLs are fetched. Anything else (local paths,
 * file://, php://, ftp://, relative note URLs, ...) yields an empty string so
 * that a bookmark URL can never be used to read local files through PHP's
 * stream wrappers. The download is best-effort: any failure results in ''.
 *
 * @param string $url Address of the page to download.
 *
 * @return string Page text with <script>, <style>, <head> and <noscript>
 *                sections and all remaining tags removed, or '' when the URL
 *                is not http(s) or the download fails.
 */
function fetch_page_content($url)
{
    $scheme = parse_url((string) $url, PHP_URL_SCHEME);
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return '';
    }

    // Bound the wait so a slow or unresponsive site cannot stall the request
    // for the full default socket timeout. The "http" options also apply to https.
    $context = stream_context_create([
        'http' => ['timeout' => 5],
    ]);

    // Warnings are suppressed on purpose: an unreachable page simply means
    // there is no content to analyse.
    $htmlContent = @file_get_contents($url, false, $context);
    if ($htmlContent === false) {
        return '';
    }

    // Remove unnecessary tags and strip content to plain text
    $withoutBlocks = preg_replace('/<(script|style|head|noscript)[^>]*>.*?<\/\1>/is', '', $htmlContent);
    if ($withoutBlocks === null) {
        // preg_replace() returns null on a PCRE error (e.g. backtrack limit on
        // a very large page); fall back to the raw markup instead of losing it.
        $withoutBlocks = $htmlContent;
    }

    return strip_tags($withoutBlocks);
}
/**
 * Scores tags by looking for configured keywords in several pieces of text.
 *
 * Every keyword of a rule is searched case-insensitively as a whole word in
 * every context. Each (keyword, context) hit adds the context's weight to every
 * tag of the rule.
 *
 * Matching details:
 * - Text that is valid UTF-8 is matched in Unicode mode so accented letters
 *   count as word characters ("santé", "Émulation"). Text in another encoding
 *   is matched byte-wise, which still works for ASCII keywords.
 * - A word boundary is only required on a side of the keyword that starts or
 *   ends with a letter, digit or underscore. Keywords such as "(comic)",
 *   ".txt" or "#!/bin/bash" therefore match literally.
 * - Empty keywords and empty tags (e.g. from "a,,b") are ignored.
 *
 * @param array $keywordsToTags Map of comma-separated keywords to comma-separated tags.
 * @param array $searchContents Map of context name ("title", "url", "description",
 *                              "existing", "content") to the text to search.
 *                              Unknown context names are weighted 1.
 *
 * @return array Map of tag to accumulated score, in order of first match.
 */
function calculate_tags(array $keywordsToTags, array $searchContents): array
{
    $contextWeights = [
        'title' => 3,
        'url' => 3,
        'description' => 3,
        'existing' => 3,
        'content' => 1,
    ];
    $wordChar = '[\p{L}\p{N}_]';

    // Inspect each context once: skip empty text and remember whether it can be
    // matched in Unicode mode (preg_match() fails on invalid UTF-8 with /u).
    $haystacks = [];
    foreach ($searchContents as $context => $content) {
        $content = (string) $content;
        if ($content === '') {
            continue;
        }
        $haystacks[] = [
            'text' => $content,
            'utf8' => preg_match('//u', $content) === 1,
            'weight' => $contextWeights[$context] ?? 1,
        ];
    }

    $tagScores = [];
    foreach ($keywordsToTags as $keywords => $tags) {
        // Parse the tag list once per rule rather than once per match.
        $tagList = array_filter(array_map('trim', explode(',', (string) $tags)), 'strlen');
        if ($tagList === []) {
            continue;
        }

        foreach (explode(',', (string) $keywords) as $keyword) {
            $keyword = trim($keyword);
            if ($keyword === '') {
                // An empty keyword would compile to a pattern matching nearly everything.
                continue;
            }

            $keywordIsUtf8 = preg_match('//u', $keyword) === 1;
            $edgeFlag = $keywordIsUtf8 ? 'u' : '';
            $startsWithWordChar = preg_match('/^' . $wordChar . '/' . $edgeFlag, $keyword) === 1;
            $endsWithWordChar = preg_match('/' . $wordChar . '\z/' . $edgeFlag, $keyword) === 1;

            $pattern = '/'
                . ($startsWithWordChar ? '(?<!' . $wordChar . ')' : '')
                . preg_quote($keyword, '/')
                . ($endsWithWordChar ? '(?!' . $wordChar . ')' : '')
                . '/';

            foreach ($haystacks as $haystack) {
                $modifiers = ($keywordIsUtf8 && $haystack['utf8']) ? 'iu' : 'i';
                if (preg_match($pattern . $modifiers, $haystack['text']) !== 1) {
                    continue;
                }
                foreach ($tagList as $tag) {
                    $tagScores[$tag] = ($tagScores[$tag] ?? 0) + $haystack['weight'];
                }
            }
        }
    }

    return $tagScores;
}
2025-01-17 10:24:13 +00:00
/**
 * Selects the strongest tags from a score table.
 *
 * A tag is kept when its score is at least $minScore and strictly greater than
 * half of the best remaining score. The survivors are ordered by descending
 * score and truncated to $maxTags entries.
 *
 * @param array $tagScores Map of tag to score.
 * @param int   $minScore  Minimum score a tag needs to be considered.
 * @param int   $maxTags   Maximum number of tags to return.
 *
 * @return array List of tag names, best first.
 */
function filter_and_limit_tags(array $tagScores, int $minScore = 2, int $maxTags = 4): array
{
    // First pass: drop everything below the absolute minimum.
    $eligible = [];
    foreach ($tagScores as $tag => $score) {
        if ($score >= $minScore) {
            $eligible[$tag] = $score;
        }
    }

    // Nothing qualified: bail out early (also avoids calling max() on an empty array).
    if (empty($eligible)) {
        return [];
    }

    // Second pass: keep only tags scoring more than half of the top score.
    $halfOfBest = max($eligible) / 2;
    $ranked = [];
    foreach ($eligible as $tag => $score) {
        if ($score > $halfOfBest) {
            $ranked[$tag] = $score;
        }
    }

    // Highest score first, then cap the number of results.
    arsort($ranked);
    $orderedTags = array_keys($ranked);

    return array_slice($orderedTags, 0, $maxTags);
}
2025-01-17 10:24:13 +00:00
/**
 * Appends a "Tag Stats" listing of every tag score to the link description.
 *
 * Debugging aid: one entry is added per tag in $tagScores, after the existing
 * description text. A link without a description is treated as having an
 * empty one instead of triggering an undefined-index warning.
 *
 * @param array $data      Plugin data; $data['link']['description'] is extended.
 * @param array $tagScores Map of tag to score.
 *
 * @return array The plugin data with the extended description.
 */
function append_tag_stats(array $data, array $tagScores): array
{
    $stats = [];
    foreach ($tagScores as $tag => $score) {
        $stats[] = " {$tag} : score= {$score} ";
    }

    $description = $data['link']['description'] ?? '';
    $data['link']['description'] = $description . " \n \n Tag Stats: \n " . implode(" \n ", $stats);

    return $data;
}
2024-11-22 11:52:51 +00:00
/**
 * Adds automatically detected tags to a link.
 *
 * Steps:
 * 1. Links without a URL are returned untouched.
 * 2. Titles of youtube.com / youtu.be links get a "[Video] " prefix unless they
 *    already start with "[Video]".
 * 3. The title, URL, description, existing tags and downloaded page text are
 *    scored against the "plugins.AUTO_TAG_KEYWORDS" rules.
 * 4. The best-scoring tags plus the marker tag "auto-tagged" are merged into
 *    the existing tags, without duplicates or empty entries.
 *
 * @param array         $data Plugin data holding the link under $data['link'];
 *                            tags are assumed to be a space-separated string
 *                            (confirm against the Shaarli version in use).
 * @param ConfigManager $conf Configuration store providing the keyword rules.
 *
 * @return array The plugin data with the updated title and tags.
 */
function apply_auto_tags(array $data, ConfigManager $conf): array
{
    if (empty($data['link']['url'])) {
        return $data;
    }

    $url = $data['link']['url'];
    $title = $data['link']['title'] ?? '';

    $isYoutube = stripos($url, 'youtube.com') !== false || stripos($url, 'youtu.be') !== false;
    if ($isYoutube && stripos($title, '[Video]') !== 0) {
        $title = '[Video] ' . $title;
        $data['link']['title'] = $title;
    }

    // Split the existing tags once and drop empty entries caused by repeated
    // or trailing spaces, so they do not end up in the saved tag string.
    $existingTags = array_values(array_filter(
        array_map('trim', explode(' ', $data['link']['tags'] ?? '')),
        static function (string $tag): bool {
            return $tag !== '';
        }
    ));

    $keywordsToTags = $conf->get('plugins.AUTO_TAG_KEYWORDS', []);
    if (!is_array($keywordsToTags)) {
        // A malformed setting must not break saving a link.
        $keywordsToTags = [];
    }

    $searchContents = [
        'title' => $title,
        'url' => $url,
        'description' => $data['link']['description'] ?? '',
        'existing' => implode(' ', $existingTags),
        'content' => fetch_page_content($url),
    ];

    // Calculate scores for each tag
    $tagScores = calculate_tags($keywordsToTags, $searchContents);

    // To debug the automated tags, append the scores to the description:
    // $data = append_tag_stats($data, $tagScores);

    // Filter and limit tags
    $tagsToAdd = array_map('trim', filter_and_limit_tags($tagScores));
    $tagsToAdd[] = 'auto-tagged'; // Add a fixed tag to indicate auto-tagging

    // Merge and remove duplicates; existing tags keep their position.
    $data['link']['tags'] = implode(' ', array_unique(array_merge($existingTags, $tagsToAdd)));

    return $data;
}
/**
 * "render_editlink" hook of the auto-tag plugin.
 *
 * Runs the auto-tagging on the link carried by $data. No distinction is made
 * between new and already saved links: both are tagged.
 *
 * @param array         $data Plugin data containing the link being edited.
 * @param ConfigManager $conf Configuration store providing the keyword rules.
 *
 * @return array The plugin data after auto-tagging.
 */
function hook_auto_tag_plugin_render_editlink(array $data, ConfigManager $conf): array
{
    $taggedData = apply_auto_tags($data, $conf);

    return $taggedData;
}
/**
 * "save_link" hook of the auto-tag plugin.
 *
 * Runs the auto-tagging on the link carried by $data.
 *
 * @param array         $data Plugin data containing the link being saved.
 * @param ConfigManager $conf Configuration store providing the keyword rules.
 *
 * @return array The plugin data after auto-tagging.
 */
function hook_auto_tag_plugin_save_link(array $data, ConfigManager $conf): array
{
    $taggedData = apply_auto_tags($data, $conf);

    return $taggedData;
}