diff --git a/auto_tag_plugin_with_ai/auto_tag_plugin_with_ai.php b/auto_tag_plugin_with_ai/auto_tag_plugin_with_ai.php new file mode 100644 index 0000000..6a10876 --- /dev/null +++ b/auto_tag_plugin_with_ai/auto_tag_plugin_with_ai.php @@ -0,0 +1,294 @@ +setEmpty('plugins.AUTO_TAG_KEYWORDS', [ + 'accessibility,accessibilité,web development,web design,html,css,websites' => 'web', + 'accounting,comptabilité' => 'accounting', + 'addict,addiction,addicted,drugs,drogues,cigarettes,bonbons,dopamine,sucre,nicotine,hooked' => 'addiction', + 'adhd,tdah' => 'adhd', + 'alternative,alternatives,compatible,compatibles,migrated,migration' => 'alternatives', + 'ads,advertisements,publicités,spam' => 'ads', + 'anxiété,anxieux,anxieuse,anxiété,burnout,méditation,cardio,santé,health,healthy' => 'health-and-wellness', + 'architecture,architectures' => 'architecture', + 'archive,archiving,archives,archivage' => 'archiving,archives', + 'art,arts,folklore' => 'culture', + 'ask hn,news.ycombinator.com/item,reddit.com/r/' => 'debate', + 'autism,autisme,autist,autiste' => 'autism', + 'belgian,belgium,belge,belgique' => 'belgium', + 'board game,board games,boardgame,jeu de société,jeux de société' => 'board-games,geek', + 'bookmarks,favoris,signets' => 'bookmarks-management', + 'browsers,navigateurs,web browsers' => 'web-browsers', + 'books,livre,livres,quatrième de couverture,roman,novel,reading-list' => 'reading-and-literature,inspiration,culture', + 'robots.txt,bots,spam,crawling,ddos' => 'spam', + 'bruxelles,brussels' => 'brussels', + 'calm tech,calmness,calm,technologie calme' => 'calm-tech', + 'cars,car bloat,vehicles,véhicule,véhicules,vehicle,automobile,automotive' => 'transport', + 'cheatsheet,cheat sheet,cheat-sheet,antisèche' => 'guides-and-tips', + 'cleaning,nettoyage,deep clean,cleanse,deep cleans' => 'cleaning', + 'cloud,aws,amazon,cloudron' => 'cloud', + 'cloudron' => 'cloudron,hosting', + '(comic),-comic-,comics,bandes dessinées' => 
'comics,reading-and-literature,culture,humor', + 'comparison,comparer, vs ,versus,comparatif,comparaison' => 'comparison', + 'communicate,communication,messaging,messenger,gmail,communiquer,écriture inclusive,la langue,académie française,language' => 'communication', + 'complex,impossible to solve,complicated,very difficult,complexe,compliqué,complexity' => 'complexity,its-complicated', + 'computers,personal computer,the pc' => 'pc', + 'configure,configuration,paramétrer,paramétrage' => 'configuration', + 'database,databases,RDS,base de données' => 'databases', + 'data collection,collecte de données' => 'data-collection', + 'data transfer,transfert de données' => 'data-portability', + 'debug,troubleshoot,diagnose,résoudre,diagnostiquer,troubleshooting,find a solution,fix bugs' => 'problem-solving,guides-and-tips,debugging', + 'design,designs' => 'design', + 'development workflow,devex,flux de développement' => 'devex', + 'digital garden' => 'digital-garden', + 'disk,disque,disques' => 'storage', + 'distraction,procrastination,procrastine,procrastiner,glander' => 'procrastination', + 'diy,self-host,héberger soi-même,my personal,fait maison' => 'diy', + 'dns,network,tcp,wireshark,réseau' => 'network', + 'docker,docker-compose,docker compose,container,containers,k8s,eks,kubernetes,minikube,k3s,helm,openshift' => 'container-technology,devops', + 'documentation,docs,document,documentation technique' => 'documentation', + 'drawing' => 'drawing', + 'drinks,soda,beer,coffee' => 'drinks', + 'ses droits,legally,legalement,légal' => 'legal', + 'elixir,python,pip,php,rust,golang,programming,developer,software engineer,software development,developers,développeurs' => 'software-development', + 'emulator,emulation,émulateur,émulation' => 'emulation', + 'entrepreneurship,entrepreneurs,entrepreneuriat' => 'business', + 'espresso,coffee,café' => 'coffee,drinks', + 'ethic,ethique,ethics' => 'ethics', + 'explor,going deep,exploration' => 'discovery', + 'en sécurité,enfin 
libre,libéré,unsafe,liberté' => 'privacy-and-security,freedom', + 'libre,free,liberté,degoogling,degoogle,degoogler,dégooglisation' => 'freedom', + 'logiciel libre,free software,logiciel gratuit,free to use' => 'free-software', + 'from home,remote work,work remote,travail à distance,télétravail' => 'remote-work', + 'frustrated,frustration,am pissed,I hate' => 'rant', + 'big-tech,gafam,degoogling,google,degoogle,géants américains,grandes entreprises technologiques,meta quest,meta ai' => 'big-tech', + 'game,jeu vidéo,game dev,jeux,jeux vidéo,games,gameplay' => 'games,geek,culture', + 'gamedev,building game,game programming,game engine,moteur de jeu,game development,développement de jeux' => 'gamedev,games,geek,culture', + 'gratuit,free' => 'free', + 'gitops,gitlab,github actions,devops,SRE,ci/cd,platform-engineering,ci pipeline,application deployment,dagger,renovatebot,dependabot,continuous integration,site reliability eng' => 'devops', + 'git,gitlab,jujutsu,pijul,mercurial,svn,version control,contrôle de version' => 'version-control', + 'gpt,chatgpt,llm,llms,artificial intelligence,code generation,genai,vibe code,vibe coding,intelligence artificielle,IA,l\'ia,ai,ai model,an ai,metal ai,auto-coder,autonomous AI' => 'ai', + 'hacking,piratage' => 'hacking', + 'nutrition,food,alimentation,nourriture,recette,recette de cuisine,recettes de cuisine,ingrédients,cette recette,beurre,cuisine' => 'food', + 'history,histoire,documentaire,documentary' => 'history', + 'humans,humains' => 'humans', + 'humor,humour' => 'humor', + 'idiocracy' => 'idiocracy', + 'réduire sa dépendance' => 'independence,freedom', + 'inspiration,creativity,creative,inspiration,créativité' => 'inspiration', + 'leadership,staff engineering,gestion' => 'leadership', + 'lambic,gueuze,beer,bière,bières' => 'beer-and-brewing,drinks', + 'linux,ubuntu,debian,linux windows macos,sur mac,sur windows' => 'os', + 'list,index of,awesome,installation,GitHub - ,liste' => 'discovery', + 'low-tech,low tech,technologie 
simple' => 'low-tech', + 'merdification,enshittif,AI-generated,crapification,decline in quality,déclin de qualité' => 'enshittification', + 'misinformation,fact-checking,fact checking' => 'misinformation', + 'monitoring,metrics,to monitor,surveillance,métriques' => 'monitoring,metrics', + 'movie,cinéma,cinema,film,films' => 'movies,culture,geek', + 'music,spotify,radios,webradios,soundtrack,bande originale,musique' => 'music', + 'newsletter,news' => 'newsletter,news', + 'nostalgia,nostalgie,things used to be better,internet archive' => 'nostalgia', + 'obsidian,note taking,note-taking,takings notes,note-geek,capturing knowledge,knowledge management,prise de notes,gestion de connaissances,knowledge transfer,transferring knowledge,your notes,my notes' => 'knowledge-management,note-taking', + 'ocr' => 'ocr', + 'open-source,open source,code source libre' => 'open-source,free', + 'optimize,optimization,speed up,an efficient,optimiser,optimisation' => 'optimization', + 'philosophe,philosophia,philosophy,lifestyle,philosophie' => 'philosophy', + 'photography,photos,photographie' => 'photos', + 'podcast,podcasts' => 'podcast', + 'ego,narcissism,narcissist,narcissisme,psycholog,psychologie' => 'psychology', + 'voting,politic,politique,vote,multiculturalism,culturalism,cultural integration,political' => 'politics', + 'python,logiciels en python' => 'python,software-development,code', + 'principles' => 'principles', + 'privatebin' => 'secrets', + 'productivity,time management,timetracker' => 'productivity-and-management', + 'programming languages,langages de programmation' => 'code,computer-languages', + 'publishing,publier,publication' => 'publishing', + 'quality,qualité' => 'quality', + 'recommandations,recommendation,conseils' => 'recommendations', + 'recycling,sustainable,green web,climate,recyclage,web écologique,climat,ai emissions,water use,emissions produced,amount of co2,global co2,co2 emissions,car bloat,environnement,environment,environmental,environmentally,plastic 
waste' => 'ecology', + 'relationship,relationships,de rencontre,meaningful connections' => 'relationships', + 'religion,chretiens,bible,coran,islam,musulmans,croyants,athée,la foi,église' => 'philosophy', + 'reviews,critique,avis' => 'reviews', + 'rss,rss feed,miniflux,web reader,lecteur web' => 'content-aggregation,content-curation', + 'ruby,rails app,applications ruby' => 'ruby,software-development,code', + 'science,sciences,scientifique,scientist' => 'science', + 'scripting,jq,curl,wget,script,bash,terminal,bash script,#!/bin/bash,script python,python script,lua,script shell,script bash,shell script,shell' => 'scripting', + 'search engine,moteur de recherche' => 'search-engines', + 'security,permission,sécurité,securing,anti vol,anti-vol,secure,data privacy,privacy,private,degoogl,gdpr,data protection,online tracking,user profiling,anonymo,anonymi,surveillance,malware,spyware,decentrali,secrets,privacy matters,vpn,passkey,protéger,password manager,vie privée' => 'privacy-and-security', + 'simplicity,minimal,declutter,stopped using,simple,simplification,simplifier,reduction in,no longer needed,minimalisme,simplicité,réduction' => 'minimalism', + 'small web,indie web,indieweb,petit web,small-web' => 'small-web', + 'smartphone,android,mobile,phone,téléphone,sms' => 'mobile', + 'snippet,extrait de code' => 'code', + 'static site,static-site,site statique' => 'static-site', + 'social media,réseau social,fediverse,fédiverse,réseaux sociaux,social networks,social network,meta quest' => 'social-media', + 'society,societies' => 'society', + 'teamwork,collaborat,équipe,cooperat,coordinat,travail équipe' => 'collaboration', + 'technology,technologie' => 'technology', + 'template,modèle' => 'template', + 'terminal,terminaltrove' => 'terminal,tools-and-resources', + 'test,tester' => 'testing', + 'markdown,text files,fichiers texte,formats,text-based,plaintext,markup language,markdown,plain text,basé sur du texte,langage balisé' => 'plaintext,formats,text-files', + 'time to 
update,maintainers,tech-debt,legacy code,long term software,temps pour mettre à jour' => 'maintenance,tech-debt', + 'to do,to-do,à faire' => 'todo', + 'tool,tools-and-resources,resources,a script,outil,a collection,a catalog,awesome list,links,outils,ressources,password manager,logiciels' => 'tools-and-resources', + 'training,course,conference talk,learning,homeschool,expert,specializ,tacit knowledge,tribal knowledge,formation,cours,conférence,apprentissage' => 'education', + '.txt,text-based,fichiers txt' => 'text-files', + 'ui,interfaces utilisateur' => 'ui', + 'ux,the experience of,usable,uxer,user experience,expérience utilisateur' => 'ux', + 'vps,serveur privé virtuel' => 'cloud,privacy-and-security,hosting', + 'web archive,web archiving,save any website,bookmarks,wayback machine,archive.org,archivebox,archive web' => 'web-archiving', + 'webring' => 'discovery,small-web,webring', + 'wordpress,personal website,personal websites,blog roll,blogroll,blogosphere,webring,digital garden,to blog,blogs' => 'blogging,writing,discovery,small-web', + 'work,travail,contract work,previous job,my work,coworkers,coworker,the job,workgroup' => 'work', + 'of writing' => 'writing', + 'youtube.com,invidious,peertube,watch?v' => 'video', + 'zoemp,zoemp.be' => 'zoemp' + ]); + + // Configure Mistral API settings + $conf->setEmpty('plugins.MISTRAL_API_KEY', 'YOUR_MISTRAL_API_KEY'); + $conf->setEmpty('plugins.MISTRAL_API_URL', 'https://api.mistral.ai/v1/chat/completions'); + $conf->setEmpty('plugins.MISTRAL_MODEL', 'mistral-large-latest'); + $conf->setEmpty('plugins.AUTO_TAG_WHITELIST', [ + 'guides-and-tips','productivity-and-management','software-development','auto-tagged', + 'privacy-and-security','web','tools-and-resources','testing','devops','documentation', + 'problem-solving','mobile','cloud','alternatives','organizing','social-media', + 'education','culture','communication','open-source','quality','inspiration', + 
'minimalism','complexity','os','design','wishlist','optimization','ai', + 'content-aggregation','blogging','freedom','games','economics','collaboration', + 'health-and-wellness','automation','version-control','music','work','scripting', + 'discovery','comparison','philosophy','web-browsers','ecology','video','todo', + 'finance','x2','podcast','humor','reading-and-literature','small-web','creativity', + 'text-files','search-engines','backup','hardware','monitoring','comics','business', + 'its-complicated','maintenance','big-tech','metrics','terminal','prioritization', + 'ux','software-architecture','geek','plaintext','food','ownership', + 'work-life-balance','free','internet','society','knowledge-management', + 'container-technology','beer-and-brewing','code','enshittification','nostalgia', + 'content-curation','free-software','belgium','coffee','static-site','ideas', + 'brussels','humans','independence','devex','history','sansguidon','politics', + 'psychology','news','ethics','elegance','calmness','data-portability', + 'idiocracy','shopping','gamedev','debugging','x3','formats','remote-work', + 'network','secrets','photos','pictures','misinformation','writing', + 'bookmarks-management','crisis','diy','hacking','science','webring', + 'configuration','autism','web-archiving','databases','low-tech','products', + 'ui','via-yohan-courbe','projects','family','publishing','adhd','archiving', + 'leadership','legal','war-and-peace','data-collection','drinks','human-rights', + 'archives','addiction','newsletter','debate','slow-programming','note-taking', + 'search','movies','reviews','computer-languages','digital-garden','ads', + 'emulation','spam','storage','events','zombies','architecture','bugs','fixme', + 'technology','pc','python','procrastination','rant','tv-shows','people', + 'tech-debt','principles','tracking','sharing','synchronization','maps', + 'patterns','relationships','uses','media','slow-thinking','email','plugins', + 
'travel','calm-tech','download','parenting','power','slow-living','zoemp',
        'board-games','files-management','personal-development','recommendations',
        'space','toys','via-vl','quotes','template','transport','account','accounting',
        'animation','shaarli','to-track','x4','boring-tech','horror','system-thinking',
        'user-profiling','bookmarklets','ocr','slow-web','tapas','to-subscribe',
        'via-antoine-dotreppe','drawing','ruby','via-frederique','zerosums',
        'via-david-tillemans','cleaning','for-aaron','to-follow','to-linkback',
        'user-tracking','via-xavier','ThrashMetal','lua','noel','php','rust',
        'via-christophe-gerard','via-damien-bravin'
    ]);
    $conf->setEmpty('plugins.AUTO_TAG_MAX_TAGS', 10);
}

/**
 * Download a web page and reduce it to plain text.
 *
 * Only absolute http(s) URLs are fetched. Anything else (empty string, local
 * path, file://, php://, data: ...) yields an empty string without touching
 * the filesystem or the network.
 *
 * @param string $url       Address of the page to fetch.
 * @param int    $maxLength Maximum number of bytes of text to return; 0 or less disables the cap.
 *
 * @return string Text of the page with markup removed, or '' when nothing usable was fetched.
 */
function fetch_page_content(string $url, int $maxLength = 20000): string
{
    // The URL comes straight from the bookmark. Without this guard
    // file_get_contents() would read local files or stream wrappers, and an
    // empty path throws a ValueError on PHP 8 that "@" cannot silence.
    if (!preg_match('#^https?://#i', $url)) {
        return '';
    }

    // Bound both the wait and the amount of data pulled into memory.
    $context = stream_context_create(['http' => ['timeout' => 10]]);
    $html = @file_get_contents($url, false, $context, 0, 2 * 1024 * 1024);
    if ($html === false || $html === '') {
        return '';
    }

    // Drop non-visible blocks before stripping tags. preg_replace() returns
    // null on PCRE failure (e.g. backtrack limit); fall back to the raw HTML.
    $clean = preg_replace('/<(script|style|head|noscript)[^>]*>.*?<\/\1>/is', '', $html) ?? $html;

    // Decode entities after strip_tags() so that an encoded "&lt;" is not
    // mistaken for markup, then collapse the whitespace left by removed tags.
    $text = html_entity_decode(strip_tags($clean), ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $text = trim(preg_replace('/\s+/', ' ', $text) ?? $text);

    if ($maxLength > 0 && strlen($text) > $maxLength) {
        // mb_strcut() cuts on a character boundary; plain substr() is the fallback.
        $text = function_exists('mb_strcut')
            ? mb_strcut($text, 0, $maxLength, 'UTF-8')
            : substr($text, 0, $maxLength);
    }

    return $text;
}

/**
 * Send a single-message chat completion request and return the reply text.
 *
 * Reads plugins.MISTRAL_API_URL, plugins.MISTRAL_MODEL and
 * plugins.MISTRAL_API_KEY from the configuration.
 *
 * @param ConfigManager $conf      Configuration holding the API settings.
 * @param string        $prompt    Content of the single "user" message.
 * @param int           $maxTokens Value sent as "max_tokens".
 *
 * @return string Content of the first choice, or '' on any failure (missing key,
 *                encoding error, transport error, non-2xx status, unexpected body).
 */
function ask_mistral(ConfigManager $conf, string $prompt, int $maxTokens): string
{
    // Do not ship the prompt anywhere while the key is unset or still the
    // placeholder default.
    $apiKey = (string) $conf->get('plugins.MISTRAL_API_KEY');
    if ($apiKey === '' || $apiKey === 'YOUR_MISTRAL_API_KEY') {
        return '';
    }

    // Scraped pages are not guaranteed to be valid UTF-8; without the
    // substitute flag json_encode() returns false and nothing valid is posted.
    $payload = json_encode([
        'model'       => $conf->get('plugins.MISTRAL_MODEL'),
        'messages'    => [['role' => 'user', 'content' => $prompt]],
        'max_tokens'  => $maxTokens,
        'temperature' => 0.7,
    ], JSON_INVALID_UTF8_SUBSTITUTE);
    if ($payload === false) {
        return '';
    }

    $ch = curl_init($conf->get('plugins.MISTRAL_API_URL'));
    if ($ch === false) {
        return '';
    }

    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => [
            'Content-Type: application/json',
            'Authorization: Bearer ' . $apiKey,
        ],
        CURLOPT_TIMEOUT        => 10,
    ]);
    $resp = curl_exec($ch);
    $status = (int) curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    curl_close($ch);

    // Treat transport failures and HTTP errors alike: no usable answer.
    if (!is_string($resp) || $resp === '' || $status < 200 || $status >= 300) {
        return '';
    }

    $json = json_decode($resp, true);
    $content = is_array($json) ? ($json['choices'][0]['message']['content'] ?? '') : '';

    // The declared return type is string; anything else counts as "no answer".
    return is_string($content) ? $content : '';
}

/**
 * Add an AI-written summary and whitelisted tags to a bookmark.
 *
 * Bookmarks whose tag string already contains "auto-tagged" are returned
 * untouched. When neither a description nor fetchable page text is available
 * the bookmark only receives the "404" tag. When the API gives no answer the
 * bookmark is returned unchanged, so a later call can try again.
 *
 * NOTE(review): assumes $data['link'] holds 'title', 'url', 'description' and a
 * whitespace-separated 'tags' string - confirm against the host application.
 *
 * @param array         $data Hook payload with the bookmark under the 'link' key.
 * @param ConfigManager $conf Configuration providing the whitelist, tag limit and API settings.
 *
 * @return array The payload, with description and tags updated when applicable.
 */
function apply_auto_tags(array $data, ConfigManager $conf): array
{
    $rawTags = $data['link']['tags'] ?? '';
    if (stripos($rawTags, 'auto-tagged') !== false) {
        return $data;
    }

    $title = $data['link']['title'] ?? '';
    $url   = $data['link']['url'] ?? '';
    $desc  = trim($data['link']['description'] ?? '');
    $body  = $desc !== '' ? $desc : fetch_page_content($url);

    // NO_EMPTY keeps an empty tag string from producing a '' pseudo-tag.
    $existing = preg_split('/\s+/', $rawTags, -1, PREG_SPLIT_NO_EMPTY) ?: [];

    // Strict comparison: a description of "0" is real content, not a missing page.
    if ($body === '') {
        $data['link']['tags'] = implode(' ', array_unique(array_merge($existing, ['404'])));
        return $data;
    }

    $whitelist = (array) $conf->get('plugins.AUTO_TAG_WHITELIST');
    $maxTags   = max(0, (int) $conf->get('plugins.AUTO_TAG_MAX_TAGS'));

    $text = "$title\n$url\n\n$body";

    // NOTE(review): $text is embedded twice (after the summary instructions and
    // after the tag instructions), which doubles the payload - confirm this is intended.
    $prompt  = "TLDr de l'article, style gilfoyle/critique sans blabla et sans markdown/formating, max une ou deux phrases... ";
    $prompt .= "EN FRANÇAIS et si possible avec une mini conclusion crue/honnête, pas de language prétentieux ici, soyons bruts... ";
    $prompt .= "et drôles quand c le cas, sinon francs, éducatifs/informatifs, optimistes mais pas dupes. ";
    $prompt .= "Si possible en français, pas franglais, et si possible pas de \"cela\" mais des \"ça\", pas de zut mais des \"merde\" etc, sans être vulgaire ni raciste ).\n\n";
    $prompt .= $text . "\n\n";
    $prompt .= "Ensuite, parmi ces tags (" . implode(', ', $whitelist) . "), ";
    $prompt .= "propose jusqu'à max " . $maxTags . " tags pertinents (si possible moins) ";
    $prompt .= "pour cet article, sans chiffres, séparés par des espaces. ->\n";
    $prompt .= $text;

    $resp = trim(ask_mistral($conf, $prompt, 800));
    if ($resp === '') {
        // No answer: leave the bookmark alone instead of marking it as done.
        return $data;
    }

    // The first line is the summary, everything after it the tag list.
    // A leading "Tags pertinents :" label is dropped in both places.
    $resp  = preg_replace('/^Tags pertinents\s*:\s*/i', '', $resp) ?? $resp;
    $parts = preg_split('/\R+/', $resp, 2) ?: [$resp];
    $tldr  = trim($parts[0] ?? '');

    $candidates = [];
    if (isset($parts[1])) {
        $tagLine = trim($parts[1]);
        $tagLine = preg_replace('/^Tags pertinents\s*:\s*/i', '', $tagLine) ?? $tagLine;
        // Accept commas/semicolons as separators and strip decoration such as
        // "#tag" or "`tag`", which would otherwise never match the whitelist.
        $candidates = array_map(
            fn($t) => trim($t, "#.`*\"'"),
            preg_split('/[\s,;]+/', $tagLine, -1, PREG_SPLIT_NO_EMPTY) ?: []
        );
    }

    // Keep whitelisted suggestions only (in whitelist order) that are not already present.
    $suggested = array_values(array_diff(array_intersect($whitelist, $candidates), $existing));

    // Quote the user's own description, then append the summary.
    if ($desc !== '') {
        $desc = implode("\n", array_map(fn($l) => '> ' . $l, preg_split('/\R/', $desc)));
    }
    $data['link']['description'] = trim($desc . "\n\n" . $tldr);

    // The limit applies to the tags being added: existing tags are never
    // dropped, and the "auto-tagged" marker is always kept so that the
    // bookmark is not processed again on the next call.
    $room = max(0, $maxTags - count($existing));
    $all  = array_unique(array_merge($existing, array_slice($suggested, 0, $room), ['auto-tagged']));
    $data['link']['tags'] = implode(' ', $all);

    return $data;
}

/**
 * Hook wrapper: passes the payload through apply_auto_tags().
 *
 * NOTE(review): the diff header places this file in "auto_tag_plugin_with_ai",
 * while the hook is prefixed "hook_auto_tag_plugin_" - confirm the host
 * application's naming convention actually picks this function up.
 *
 * @param array         $data Hook payload.
 * @param ConfigManager $conf Application configuration.
 *
 * @return array Payload returned by apply_auto_tags().
 */
function hook_auto_tag_plugin_render_editlink(array $data, ConfigManager $conf): array
{
    return apply_auto_tags($data, $conf);
}

/**
 * Hook wrapper: passes the payload through apply_auto_tags().
 *
 * NOTE(review): apply_auto_tags() reads the bookmark from $data['link'];
 * confirm that this hook receives the same payload shape as
 * hook_auto_tag_plugin_render_editlink().
 *
 * @param array         $data Hook payload.
 * @param ConfigManager $conf Application configuration.
 *
 * @return array Payload returned by apply_auto_tags().
 */
function hook_auto_tag_plugin_save_link(array $data, ConfigManager $conf): array
{
    return apply_auto_tags($data, $conf);
}