fix(autoTag) improve word matching accuracy
This commit is contained in:
parent
45e6529b51
commit
e127598802
@ -152,7 +152,6 @@ function fetch_page_content($url)
|
|||||||
|
|
||||||
function calculate_tags(array $keywordsToTags, array $searchContents): array
|
function calculate_tags(array $keywordsToTags, array $searchContents): array
|
||||||
{
|
{
|
||||||
// Context weights
|
|
||||||
$contextWeights = [
|
$contextWeights = [
|
||||||
'title' => 3,
|
'title' => 3,
|
||||||
'url' => 3,
|
'url' => 3,
|
||||||
@ -166,16 +165,17 @@ function calculate_tags(array $keywordsToTags, array $searchContents): array
|
|||||||
foreach ($keywordsToTags as $keywords => $tags) {
|
foreach ($keywordsToTags as $keywords => $tags) {
|
||||||
$keywordList = explode(',', $keywords);
|
$keywordList = explode(',', $keywords);
|
||||||
foreach ($keywordList as $keyword) {
|
foreach ($keywordList as $keyword) {
|
||||||
$keyword = trim($keyword);
|
$keyword = preg_quote(trim($keyword), '/');
|
||||||
|
$regex = '/\b' . $keyword . '\b/i';
|
||||||
|
|
||||||
foreach ($searchContents as $context => $content) {
|
foreach ($searchContents as $context => $content) {
|
||||||
if (stripos($content, $keyword) !== false) {
|
if (preg_match($regex, $content)) {
|
||||||
$tagList = explode(',', $tags);
|
$tagList = explode(',', $tags);
|
||||||
foreach ($tagList as $tag) {
|
foreach ($tagList as $tag) {
|
||||||
$tag = trim($tag);
|
$tag = trim($tag);
|
||||||
if (!isset($tagScores[$tag])) {
|
if (!isset($tagScores[$tag])) {
|
||||||
$tagScores[$tag] = 0;
|
$tagScores[$tag] = 0;
|
||||||
}
|
}
|
||||||
// Add weight based on context
|
|
||||||
$tagScores[$tag] += $contextWeights[$context];
|
$tagScores[$tag] += $contextWeights[$context];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user