// v0.01 Initial public beta
require_plugin('smd_lib');

global $smd_fuzzLang;

// Default strings handed to smd_MLP; {search_term} and {thingies} are
// placeholders substituted via gTxt() inside smd_fuzzy_find().
$smd_fuzz_str = array(
    'too_short' => 'The text you are searching for is probably too short. Try a longer word. ',
    'no_match' => 'Sorry, no results matched "{search_term}" exactly. ',
    'suggest' => 'Here are the closest matching {thingies} that may help you find what you are looking for: ',
    'suggest_join' => 'and',
    'articles' => 'articles',
    'words' => 'words',
);
$smd_fuzzLang = new smd_MLP('smd_fuzzy_find', 'smd_fuzz', $smd_fuzz_str);

/**
 * Suggest near-miss words and/or matching items for a search term.
 *
 * Every row of the chosen table is scanned: the configured columns are joined
 * into one text block, handed to smd_FuzzyFind::search(), and the closest word
 * per row is picked using edit distance, metaphone and soundex comparisons.
 *
 * Attributes (defaults in brackets):
 *  - form            ['search_results'] form fetched and parsed for each matching article row
 *  - section         ['']               sections to include / exclude (articles only)
 *  - subcats         ['0']              accepted but not used by this function
 *  - search_term     ['?q']             term to look for, expanded by smd_getAtts()
 *  - match_with      ['article:keywords;body;excerpt'] "type:field;field;..." to search in
 *  - tolerance       ['2']              passed (as int) to smd_FuzzyFind
 *  - min_word_length ['4']              candidate words shorter than this are ignored
 *  - limit           ['words:5, articles:10'] a bare number (applies to both) or "what:number" pairs
 *  - case_sensitive  ['0']              when false, candidate words are lower-cased
 *  - show            ['words, articles'] which result kinds to output
 *  - no_match_label, suggest_label, too_short_label ['#'] '#' selects the built-in string
 *
 * @param  array  $atts  Tag attributes.
 * @param  string $thing Container content; not used.
 * @return string Labels followed by the suggested words and/or parsed forms.
 */
function smd_fuzzy_find($atts, $thing='')
{
    global $pretext;
    global $smd_fuzzLang;

    extract(lAtts(array(
        'form' => 'search_results',
        'section' => '',
        'subcats' => '0',
        'search_term' => '?q',
        'match_with' => 'article:keywords;body;excerpt',
        'tolerance' => '2',
        'min_word_length' => '4',
        'limit' => 'words:5, articles:10',
        'case_sensitive' => '0',
        'show' => 'words, articles',
        'no_match_label' => '#',
        'suggest_label' => '#',
        'too_short_label' => '#',
    ), $atts));

    $searchAllow = array("?c", "!c", "?s", "!s", "?t", "!t", "?id", "!id", "?q", "!q", "?field", "!field");
    $showAllow = array("articles", "words");
    // DB column name => "type:field" as written in the match_with attribute
    $colNames = array(
        'Keywords' => "article:keywords",
        'Body' => "article:body",
        'Excerpt' => "article:excerpt",
        'Category1' => "article:category1",
        'Category2' => "article:category2",
        'Section' => "article:section",
        'ID' => "article:id",
        'AuthorID' => "article:authorid",
        'Title' => "article:title",
        'category' => "image:category",
        'alt' => "image:alt",
        'caption' => "image:caption",
        'name' => "image:name",
    );
    // DB table name => content type
    $places = array('textpattern' => "article", 'txp_image' => "image");
    $secsin = array();
    $dbField = array();

    // Work out which table and columns to search. An unrecognised content type
    // falls back to articles, so the table and $isArticle always agree.
    $lookin = smd_splitRange($match_with, ":");
    $contentType = (isset($lookin[0]) && in_array($lookin[0], $places, true)) ? $lookin[0] : "article";
    $dbTable = array_search($contentType, $places, true);
    $isArticle = ($contentType === "article");

    $dbFields = isset($lookin[1]) ? smd_splitRange($lookin[1], ";") : array();
    foreach ($dbFields as $lookField) {
        $colName = array_search($contentType.":".$lookField, $colNames, true);
        // Ignore unknown fields rather than indexing the row with `false` later
        if ($colName !== false) {
            $dbField[] = $colName;
        }
    }

    // Expand the args in case they're ? or ! shortcuts
    $search_term = smd_getAtts($search_term, $searchAllow);
    $search_term = $search_term[0][0];
    $meta_search = metaphone($search_term);
    $sound_search = soundex($search_term);
    $tolerance = intval($tolerance);
    $min_word_length = intval($min_word_length);

    // Translate the recognised "show" entries for use in the suggestion label
    $show = smd_getAtts($show, array());
    $showList = $show[0];
    foreach ($showList as $key => $showItem) {
        if (in_array($showItem, $showAllow, true)) {
            $showList[$key] = $smd_fuzzLang->gTxt($showItem);
        }
    }

    // A bare number limits both result kinds; "kind:number" limits just one
    $limitBy = array();
    $limit = smd_getAtts($limit, array());
    foreach ($limit[0] as $limOption) {
        if (is_numeric($limOption)) {
            $limitBy["articles"] = $limOption;
            $limitBy["words"] = $limOption;
        } else {
            $limsplit = smd_splitRange($limOption, ":");
            if ((count($limsplit) == 2) && in_array($limsplit[0], $showAllow, true) && is_numeric($limsplit[1])) {
                $limitBy[$limsplit[0]] = $limsplit[1];
            }
        }
    }

    // Labels: '#' means "use the built-in string"
    // NOTE(review): $search_term is echoed inside the default no_match label;
    // confirm that smd_getAtts() or the caller escapes it for HTML output.
    $thingies = implode(" ".$smd_fuzzLang->gTxt('suggest_join')." ", $showList);
    $no_match_label = ($no_match_label === "#") ? $smd_fuzzLang->gTxt('no_match', array("{search_term}" => $search_term)) : $no_match_label;
    $suggest_label = ($suggest_label === "#") ? $smd_fuzzLang->gTxt('suggest', array("{thingies}" => $thingies)) : $suggest_label;
    $too_short_label = ($too_short_label === "#") ? $smd_fuzzLang->gTxt('too_short') : $too_short_label;

    // Element 0 of the result is used as the include list, element 1 as the exclude list
    $sections = smd_getAtts($section, array("?s", "!s", "?field", "!field"));
    $fullSecList = $sections[0];
    $notSecList = $sections[1];

    $where = "";
    if ($isArticle) {
        // Included sections; values are escaped because they may originate from the URL
        $quoted = array();
        foreach ($fullSecList as $secName) {
            $quoted[] = "'" .doSlash($secName). "'";
        }
        if (count($quoted) > 0) {
            $secsin[] = "section IN (" .implode(",", $quoted). ")";
        }

        // Excluded sections
        $quoted = array();
        foreach ($notSecList as $secName) {
            $quoted[] = "'" .doSlash($secName). "'";
        }
        if (count($quoted) > 0) {
            $secsin[] = "section NOT IN (" .implode(",", $quoted). ")";
        }

        // NOTE(review): the include and exclude clauses are joined with OR,
        // as in the original; confirm AND was not intended.
        if (count($secsin) > 0) {
            $where .= "(" . implode(" OR ", $secsin) . ")";
        }
    }

    // Catch-all for the where statement
    if ($where == "") {
        $where = "1=1";
    }

    // Perform the searches
    $finder = new smd_FuzzyFind($search_term, $tolerance);
    if ($finder->too_short_err) {
        return $no_match_label . $too_short_label;
    }

    $out = "";
    $cols = "*" . ($isArticle ? ", unix_timestamp(Posted) AS uPosted" : "");
    $rs = safe_rows_start($cols, $dbTable, $where);
    $opform = $isArticle ? fetch_form($form) : "";
    // NOTE(review): $pageurl is computed but never used below; presumably it
    // was meant for linking the suggested words. Kept so the helper call is unchanged.
    $pageurl = smd_removeQSVar($pretext['request_uri'],'q');

    $artList = array();
    // Keyed by md5(word) so each suggested word appears only once
    $wordList = array();
    $wordDist = array();

    if ($rs) {
        while ($row = nextRow($rs)) {
            // Join all the required places to look into a long text block
            $allFields = "";
            foreach ($dbField as $theField) {
                $allFields .= $row[$theField].",";
            }

            // Split them up via smd_getAtts() and reconstitute as one space-delimited string
            $werds = smd_getAtts($allFields, array());
            $werds = implode(" ", $werds[0]);

            // Find close word matches; keys are passed to smd_getWord(), values are distances
            $matches = $finder->search($werds);
            if (count($matches) == 0) {
                continue;
            }

            $shortestDist = 100; // A stupidly high number to start with
            $shortestMetaDist = -1; // Negative means "not set yet"
            $closestWord = "";
            $closestMetaWord = "";
            $max_term_len = 0;

            // Track the closest word by match distance and by metaphone distance
            foreach ($matches as $matchPos => $dist) {
                $term = smd_getWord($werds, $matchPos);
                $max_term_len = max($max_term_len, strlen($term));
                if (strlen($term) < $min_word_length) {
                    continue;
                }
                $term = ($case_sensitive) ? $term : strtolower($term);
                if ($dist < $shortestDist) {
                    $shortestDist = $dist;
                    $closestWord = $term;
                }
                $levDist = levenshtein($meta_search, metaphone($term));
                if ($levDist <= $shortestMetaDist || $shortestMetaDist < 0) {
                    $shortestMetaDist = $levDist;
                    $closestMetaWord = $term;
                }
            }

            // Pick the one that sounds closest to the original
            if (trim($closestWord) != "") {
                $sound1 = levenshtein(soundex($closestWord), $sound_search);
                $sound2 = levenshtein(soundex($closestMetaWord), $sound_search);
                if ($sound1 < $sound2) {
                    $wordKey = md5($closestWord);
                    $wordList[$wordKey] = $closestWord;
                    $wordDist[$wordKey] = $shortestDist;
                } else {
                    $wordKey = md5($closestMetaWord);
                    $wordList[$wordKey] = $closestMetaWord;
                    $wordDist[$wordKey] = $shortestMetaDist;
                }
            }

            // Build the list of matching items, parsed through the form
            if ($max_term_len >= $min_word_length) {
                if ($isArticle) {
                    populateArticleData($row);
                }
                $artList[] = parse($opform);
            }
        }
    }

    // Sort the word list in order of relevance (smallest distance first)
    if (count($wordList) > 0) {
        array_multisort($wordDist, $wordList);
    }

    // Output stuff to the page
    $out .= $no_match_label;
    if (count($wordList) > 0) {
        $out .= (count($showList) > 0) ? $suggest_label : "";

        if (in_array("words", $show[0])) {
            $ctr = 0;
            foreach ($wordList as $item) {
                // >= so that exactly the requested number of words is emitted
                if (array_key_exists("words", $limitBy) && $ctr >= $limitBy["words"]) {
                    break;
                }
                // NOTE(review): the original concatenated empty strings around
                // $item, possibly remnants of stripped markup; output is unchanged.
                $out .= $item.n;
                $ctr++;
            }
        }

        if (in_array("articles", $show[0])) {
            $ctr = 0;
            foreach ($artList as $art) {
                if (array_key_exists("articles", $limitBy) && $ctr >= $limitBy["articles"]) {
                    break;
                }
                $out .= $art;
                $ctr++;
            }
        }
    }

    return $out;
}