/**
 * Textpattern tag handler: suggest words and articles that closely match a
 * search term when an exact search found nothing.
 *
 * Rows are read from the tables named in `match_with`, the requested columns
 * are flattened into a list of unique words, and smd_FuzzyFind picks the words
 * within `tolerance` of the search term. The closest word of every matching
 * row is collected (optionally refined with metaphone/soundex comparisons) and
 * each matching row is rendered through `form`.
 *
 * Recognised attributes (defaults in brackets):
 *   form ['search_results'], section [''], category [''], subcats ['0'],
 *   search_term ['?q'], match_with ['article:keywords;body;excerpt'],
 *   tolerance ['2'], min_word_length ['4'], limit ['words:5, articles:10'],
 *   case_sensitive ['0'], refine ['metaphone, soundex'],
 *   show ['words, articles'], status ['live, sticky'], debug ['0'],
 *   no_match_label ['#'], suggest_label ['#'], too_short_label ['#'],
 *   labeltag ['p']. A label of '#' means "use the localised default string".
 *
 * @param  array  $atts  Tag attributes
 * @param  string $thing Tag contents (not used)
 * @return string        Label markup followed by suggested words and/or parsed articles
 */
function smd_fuzzy_find($atts, $thing='') {
	require_plugin('smd_lib');

	// MLP support
	global $pretext, $smd_fuzzLang;

	$smd_fuzz_str = array(
		'too_short' => 'The text you are searching for is probably too short. Try a longer word. ',
		'no_match' => 'Sorry, no results matched "{search_term}" exactly. ',
		'suggest' => 'Here are the closest matching {thingies} that may help you find what you are looking for: ',
		'suggest_join' => 'and',
		'articles' => 'articles',
		'words' => 'words',
	);
	$smd_fuzzLang = new smd_MLP('smd_fuzzy_find', 'smd_fuzz', $smd_fuzz_str);

	extract(lAtts(array(
		'form' => 'search_results',
		'section' => '',
		'category' => '',
		'subcats' => '0',
		'search_term' => '?q',
		'match_with' => 'article:keywords;body;excerpt',
		'tolerance' => '2',
		'min_word_length' => '4',
		'limit' => 'words:5, articles:10',
		'case_sensitive' => '0',
		'refine' => 'metaphone, soundex',
		'show' => 'words, articles',
		'status' => 'live, sticky',
		'debug' => '0',
		'no_match_label' => '#',
		'suggest_label' => '#',
		'too_short_label' => '#',
		'labeltag' => 'p',
	), $atts));

	$searchAllow = array("?c", "!c", "?s", "!s", "?t", "!t", "?id", "!id", "?q", "!q", "?field", "!field");
	$refineAllow = array("metaphone", "soundex");
	$showAllow = array("articles", "words");

	// Keyed by "place:field" so that fields sharing a column name in different
	// tables (image:name / comments:name) can both be resolved. Keying by the
	// column name, as before, let the later entry silently replace the earlier.
	$colNames = array(
		'article:keywords' => 'Keywords',
		'article:body' => 'Body',
		'article:excerpt' => 'Excerpt',
		'article:category1' => 'Category1',
		'article:category2' => 'Category2',
		'article:section' => 'Section',
		'article:id' => 'ID',
		'article:authorid' => 'AuthorID',
		'article:title' => 'Title',
		'image:category' => 'category',
		'image:alt' => 'alt',
		'image:caption' => 'caption',
		'image:name' => 'name',
		'comments:message' => 'message',
		'comments:email' => 'email',
		'comments:name' => 'name',
		'comments:web' => 'web',
	);
	// match_with place name => database table
	$places = array('article' => "textpattern", 'image' => "txp_image", 'comments' => "txp_discuss");

	$dbTables = array();
	$searchFields = array();
	$whereParts = array();

	// Expand the args in case they're ? or ! shortcuts, and do some validity checking.
	$termOpts = smd_getOpts($search_term, $searchAllow, "", false);
	$search_term = isset($termOpts[0][0]) ? (string) $termOpts[0][0] : "";
	if ($debug > 1) {
		dmp($search_term);
	}

	$refineList = array();
	foreach (do_list($refine) as $method) {
		if (in_array($method, $refineAllow)) {
			$refineList[] = $method;
		}
	}
	$meta_search = (in_array("metaphone", $refineList)) ? metaphone($search_term) : "";
	$sound_search = (in_array("soundex", $refineList)) ? soundex($search_term) : "";
	$tolerance = intval($tolerance);
	$min_word_length = intval($min_word_length);

	// match_with needs to be built into a list of database tables and the
	// columns to read from each returned row.
	$lookin = smd_split($match_with, false, ":,\s");
	// Loop over pairs of elements: place name, then its ;-separated field list
	for ($idx = 0; $idx < count($lookin); $idx += 2) {
		$place = $lookin[$idx];
		if (!isset($places[$place])) {
			// Unknown place: skip it rather than adding a bogus table to the query
			continue;
		}
		$fieldSpec = isset($lookin[$idx+1]) ? $lookin[$idx+1] : "";
		$dbTables[] = $places[$place];
		foreach (smd_split($fieldSpec, false, ";") as $lookField) {
			$colKey = $place.":".$lookField;
			if (isset($colNames[$colKey])) {
				$searchFields[] = $colNames[$colKey];
			} else if (strpos($lookField, "custom_") === 0) {
				$searchFields[] = $lookField;
			}
		}
	}
	if (count($dbTables) == 0) {
		// Nothing usable in match_with: fall back to every column of the article table
		$dbTables[] = "textpattern";
		$searchFields = array("*");
	}
	$dbTables = array_values(array_unique($dbTables));
	$searchFields = array_values(array_unique($searchFields));
	$useArticles = in_array("textpattern", $dbTables);

	$showList = array();
	foreach (do_list($show) as $item) {
		if (in_array($item, $showAllow)) {
			$showList[] = $item;
		}
	}

	// limit is either one number for everything, or "what:number" pairs
	$limitBy = array();
	foreach (do_list($limit) as $limOption) {
		if (is_numeric($limOption)) {
			$limitBy["articles"] = $limOption;
			$limitBy["words"] = $limOption;
		} else {
			$limsplit = smd_split($limOption, false, ":");
			if ((count($limsplit) == 2) && (in_array($limsplit[0], $showAllow)) && (is_numeric($limsplit[1]))) {
				$limitBy[$limsplit[0]] = $limsplit[1];
			}
		}
	}

	$thingiesL10n = array();
	foreach ($showList as $item) {
		$thingiesL10n[] = $smd_fuzzLang->gTxt($item);
	}
	$thingies = implode(" ".$smd_fuzzLang->gTxt('suggest_join')." ", $thingiesL10n);

	// The search term may come straight from the request, so it is escaped
	// before being placed in markup. double_encode is off so that a term that
	// was already entity-encoded upstream is not encoded twice.
	$safeTerm = htmlspecialchars($search_term, ENT_QUOTES, 'UTF-8', false);
	$no_match_label = ($no_match_label == "#") ? $smd_fuzzLang->gTxt('no_match', array("{search_term}" => $safeTerm)) : $no_match_label;
	$suggest_label = ($suggest_label == "#") ? $smd_fuzzLang->gTxt('suggest', array("{thingies}" => $thingies)) : $suggest_label;
	$too_short_label = ($too_short_label == "#") ? $smd_fuzzLang->gTxt('too_short') : $too_short_label;

	// Get the document statuses to search
	$statii = smd_getOpts($status, array("!field"), "", false);
	$fullStatList = isset($statii[0]) ? (array) $statii[0] : array();
	$notStatList = isset($statii[1]) ? (array) $statii[1] : array();
	$statsin = array();
	if (count($fullStatList) > 0) {
		$statsin[] = "status IN (" .smd_fuzzy_sqlList(array_map('getStatusNum', $fullStatList)). ")";
	}
	if (count($notStatList) > 0) {
		$statsin[] = "status NOT IN (" .smd_fuzzy_sqlList(array_map('getStatusNum', $notStatList)). ")";
	}
	if (count($statsin) > 0) {
		$whereParts[] = "(" . implode(" OR ", $statsin) . ")";
	}

	$sections = smd_getOpts($section, array("?s", "!s", "!field"), "", false);
	$fullSecList = isset($sections[0]) ? (array) $sections[0] : array();
	$notSecList = isset($sections[1]) ? (array) $sections[1] : array();
	$cats = smd_getOpts($category, array("?c", "!c", "!field"), "", false);
	$fullCatList = isset($cats[0]) ? (array) $cats[0] : array();
	$notCatList = isset($cats[1]) ? (array) $cats[1] : array();

	// Section, category and comment constraints only apply to the article table
	if ($useArticles) {
		// included / excluded sections
		$secsin = array();
		if (count($fullSecList) > 0) {
			$secsin[] = "section IN (" .smd_fuzzy_sqlList($fullSecList). ")";
		}
		if (count($notSecList) > 0) {
			$secsin[] = "section NOT IN (" .smd_fuzzy_sqlList($notSecList). ")";
		}
		if (count($secsin) > 0) {
			$whereParts[] = "(" . implode(" OR ", $secsin) . ")";
		}

		// included / excluded categories (optionally with their sub-categories)
		$catsin = array();
		$inCats = smd_fuzzy_catNames($fullCatList, $subcats);
		if (count($inCats) > 0) {
			$theCats = smd_fuzzy_sqlList($inCats);
			$catsin[] = "( Category1 IN (" .$theCats. ") OR Category2 IN (" .$theCats. ") )";
		}
		$outCats = smd_fuzzy_catNames($notCatList, $subcats);
		if (count($outCats) > 0) {
			$theCats = smd_fuzzy_sqlList($outCats);
			$catsin[] = "( Category1 NOT IN (" .$theCats. ") AND Category2 NOT IN (" .$theCats. ") )";
		}
		if (count($catsin) > 0) {
			$whereParts[] = "(" . implode(" AND ", $catsin) . ")";
		}

		// comments: tie each comment row to its parent article
		if (in_array("txp_discuss", $dbTables)) {
			$whereParts[] = "textpattern.ID = txp_discuss.parentid";
		}
	}

	// Catch-all for the where statement
	$where = (count($whereParts) == 0) ? "1=1" : implode(" AND ", $whereParts);
	if ($debug > 0) {
		dmp($where);
	}

	$openTag = ($labeltag == "") ? "" : "<" .$labeltag.">";
	$closeTag = ($labeltag == "") ? "" : "</" .$labeltag.">";
	$out = "";

	// Perform the searches
	$finder = new smd_FuzzyFind($search_term, $tolerance);
	if ($finder->too_short_err) {
		return $openTag . $no_match_label . $too_short_label . $closeTag;
	}

	$cols = "*" . ($useArticles ? ", unix_timestamp(textpattern.Posted) AS uPosted" : "");
	$rs = safe_rows_start($cols, implode(", ", $dbTables), $where);
	// Only article rows have a form to render through
	$opform = $useArticles ? fetch_form($form) : "";
	// NOTE(review): $pageurl is computed but nothing visible here uses it;
	// presumably the suggested words were meant to link back to it - confirm.
	$pageurl = smd_removeQSVar($pretext['request_uri'],'q');

	$artList = array();
	$wordList = array();
	$wordDist = array();
	$justChars = "/[^a-zA-Z0-9\-\' ]+/";
	$searchAll = in_array("*", $searchFields);

	while ($rs && ($row = nextRow($rs))) {
		// Join all the required places to look into a long text block
		$allFields = "";
		if ($searchAll) {
			$allFields = implode(" ", $row)." ";
		} else {
			foreach ($searchFields as $theField) {
				if (isset($row[$theField])) {
					$allFields .= $row[$theField]." ";
				}
			}
		}

		// Remove between-word punctuation and replace with space
		$allFields = preg_replace($justChars, ' ', $allFields);
		// Split the remainder by (single or multiple) spaces
		$werds = preg_split('/\s+/', $allFields, -1, PREG_SPLIT_NO_EMPTY);
		// ...and reconstitute the unique words as a huge space-delimited string
		$werds = implode(" ", array_unique($werds));
		// Take into account case sensitivity
		$werds = ($case_sensitive) ? $werds : strtolower($werds);
		if ($debug > 1) {
			dmp($werds);
		}

		// Find close word matches
		$matches = $finder->search($werds);
		if ($debug > 1) {
			dmp($matches);
		}
		if (!is_array($matches) || count($matches) == 0) {
			continue;
		}

		$shortestDist = 100; // A stupidly high number to start with
		$shortestMetaDist = -1;
		$closestWord = "";
		$closestMetaWord = "";
		$max_term_len = 0;
		$bestFit = "";
		$term = "";

		// Find the closest matching word(s) in this row
		foreach ($matches as $pos => $dist) {
			$term = smd_getWord($werds, $search_term, $pos);
			// Only words meeting the minimum requirement need apply
			$max_term_len = max($max_term_len, strlen($term));
			if (strlen($term) < $min_word_length) {
				continue;
			}
			$term = ($case_sensitive) ? $term : strtolower($term);
			if ($debug > 2) {
				echo $term . " ";
			}
			if ($dist < $shortestDist) {
				$shortestDist = $dist;
				$closestWord = $term;
			}
			if ($meta_search != "") {
				$meta_term = metaphone($term);
				if ($debug > 2) {
					echo $meta_term . " : " . $meta_search ." ".n;
				}
				$levDist = levenshtein($meta_search, $meta_term);
				if ($levDist <= $shortestMetaDist || $shortestMetaDist < 0) {
					$shortestMetaDist = $levDist;
					$closestMetaWord = $term;
				}
			}
		}

		// Pick the one that sounds closest to the original
		if (trim($closestWord) != "") {
			$wordKey = md5($closestWord);
			$bestFit = $closestWord;
			$bestDist = $shortestDist;
			// Only weigh up the metaphone candidate if there is one; otherwise
			// an empty word could win the soundex comparison.
			if ($sound_search != "" && $closestMetaWord != "") {
				$sound1 = levenshtein(soundex($closestWord), $sound_search);
				$sound2 = levenshtein(soundex($closestMetaWord), $sound_search);
				if ($sound1 >= $sound2) {
					$wordKey = md5($closestMetaWord);
					$bestFit = $closestMetaWord;
					$bestDist = $shortestMetaDist;
				}
			}
			// Keyed by hash so each word is only listed once
			$wordList[$wordKey] = $bestFit;
			$wordDist[$wordKey] = $bestDist;
		}
		if ($debug > 2) {
			dmp("BESTFIT:" .$bestFit);
		}

		// Build an array of matching articles
		if ($max_term_len >= $min_word_length) {
			if ($useArticles) {
				populateArticleData($row);
			}
			// Temporarily assign the closest match to the query string so that
			// the search_result_excerpt can hilight the found words
			$pretext['q'] = ($bestFit != "") ? $bestFit : $term;
			$artList[] = parse($opform);
			$pretext['q'] = $search_term;
		}
	}

	// Sort the word list in order of relevance
	$haveWords = count($wordList) > 0;
	if ($haveWords) {
		array_multisort($wordDist, $wordList);
	}

	// Output stuff to the page. The label wrapper is always closed, whether or
	// not any suggestions were found.
	$out .= $openTag . $no_match_label;
	if ($haveWords) {
		$out .= (count($showList) > 0) ? $suggest_label : "";
		if (in_array("words", $showList)) {
			$ctr = 0;
			foreach ($wordList as $item) {
				if (array_key_exists("words", $limitBy) && $ctr >= $limitBy["words"]) {
					break;
				}
				$out .= $item.n;
				$ctr++;
			}
		}
	}
	$out .= $closeTag;

	if ($haveWords && in_array("articles", $showList)) {
		$ctr = 0;
		foreach ($artList as $art) {
			if (array_key_exists("articles", $limitBy) && $ctr >= $limitBy["articles"]) {
				break;
			}
			$out .= $art;
			$ctr++;
		}
	}

	return $out;
}

/**
 * Private helper: turn a list of values into the inside of an SQL IN (...)
 * clause, i.e. escaped, single-quoted and comma-separated.
 *
 * Values can originate from the URL (?s / ?c shortcuts), so each one goes
 * through Textpattern's doSlash() before being quoted.
 *
 * @param  array  $values Raw values
 * @return string         e.g. 'a','b' (empty string for an empty list)
 */
function smd_fuzzy_sqlList($values) {
	$quoted = array();
	foreach ($values as $value) {
		$quoted[] = "'" .doSlash((string) $value). "'";
	}
	return implode(",", $quoted);
}

/**
 * Private helper: expand a list of article category names, optionally
 * replacing each one with the names getTree() returns for it (minus "root").
 *
 * @param  array $cats    Category names
 * @param  mixed $subcats Truthy to expand each category via getTree()
 * @return array          Flat list of category names
 */
function smd_fuzzy_catNames($cats, $subcats) {
	$names = array();
	foreach ($cats as $cat) {
		if (!$subcats) {
			$names[] = $cat;
			continue;
		}
		$categs = getTree($cat, 'article');
		if (!is_array($categs)) {
			continue;
		}
		foreach ($categs as $categ) {
			if ($categ['name'] != "root") {
				$names[] = $categ['name'];
			}
		}
	}
	return $names;
}