Extensible search highlighting in PHP
Based on Dean's original Google Hilite, but refactored a bit to make it easy to add support for more search engines (it currently supports some 20-odd major search engines).
<?php
/**
 * Highlights search-engine query terms inside a chunk of HTML.
 *
 * If the current request's HTTP referer is a results page of one of the
 * supported search engines, every query term found in the text portions of
 * $text is wrapped in <span class="searchterm">...</span> and an explanatory
 * notice is prepended. Tags and character entities are never modified, so a
 * term such as "class" or "amp" cannot corrupt the markup.
 *
 * The text is returned unchanged when there is no referer, the referer is not
 * a recognised search engine, the query parameter is missing or empty, or
 * every query word is on the exclusion list.
 *
 * @param string $text HTML (or plain text) to highlight.
 *
 * @return string $text, possibly with highlighted terms and a leading notice.
 */
function search_highlight($text)
{
    // The referer header is optional and fully attacker-controlled.
    $referer = isset($_SERVER['HTTP_REFERER']) ? (string) $_SERVER['HTTP_REFERER'] : '';
    if ($referer === '') {
        return $text;
    }

    $words = search_highlight_query_words($referer);
    if (count($words) === 0) {
        return $text;
    }

    // List of words to exclude from matching.
    $excludes = array(
        'a', 'an', 'the', 'is', 'in', 'are', 'was', 'and', 'by', 'for', 'from',
        'of', 'on', 'with', 'this', 'that', 'shtuff', 'or', ' ', '',
    );

    $terms = array();
    foreach ($words as $word) {
        $term = strtolower($word);
        if (in_array($term, $excludes, true) || in_array($term, $terms, true)) {
            continue;
        }
        $terms[] = $term;
    }
    if (count($terms) === 0) {
        return $text;
    }

    // Split into alternating text / delimiter pieces: even indexes hold text,
    // odd indexes hold a tag or a character entity that must stay untouched.
    $segments = preg_split(
        '/(<[^>]*>|&(?:#\d+|#x[0-9a-f]+|\w+);)/i',
        (string) $text,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );
    if ($segments === false) {
        return $text;
    }
    $segment_count = count($segments);

    // Work out how many distinct terms occur in the text at all; this decides
    // which notice is shown.
    $quoted = array();
    $found = 0;
    foreach ($terms as $term) {
        // preg_quote() stops query text such as "(" or "a|b" from being
        // interpreted as regex syntax.
        $safe = preg_quote($term, '/');
        $quoted[] = $safe;
        for ($i = 0; $i < $segment_count; $i += 2) {
            if (preg_match('/\b' . $safe . '\b/i', $segments[$i])) {
                $found++;
                break;
            }
        }
    }

    if ($found > 0) {
        // One pass with all terms, so markup inserted for one term is never
        // re-scanned (and re-wrapped) while handling another.
        $pattern = '/\b(' . implode('|', $quoted) . ')\b/i';
        for ($i = 0; $i < $segment_count; $i += 2) {
            $highlighted = preg_replace($pattern, '<span class="searchterm">$1</span>', $segments[$i]);
            if ($highlighted !== null) {
                $segments[$i] = $highlighted;
            }
        }
        $text = implode('', $segments);
    }

    $query_terms = htmlspecialchars(implode(' ', $words), ENT_QUOTES | ENT_SUBSTITUTE);
    $sample = 'should be highlighted with yellow backgrounds, like '
        . '<span class="searchterm">this</span>.';

    if ($found === count($terms)) {
        // If all terms matched, just tell them you did the highlighting.
        // Change this message if you like.
        $message = '<p>It seems you arrived at this page from a search engine. To help you find '
            . 'what you were looking for, your search terms ("' . $query_terms . '") '
            . $sample . '</p>';
    } else {
        // If only some or no terms matched, offer to repeat the search locally.
        // Insert a proper URL for your site's search function here, up to BUT
        // NOT INCLUDING the part where the search terms go.
        $search_url = 'http://search.atomz.com/search/?sp-q='
            . implode('+', array_map('urlencode', $words))
            // Any parts of the search URL which have to go AFTER the search terms.
            . '&sp-a=sp10028bf7&sp-p=all&sp-f=iso-8859-1';

        // Also change this message if you like.
        $message = '<p>It seems you arrived at this page from a search engine, but that some '
            . 'or all of the terms you searched for ("' . $query_terms . '") aren’t in this page. '
            . 'Would you like to <a href="' . htmlspecialchars($search_url, ENT_QUOTES | ENT_SUBSTITUTE) . '">'
            . 'try your search again using this site’s built-in search</a>? It might be more accurate.';
        if ($found > 0) {
            $message .= ' Any of your search terms which do appear in this page ' . $sample;
        }
        $message .= '</p>';
    }

    return $message . $text;
}

/**
 * Extracts the individual query words from a search-engine referer URL.
 *
 * The query string is parsed properly, so the value is URL-decoded and a
 * parameter such as "oq" is never mistaken for "q". Quotation marks are
 * removed and the value is split on whitespace, commas, plus signs and dots.
 *
 * @param string $referer Full referer URL.
 *
 * @return array List of non-empty query words in their original case; empty
 *               when the referer is not a supported search or has no query.
 */
function search_highlight_query_words($referer)
{
    $parts = parse_url($referer);
    if (!is_array($parts) || empty($parts['host']) || !isset($parts['query'])) {
        return array();
    }

    $location = $parts['host'] . (isset($parts['path']) ? $parts['path'] : '');
    $param = search_highlight_query_param($referer, $location);
    if ($param === null) {
        return array();
    }

    parse_str($parts['query'], $params);
    $raw = null;
    foreach ($params as $name => $value) {
        // Parameter names are compared case-insensitively, as the original
        // extraction pattern did.
        if (is_string($value) && strcasecmp((string) $name, $param) === 0) {
            $raw = $value;
            break;
        }
    }
    if ($raw === null) {
        return array();
    }

    // Remove quotation marks.
    $raw = str_replace(array('\'', '"'), '', $raw);

    $words = preg_split('/[\s,+.]+/', $raw, -1, PREG_SPLIT_NO_EMPTY);

    return $words === false ? array() : $words;
}

/**
 * Returns the name of the query-string parameter that carries the search
 * terms for the engine identified by $location, or null when the location is
 * not a supported search engine.
 *
 * To support another engine, add a "pattern => parameter" pair to $engines.
 *
 * @param string $referer  Full referer URL.
 * @param string $location Host plus path of the referer (no query string), so
 *                         text in the query cannot impersonate an engine.
 *
 * @return string|null Parameter name, or null if unsupported.
 */
function search_highlight_query_param($referer, $location)
{
    // NOTE(review): referers beginning with http://www.google.com/ are
    // deliberately skipped, exactly as in the original condition; the reason
    // is not visible in this file -- confirm whether this is still wanted.
    if (preg_match('/www\.google/i', $location)
        && !preg_match('/^http:\/\/www\.google\.com\//i', $referer)
    ) {
        return 'q';
    }

    // Order matters: the first matching pattern wins.
    $engines = array(
        '/search\.msn|www\.alltheweb\.com|ask\.com|go\.google\.com|search\.earthlink\.net/i' => 'q',
        '/www\.ask\.co\.uk/i' => 'ask',
        '/search\.atomz/i' => 'sp-q',
        '/search\.yahoo/i' => 'p',
        '/msxml\.excite\.com|www\.metacrawler\.com|dpxml\.webcrawler\.com/i' => 'qkw',
        '/search\.lycos\.com|search\.aol\.com|www\.hotbot\.com|search\.netscape\.com|(?:www|search)\.mamma\.com/i' => 'query',
        '/search\.iwon\.com/i' => 'searchfor',
        '/search\.cometsystems\.com/i' => 'qry',
        '/www\.overture\.com/i' => 'Keywords',
        '/www\.looksmart\.com/i' => 'key',
        '/search\.viewpoint\.com/i' => 'k',
        '/home\.bellsouth\.net\/s\/s\.dll/i' => 'string',
    );

    foreach ($engines as $pattern => $param) {
        if (preg_match($pattern, $location)) {
            return $param;
        }
    }

    return null;
}
?>