Never been to TextSnippets before?

Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world (or not, you can keep them private!)

About this user

James Bennett

« Newer Snippets
Older Snippets »
1 total  XML / RSS feed 

Extensible search highlighting in PHP

Based on Dean's original Google Hilite, but refactored a bit to make it easy to add support for more search engines (currently supports some 20-odd major searches).

<?php

/**
 * Highlights the visitor's search-engine query terms inside a page fragment.
 *
 * The HTTP Referer is inspected; when it points at one of the supported
 * search engines, the query words are read from that engine's query-string
 * parameter, every non-excluded word found in $text is wrapped in
 * <span class="searchterm">...</span>, and an explanatory paragraph is
 * prepended to the result.
 *
 * @param string $text Plain text or HTML fragment to highlight.
 * @return string $text unchanged when there is no usable search referer or no
 *                highlightable word; otherwise the message followed by the
 *                (possibly highlighted) text.
 */
function search_highlight($text) {
  // The Referer header is optional and entirely client-controlled.
  if (!isset($_SERVER['HTTP_REFERER']) || !is_string($_SERVER['HTTP_REFERER'])) {
    return $text;
  }
  $referer = $_SERVER['HTTP_REFERER'];

  // NOTE(review): the original code explicitly ignored referers beginning with
  // "http://www.google.com/" (other Google hosts were accepted). That exclusion
  // is preserved here; confirm whether it is still wanted.
  if (preg_match('/^http:\/\/www\.google\.com\//i', $referer)) {
    return $text;
  }

  $parts = parse_url($referer);
  if (!is_array($parts) || !isset($parts['host']) || !isset($parts['query'])) {
    return $text;
  }

  // Engines are recognised by host + path only, so an engine name that merely
  // appears inside some other site's query string does not trigger highlighting.
  $site = $parts['host'] . (isset($parts['path']) ? $parts['path'] : '');

  // Pattern (matched case-insensitively against $site) => lower-cased name of
  // the query-string parameter that carries the search terms.
  // To support another engine, add one line here.
  $engines = array(
    'www\.google'                     => 'q',
    'go\.google\.com'                 => 'q',
    'search\.msn'                     => 'q',
    'www\.alltheweb\.com'             => 'q',
    'ask\.com'                        => 'q',
    'search\.earthlink\.net'          => 'q',
    'www\.ask\.co\.uk'                => 'ask',
    'search\.atomz'                   => 'sp-q',
    'search\.yahoo'                   => 'p',
    'msxml\.excite\.com'              => 'qkw',
    'www\.metacrawler\.com'           => 'qkw',
    'dpxml\.webcrawler\.com'          => 'qkw',
    'search\.lycos\.com'              => 'query',
    'search\.aol\.com'                => 'query',
    'www\.hotbot\.com'                => 'query',
    'search\.netscape\.com'           => 'query',
    // The original detected www.mamma.com but only mapped search.mamma.com,
    // leaving the parameter name undefined; both hosts are handled now.
    '(?:www|search)\.mamma\.com'      => 'query',
    'search\.iwon\.com'               => 'searchfor',
    'search\.cometsystems\.com'       => 'qry',
    'www\.overture\.com'              => 'keywords',
    'www\.looksmart\.com'             => 'key',
    'search\.viewpoint\.com'          => 'k',
    'home\.bellsouth\.net\/s\/s\.dll' => 'string',
  );

  //Figure out which search engine this is and which parameter holds the terms.
  $param = null;
  foreach ($engines as $site_pattern => $param_name) {
    if (preg_match('/' . $site_pattern . '/i', $site)) {
      $param = $param_name;
      break;
    }
  }
  if ($param === null) {
    return $text;
  }

  // parse_str() URL-decodes the values; keys are lower-cased so the lookup is
  // case-insensitive, as the original regex-based extraction was.
  $params = array();
  parse_str($parts['query'], $params);
  $params = array_change_key_case($params, CASE_LOWER);
  if (!isset($params[$param]) || !is_string($params[$param])) {
    return $text;
  }

  //Remove quotation marks.
  $query = str_replace(array("'", '"'), '', $params[$param]);

  $words = preg_split('/[\s,+.]+/', $query, -1, PREG_SPLIT_NO_EMPTY);
  if (!$words) {
    return $text;
  }

  //List of words to exclude from matching.
  $excludes = array('a', 'an', 'the', 'is', 'in', 'are', 'was', 'and', 'by', 'for', 'from', 'of', 'on', 'with', 'this', 'that', 'shtuff', 'or', ' ', '');

  $matched = false;    // at least one term was found in $text
  $mismatched = false; // at least one term was not found in $text
  $seen = array();     // terms already processed, to avoid nesting spans

  //Iterate over search terms and do the highlighting.
  foreach ($words as $word) {
    $term = strtolower($word);
    //Don't match the excluded terms, and handle each term only once.
    if (in_array($term, $excludes, true) || isset($seen[$term])) {
      continue;
    }
    $seen[$term] = true;

    // The term comes from the referer, so it must be escaped before it is
    // embedded in a regular expression.
    $quoted = preg_quote($term, '/');

    if (!preg_match('/<.+>/', $text)) {
      // No markup in the text: match the term anywhere.
      $pattern = '/(\b' . $quoted . '\b)/i';
      $replacement = '<span class="searchterm">$1</span>';
    } else {
      // Markup present: only match in character data that follows a tag, so
      // tag names and attribute values are left alone.
      $pattern = '/(?<=>)([^<]+)?(\b' . $quoted . '\b)/i';
      $replacement = '$1<span class="searchterm">$2</span>';
    }

    if (preg_match($pattern, $text)) {
      $matched = true;
      $highlighted = preg_replace($pattern, $replacement, $text);
      // preg_replace() returns null on a PCRE error; keep the text in that case.
      if ($highlighted !== null) {
        $text = $highlighted;
      }
    } else {
      $mismatched = true;
    }
  }

  // Every word was excluded: nothing was highlighted and nothing to report.
  if (!$matched && !$mismatched) {
    return $text;
  }

  $query_terms = htmlspecialchars(implode(' ', $words), ENT_QUOTES);

  if (!$mismatched) {
    //If all terms matched, just tell them you did the highlighting.
    //Change this message if you like.
    $message = "<p>It seems you arrived at this page from a search engine. To help you find "
      . "what you were looking for, your search terms (\"" . $query_terms . "\") should "
      . "be highlighted with yellow backgrounds, like <span class=\"searchterm\">this</span>.</p>\n";
    return $message . $text;
  }

  //If only some or no terms matched, offer to repeat the search locally.
  $search_url =
    //Insert a proper URL for your site's search function here, up to BUT NOT INCLUDING the part where the search terms go.
    'http://search.atomz.com/search/?sp-q='
    . implode('+', array_map('urlencode', $words))
    //Begin the next line with any parts of the search URL which have to go AFTER the search terms.
    . '&sp-a=sp10028bf7&sp-p=all&sp-f=iso-8859-1';

  //Also change this message if you like.
  $message = "<p>It seems you arrived at this page from a search engine, but that some "
    . "or all of the terms you searched for (\"" . $query_terms . "\") aren&#8217;t in this page. Would you like to "
    . "<a href=\"" . htmlspecialchars($search_url, ENT_QUOTES) . "\">try your search again using this site&#8217;s built-in search</a>? "
    . "It might be more accurate.</p>\n";

  if ($matched) {
    $message .= "<p>Any of your search terms which do appear in this page "
      . "should be highlighted with yellow backgrounds, like <span class=\"searchterm\">this</span>.</p>\n";
  }

  return $message . $text;
}
?>
« Newer Snippets
Older Snippets »
1 total  XML / RSS feed