Google texter - v0.2

This experimental script creates random text using Google search as its source.

/*
** Google texter - v0.2
** This experimental script creates a random text by
** using google search as source. 
**
** Warning: Do not execute this script on a webserver!
** This script was made to execute from a command line.
**
** (c) Jonas John, <www.jonasjohn.de>
** License: BSD (http://en.wikipedia.org/wiki/BSD_licenses)
*/
 
// 
// Configuration: 
// 
 
// seed phrase the generated text starts with
$words = 'php is';

// Google host that is queried
$google_server = 'www.google.com';

// number of results requested per Google query (scanned for new words)
$google_pages = 50;

// how many trailing words of the text form each search phrase
$search_length = 3;

// upper bound on the number of words appended to the seed phrase
$text_length = 100;


// kick off the text generation
google_texter($words, $text_length, $search_length);
 
 
 
/**
 * Downloads the given URL with cURL and hands back the result of curl_exec().
 *
 * @param string $url  address to fetch
 * @return mixed the value returned by curl_exec(): with CURLOPT_RETURNTRANSFER
 *               enabled this is the response body, or false when the
 *               transfer failed
 */
function get_url($url){

    // transfer options, applied one by one in the order listed
    $settings = array(
        // address to download
        CURLOPT_URL            => $url,
        // browser identification sent with the request
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.0.4)",
        // 0 = keep the response headers out of the returned data
        CURLOPT_HEADER         => 0,
        // true = return the data instead of printing it
        CURLOPT_RETURNTRANSFER => true,
        // give up after this many seconds
        CURLOPT_TIMEOUT        => 10,
    );

    $handle = curl_init();

    foreach ($settings as $option => $value){
        curl_setopt($handle, $option, $value);
    }

    // run the transfer, then release the handle
    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
 
/**
 * Picks a follow-up word for the given text.
 *
 * The last $search_length words of $words are sent to Google as an exact
 * phrase; the result page is reduced to lower-case plain text and every word
 * that directly follows the phrase is counted. One of the two most frequent
 * followers is returned at random.
 *
 * Reads the globals $google_server (host name) and $google_pages (sent as
 * the "num" query parameter).
 *
 * @param string $words          text generated so far, words separated by spaces
 * @param int    $search_length  number of trailing words used as search phrase
 * @return mixed the chosen word (numeric words may come back as int because
 *               they pass through array keys), or '' when nothing was found
 */
function get_word($words, $search_length){
    global $google_pages;
    global $google_server;

    // split into words; empty tokens (double or trailing spaces, e.g. left
    // behind by an earlier empty result) must not become part of the phrase
    $tokens = preg_split('/ +/', $words, -1, PREG_SPLIT_NO_EMPTY);

    if (!is_array($tokens) || count($tokens) == 0){
        // nothing to search for
        return '';
    }

    // keep only the last $search_length words
    $tokens = array_slice($tokens, ($search_length * -1));

    $phrase = strtolower(implode(' ', $tokens));

    $url = 'http://'.$google_server.'/';
    $url .= 'search?num='.$google_pages.'&';
    $url .= 'q=%22'.urlencode($phrase).'%22&';
    $url .= 'btnG=Search';

    $content = get_url($url);

    // get_url() passes curl_exec()'s result through, which is false on failure
    if (!is_string($content) || $content === ''){
        return '';
    }

    // reduce the page to lower-case text with separators turned into spaces
    $content = strip_tags($content);
    $content = strtolower($content);
    $content = str_replace(
        array("\r\n", "\r", "\n", "\t", "<", ">", '"', "'", "-", "."),
        " ",
        $content
    );

    // the pattern below expects exactly one space between words, so runs of
    // spaces produced by the replacements above are collapsed first
    $collapsed = preg_replace('/ {2,}/', ' ', $content);
    if (is_string($collapsed)){
        $content = $collapsed;
    }

    // the phrase is literal text: escape it so characters such as "?" or "+"
    // (the capture class below allows "?" and "!" in generated words) are not
    // interpreted as regex syntax
    $pattern = '/'.preg_quote($phrase, '/').' ([0-9a-zA-ZäöüÄÖÜ\?!]+)/';

    $m = array();
    preg_match_all($pattern, $content, $m);

    $next_word = isset($m[1]) ? $m[1] : array();

    // rank the followers by how often they occur, most frequent first
    $next_word = array_count_values($next_word);
    arsort($next_word);
    $next_word = array_keys($next_word);

    // choose randomly between the two most frequent followers
    $r = rand(0,1);

    if (isset($next_word[$r])){
        return $next_word[$r];
    }

    // only one candidate available
    if (isset($next_word[0])){
        return $next_word[0];
    }

    return '';
}
 
/**
 * Prints a generated text: the start words followed by up to $text_length
 * words obtained one at a time from get_word().
 *
 * @param string $start_words    phrase the text begins with
 * @param int    $text_length    maximum number of words to append
 * @param int    $search_length  number of trailing words used for each search
 * @return void
 */
function google_texter($start_words, $text_length, $search_length){

    $text = $start_words;
    print $text;

    for ($x=0; $x < $text_length; $x++){
        $w = get_word($text, $search_length);

        // get_word() returns '' when it found no follow-up word. Carrying on
        // would only print stray spaces and send further requests for a
        // phrase that already produced nothing, so stop here.
        if ((string)$w === ''){
            break;
        }

        $text .= ' ' . $w;
        print ' ' . $w;
    }

}
Snippet Details
  • Author
    Jonas John
  • License
    BSD
  • Language
    PHP
  • Created
    08/27/2006
  • Updated
    08/27/2006
  • Tags
    , , ,



<b>Example output:</b><br/><br/><b>php is</b> ... not the answer by blue october on the south beach diet is not a business associate agreement is required to use open source<br/><br/><b>java is</b> ... not my favorite language for many system management tasks for the communicative signals this worksheet to the students<br/><br/><b>google says</b> ... it has no interest in the public sector is a huge mistake by the lake is approximately 10 minutes from the ordinary general shareholders

Sorry folks, comments have been deactivated for now due to the large amount of spam.

Please post your questions or problems on a related programming board, a suitable mailing list, a programming chat room,
or a Q&A website like Stack Overflow, because I'm usually too busy to answer mails related
to my code snippets. Therefore, please only mail me if you have found a serious bug. Thank you!


Older comments:

None.