Google texter - v0.2
This experimental script creates a random text by using Google search as its source.
/*
** Google texter - v0.2
** This experimental script creates a random text by
** using Google search as its source.
**
** Warning: Do not execute this script on a webserver!
** This script was made to execute from a command line.
**
** (c) Jonas John, <www.jonasjohn.de>
** License: BSD (http://en.wikipedia.org/wiki/BSD_licenses)
*/

//
// Configuration:
//

// start words to search for:
$words = 'php is';

// stop the script after X words
$text_length = 100;

// search for X words on google
$search_length = 3;

// search in the first X google results for new words
$google_pages = 50;

// use this google server
$google_server = 'www.google.com';

// start the search process
// (the functions below are declared at top level, so they may be called
// before their definitions appear in the file)
google_texter($words, $text_length, $search_length);

/**
 * Downloads the given URL with cURL and returns the response body.
 *
 * @param string $url URL to download
 * @return string|false the response body, or false if the cURL handle could
 *                      not be created or the transfer failed
 */
function get_url($url){

    // create a new curl resource
    $ch = curl_init();
    if ($ch === false){
        return false;
    }

    // set URL to download
    curl_setopt($ch, CURLOPT_URL, $url);

    // user agent:
    $browser = "Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.0.4)";
    curl_setopt($ch, CURLOPT_USERAGENT, $browser);

    // remove header? 0 = yes, 1 = no
    curl_setopt($ch, CURLOPT_HEADER, 0);

    // should curl return or print the data? true = return, false = print
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    // timeout in seconds
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);

    // download the given URL (false on failure)
    $output = curl_exec($ch);

    // close the curl resource, and free system resources
    curl_close($ch);

    return $output;
}

/**
 * Searches for the last $search_length words of $words as an exact phrase
 * and returns a word that follows that phrase in the result page.
 *
 * Reads the globals $google_pages (number of results requested) and
 * $google_server (host name used for the search).
 *
 * @param string $words         text generated so far, words separated by spaces
 * @param int    $search_length number of trailing words used as search phrase
 * @return string one of the two most frequent follow-up words, or '' if the
 *                download failed or no follow-up word was found
 */
function get_word($words, $search_length){

    global $google_pages;
    global $google_server;

    // keep only the last $search_length words as the search phrase
    $tokens = explode(' ', $words);
    $phrase = implode(' ', array_slice($tokens, $search_length * -1));
    $phrase = strtolower($phrase);

    $url  = 'http://'.$google_server.'/';
    $url .= 'search?num='.$google_pages.'&';
    $url .= 'q=%22'.urlencode($phrase).'%22&';
    $url .= 'btnG=Search';

    $content = get_url($url);

    // a failed download yields false; there is nothing to scan in that case
    if (!is_string($content) || $content === ''){
        return '';
    }

    $content = strtolower(strip_tags($content));

    // turn line breaks, tabs and some punctuation into blanks
    // (replacements are applied in the listed order)
    $separators = array("\r\n", "\r", "\n", "\t", "<", ">", '"', "'", "-", ".");
    $content = str_replace($separators, " ", $content);

    // The phrase must be escaped: the words collected by this script may
    // contain "?" or "!", and the start words may contain any character,
    // so an unescaped phrase could change or break the pattern.
    $pattern = '/'.preg_quote($phrase, '/').' ([0-9a-zA-ZäöüÄÖÜ\?!]+)/';

    // preg_match_all() returns 0 for "no match" and false on error
    if (!preg_match_all($pattern, $content, $m)){
        return '';
    }

    // rank the follow-up words by frequency, most frequent first
    $counts = array_count_values($m[1]);
    arsort($counts);
    $candidates = array_keys($counts);

    // pick randomly between the two most frequent candidates and
    // fall back to the most frequent one if there is only one
    $r = rand(0,1);

    if (isset($candidates[$r])){
        return (string) $candidates[$r];
    }

    if (isset($candidates[0])){
        return (string) $candidates[0];
    }

    return '';
}

/**
 * Prints the start words followed by up to $text_length generated words.
 *
 * @param string $start_words   words the text starts with
 * @param int    $text_length   maximum number of words to append
 * @param int    $search_length number of trailing words used for each search
 * @return void
 */
function google_texter($start_words, $text_length, $search_length){

    $word = $start_words;

    print $word;

    for ($x=0; $x < $text_length; $x++){

        $w = get_word($word, $search_length);

        // Stop as soon as no follow-up word is found. Appending an empty
        // word would leave a trailing blank in the search phrase, so every
        // remaining iteration would only send another useless request.
        if ($w === ''){
            break;
        }

        $word .= ' ' . $w;
        print ' ' . $w;
    }
}
<b>Example output:</b><br/><br/><b>php is</b> ... not the answer by blue october on the south beach diet is not a business associate agreement is required to use open source<br/><br/><b>java is</b> ... not my favorite language for many system management tasks for the communicative signals this worksheet to the students<br/><br/><b>google says</b> ... it has no interest in the public sector is a huge mistake by the lake is approximately 10 minutes from the ordinary general shareholders
Sorry folks, comments have been deactivated for now due to the large amount of spam.
Please try to post your questions or problems on a related programming board, a suitable mailing list, a programming chat-room,
or use a Q&A website like Stack Overflow, because I'm usually too busy to answer any emails related
to my code snippets. Therefore, please only email me if you have found a serious bug. Thank you!
Older comments:
None.