Hey guys. I'm having some major trouble getting an array of the top 10 results for a particular keyword. I found some tutorials over at www.tellinya.com/read/2007/09/13/get-msn-live-search-serps-search-result-pages-with-php/ www.tellinya.com/read/2007/09/07/get-google-serps-search-result-pages-with-php/ But I can't seem to get his code working, it keeps popping up errors. Basically i'd just like to have some sort of an array in which there would be title, description and the url of the top 10 results. This is the non working code...
<?php
/**
 * Small cURL-based HTTP client.
 *
 * Performs GET/POST requests, follows redirects and keeps the parsed
 * response headers of every hop of the last request.
 */
class eHttpClient
{
    /** @var string User-Agent header sent with each request. */
    public $httpUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en)";

    /** @var string Referer header; not sent when empty. */
    public $httpReferer = "";

    /** @var int Request timeout in seconds (CURLOPT_TIMEOUT). */
    public $nTimeout = 30;

    /** @var array Parsed header blocks of the last request, one entry per response. */
    public $httpRecvHeaders = array();

    /** @var mixed cURL handle created in the constructor. */
    public $curl = null;

    public function __construct()
    {
        $this->curl = curl_init();
    }

    public function __destruct()
    {
        // Only resource-type handles need an explicit close; object-type
        // handles are released when the last reference goes away.
        if (is_resource($this->curl)) {
            curl_close($this->curl);
        }
        $this->curl = null;
    }

    /**
     * Configure the cURL handle for the next request.
     *
     * @param string $url    URL to request.
     * @param mixed  $post   Truthy for POST, falsy for GET.
     * @param mixed  $nobody Truthy to request headers only (CURLOPT_NOBODY).
     * @param array  $hdr    Extra request header lines ("Name: value").
     * @return void
     */
    public function _init($url, $post, $nobody, $hdr = array())
    {
        $ch = $this->curl;

        curl_setopt($ch, CURLOPT_URL, $url);
        // The method is set before CURLOPT_NOBODY so the NOBODY flag is the
        // last of the two to be applied.
        curl_setopt($ch, ($post ? CURLOPT_POST : CURLOPT_HTTPGET), 1);
        curl_setopt($ch, CURLOPT_NOBODY, (int)$nobody);

        curl_setopt($ch, CURLOPT_TIMEOUT, $this->nTimeout);
        if (strlen($this->httpReferer)) {
            curl_setopt($ch, CURLOPT_REFERER, $this->httpReferer);
        }
        curl_setopt($ch, CURLOPT_USERAGENT, $this->httpUserAgent);

        // CURLOPT_RETURNTRANSFER already keeps the response out of the output
        // stream; the obsolete CURLOPT_MUTE constant is intentionally not used.
        // No port is forced, so the port/scheme of $url decides (https works),
        // and TLS peer verification is left at the library default.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 1);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
        curl_setopt($ch, CURLOPT_FORBID_REUSE, 1);
        curl_setopt($ch, CURLOPT_FRESH_CONNECT, 1);

        // Always (re)set the header list so headers from a previous request on
        // this handle do not leak into the next one.
        $headers = is_array($hdr) ? array_values($hdr) : array();
        if ($post) {
            array_unshift($headers, "Content-type: application/x-www-form-urlencoded");
        }
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);

        $this->httpRecvHeaders = array();
    }

    /**
     * Perform a GET request.
     *
     * @param string       $url URL to retrieve (mandatory); may already carry a query string.
     * @param array|string $qs  Optional query parameters (array) or ready-made query string.
     * @param array        $hdr Optional extra request header lines.
     * @return string Response body with "\r" removed; "" when the transfer failed.
     */
    public function get($url, $qs = array(), $hdr = array())
    {
        $query = (is_array($qs) || is_object($qs))
            ? http_build_query($qs)
            : ltrim((string)$qs, "?&");

        if (strlen($query)) {
            $last = substr($url, -1);
            if (strpos($url, "?") === false) {
                $sep = "?";
            } elseif ($last === "?" || $last === "&") {
                $sep = "";
            } else {
                $sep = "&";
            }
            $url .= $sep . $query;
        }

        $this->_init($url, 0, 0, $hdr);
        return $this->_fetchHtml();
    }

    /**
     * Perform a POST request; falls back to GET when there is nothing to post.
     *
     * @param string       $url URL to retrieve (mandatory).
     * @param array|string $qs  Variables to post (array) or an already encoded body.
     * @param array        $hdr Optional extra request header lines.
     * @return string Response body with "\r" removed; "" when the transfer failed.
     */
    public function post($url, $qs, $hdr = array())
    {
        $body = (is_array($qs) || is_object($qs)) ? http_build_query($qs) : (string)$qs;
        if (!strlen($body)) {
            return $this->get($url, array(), $hdr);
        }

        $this->_init($url, 1, 0, $hdr);
        curl_setopt($this->curl, CURLOPT_POSTFIELDS, $body);
        return $this->_fetchHtml();
    }

    /**
     * Turn the lines of one response header block into an associative array.
     *
     * The first line is stored under "HTTP". "Name: value" lines are stored
     * under their name; a repeated name becomes a list of values. Lines
     * without a colon are collected under "Raw" (omitted when empty).
     *
     * @param array $headers Header lines of a single response.
     * @return array
     */
    public function _parseHeaders($headers)
    {
        $parsed = array("Raw" => array());
        $parsed['HTTP'] = count($headers) ? array_shift($headers) : "";

        foreach ($headers as $line) {
            if (!preg_match("/([^:]+):(.*)/", $line, $pcs)) {
                $parsed['Raw'][] = $line;
                continue;
            }
            $key = trim($pcs[1]);
            $val = trim($pcs[2]);
            if (!isset($parsed[$key])) {
                $parsed[$key] = $val;
                continue;
            }
            if (!is_array($parsed[$key])) {
                $parsed[$key] = array($parsed[$key]);
            }
            $parsed[$key][] = $val;
        }

        if (!count($parsed['Raw'])) {
            unset($parsed['Raw']);
        }
        return $parsed;
    }

    /**
     * Execute the prepared request and separate headers from the body.
     *
     * Each received header block is parsed and appended to $httpRecvHeaders.
     *
     * @return string Response body with "\r" removed; "" when the transfer failed.
     */
    public function _fetchHtml()
    {
        $raw = curl_exec($this->curl);
        if (!is_string($raw)) {
            return "";
        }

        // header_size covers all header blocks that precede the body in the
        // returned data, so the split does not depend on counting redirects.
        $inf = $this->getInfo();
        $headerSize = isset($inf['header_size']) ? (int)$inf['header_size'] : 0;

        $head = str_replace("\r", "", (string)substr($raw, 0, $headerSize));
        $body = str_replace("\r", "", (string)substr($raw, $headerSize));

        // Header blocks are separated from each other by an empty line.
        $blocks = preg_split("/\n{2,}/", trim($head, "\n"));
        foreach ($blocks as $block) {
            if ($block === "") {
                continue;
            }
            $this->httpRecvHeaders[] = $this->_parseHeaders(explode("\n", $block));
        }

        return $body;
    }

    /**
     * @return array Result of curl_getinfo() for the handle.
     */
    public function getInfo()
    {
        return curl_getinfo($this->curl);
    }

    /**
     * Get all header blocks of the last request. If the request was
     * redirected, more than one block is present.
     *
     * @return array
     */
    public function getHeaders()
    {
        return $this->httpRecvHeaders;
    }

    /**
     * Get the last header block, i.e. the one that applies to the received page.
     *
     * @return array|null Null when no headers have been received.
     */
    public function getHeader()
    {
        if (!count($this->httpRecvHeaders)) {
            return null;
        }
        // Read the last entry without modifying the stored list.
        $copy = $this->httpRecvHeaders;
        return end($copy);
    }

    /**
     * Simulate a referer for the request.
     *
     * @param string $ref
     * @return void
     */
    public function setReferer($ref)
    {
        $this->httpReferer = $ref;
    }

    /**
     * Pretend to be a given User Agent.
     *
     * @param string $ua "gg", "ms" or "yh" select a crawler User-Agent string;
     *                   any other value is used verbatim.
     * @return void
     */
    public function setUserAgent($ua)
    {
        $aliases = array(
            "gg" => "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
            "ms" => "msnbot/1.0 (+http://search.msn.com/msnbot.htm)",
            "yh" => "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
        );
        $this->httpUserAgent = (is_string($ua) && isset($aliases[$ua])) ? $aliases[$ua] : $ua;
    }
}

/**
 * Convert an RSS result feed into a list of SERP entries.
 *
 * @param string $xml     RSS document.
 * @param int    $page    Zero-based page index, used to compute "Rank".
 * @param int    $perpage Results per page, used to compute "Rank".
 * @return array|false List of entries, or false when the feed contains no <item>.
 */
function _msnParseRss($xml, $page, $perpage)
{
    // "s" lets "." cross line breaks, so items spread over several lines match.
    if (!preg_match_all('#<item>(.+?)</item>#s', (string)$xml, $matches)) {
        return false;
    }

    $results = array();
    foreach ($matches[1] as $i => $item) {
        $item = trim($item);
        // Undo one level of ampersand escaping before the fields are extracted.
        $item = str_replace("&amp;", "&", $item);

        // If the item can't be translated, skip it.
        if (!preg_match(
            '#<title>(.+?)</title>\s*' .
            '<link>(.+?)</link>\s*' .
            '<description>(.*)</description>\s*' .
            '<pubDate>(.+?)</pubDate>#is',
            $item,
            $parts
        )) {
            continue;
        }

        // Decode the HTML entities and strip tags.
        $title = html_entity_decode(strip_tags(trim($parts[1], " \"")));
        $desc  = html_entity_decode(strip_tags(trim($parts[3], " \"")));
        $link  = trim($parts[2], " \"");
        $tm    = strtotime($parts[4]);

        // Link invalid: skip the item.
        if (!preg_match('#^([^:]+)://([^/]+)/?(.*)$#', $link, $doms)) {
            continue;
        }

        $results[] = array(
            // Rank is derived from the item's position in the feed.
            "Rank"     => ($page * $perpage) + $i + 1,
            "Url"      => $link,
            "Title"    => trim($title),
            "Host"     => $doms[2],
            "Protocol" => $doms[1],
            "Path"     => "/" . $doms[3],
            "Summary"  => trim($desc),
            // Unix timestamp (false when pubDate could not be parsed).
            "Cached"   => $tm,
            // Human readable; date() replaces the deprecated strftime().
            "CachedOn" => ($tm !== false) ? date("d F Y", $tm) : "",
        );
    }

    return $results;
}

/**
 * Fetch one page of search results as an array of entries with the keys
 * Rank, Url, Title, Host, Protocol, Path, Summary, Cached and CachedOn.
 *
 * NOTE(review): the default host is taken over unchanged; confirm that it
 * still serves the RSS format, otherwise pass a working host via $dc.
 *
 * @param string $query   Search query.
 * @param int    $page    One-based page number (0 is treated like 1).
 * @param int    $perpage Results per page.
 * @param string $dc      Host name of the search front end.
 * @return array|false List of entries, or false when the response has no items.
 */
function msnResults($query, $page = 1, $perpage = 10, $dc = "search.live.com")
{
    if ($page) {
        $page--;
    }

    $url = sprintf(
        "http://%s/results.aspx?q=%s&count=%d&first=%d&format=rss",
        $dc,
        urlencode($query),
        $perpage,
        ($page * $perpage) + 1
    );

    $hc  = new eHttpClient();
    $xml = $hc->get($url);

    // The RSS has items; if not, _msnParseRss() reports false.
    return _msnParseRss($xml, $page, $perpage);
}

/**
 * Fetch one page of search results and return only the result URLs.
 *
 * @param string $query   Search query.
 * @param int    $page    One-based page number.
 * @param int    $perpage Results per page.
 * @param string $dc      Host name of the search front end.
 * @return array List of URLs; empty when no results were found.
 */
function msnLinks($query, $page = 1, $perpage = 10, $dc = "search.live.com")
{
    $res = msnResults($query, $page, $perpage, $dc);
    if (!is_array($res)) {
        // msnResults() returns false when nothing was found.
        return array();
    }

    $links = array();
    foreach ($res as $entry) {
        $links[] = $entry['Url'];
    }
    return $links;
}
?>
<?php
print_r(msnResults("site:tellinya.com/", 1, 10));
?>
Code (markup):
Try setting error_reporting(E_ALL^(E_NOTICE|E_WARNING)); before the code, and ensure curl is enabled in php.ini. I wrote those scripts and use them daily, so they do work; you have either a configuration problem with the PHP install or a problem in the PHP script. Make sure it pops up errors and not warnings, because the difference between the two is huge!