Hi, I've tried numerous PHP scripts to get the google PR of a website. e.g. http://forums.digitalpoint.com/showthread.php?t=67792 http://www.googlecommunity.com/scripts/google-pagerank.php However, they all give me a 403 Forbidden error. e.g. Warning: file(http://www.google.com/search?client=navclient-auto&ch=6-1557012338682815999&features=Rank&q=info:): failed to open stream: HTTP request failed! HTTP/1.0 403 Forbidden in /home/blapper/public_html/forum/pagerank.php(71) : eval()'d code on line 98 PageRank: provided by Google Community.com Does anyone know of any alternative scripts I can use? I'm not sure if I'd want to use the Google API as that's limited to 1,000 requests a day and I don't want my script to be limited. Thanks, fcmisc
You can't use the Google API to get PageRank anyway, so you can put that option out of your mind now.
Some servers don't allow opening files / streams from other servers within their PHP environment. If your server is one of these, then nothing will help...
Thanks for all the feedback, guys. It appears that my script wasn't working because the IP of my server was banned. The script works if I put it on another server. Why would one IP be banned and not the other? I am using two different shared hosting solutions. How can I stop my 2nd IP from being banned as well?
PHP varies from version to version. You might want to check whether you have the same version on the 2 servers.
" How can I stop my 2nd IP being banned as well? " Write a little script that randomly picks a different DC (data center) for each request. (A list of 10 DCs should solve the problem.)
Alternatively, using proxies, you should be able to send requests from different IPs - but I never got this to work in PHP so far.
How do you know the IP was banned? Did google tell you? Maybe the server had some kind of firewall blocking access to google.
I know the IP's were banned as I write a script to output the returned data into a brower. Your client does not have permission to get URL /search?client=navclient-auto&ch=61413624739196411948&features=Rank&q=info:http://www.yahoo.com from this server. How do I get the script to look at different data centers? The code is here:
<?php
// Seed value for the third hash word in pageRank::GoogleCH().
define('GOOGLE_MAGIC', 0xE6359A60);

/**
 * Queries www.google.com for the toolbar PageRank of a URL and builds an
 * <img> tag for the matching rank image (images/pr0.gif ... images/pr10.gif).
 */
class pageRank{

    /** HTML <img> tag built by pr_image(); unset until a rank has been parsed. */
    var $pr;

    /**
     * Shifts $a right by $b bits without propagating bit 31 (0x80000000).
     *
     * @param int $a value to shift
     * @param int $b number of bit positions
     * @return int shifted value
     */
    function zeroFill($a, $b){
        $z = hexdec('80000000');
        if ($z & $a){
            // Bit 31 is set: shift once, clear that bit, set bit 30 by hand,
            // then perform the remaining $b - 1 shifts.
            $a = ($a>>1);
            $a &= (~$z);
            $a |= 0x40000000;
            $a = ($a>>($b-1));
        }else{
            $a = ($a>>$b);
        }
        return $a;
    }

    /**
     * Scrambles three hash words with nine subtract/xor/shift rounds.
     *
     * @param int $a
     * @param int $b
     * @param int $c
     * @return array the mixed values as array($a, $b, $c)
     */
    function mix($a,$b,$c) {
        $a -= $b; $a -= $c; $a ^= ($this->zeroFill($c,13));
        $b -= $c; $b -= $a; $b ^= ($a<<8);
        $c -= $a; $c -= $b; $c ^= ($this->zeroFill($b,13));
        $a -= $b; $a -= $c; $a ^= ($this->zeroFill($c,12));
        $b -= $c; $b -= $a; $b ^= ($a<<16);
        $c -= $a; $c -= $b; $c ^= ($this->zeroFill($b,5));
        $a -= $b; $a -= $c; $a ^= ($this->zeroFill($c,3));
        $b -= $c; $b -= $a; $b ^= ($a<<10);
        $c -= $a; $c -= $b; $c ^= ($this->zeroFill($b,15));

        return array($a,$b,$c);
    }

    /**
     * Computes the checksum that printrank() sends as the "ch" query parameter.
     *
     * NOTE(review): nothing here masks intermediate values to 32 bits, so the
     * result presumably depends on the platform's integer width - confirm
     * against a known-good checksum before relying on it on 64-bit PHP.
     *
     * @param array    $url    character codes of the string to hash (see strord())
     * @param int|null $length number of entries to hash; defaults to all of $url
     * @param int      $init   initial value of the third hash word
     * @return int the third word of the final mix
     */
    function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
        if(is_null($length)) {
            $length = sizeof($url);
        }
        $a = $b = 0x9E3779B9;
        $c = $init;
        $k = 0;
        $len = $length;

        // Consume the input 12 codes at a time, 4 per word, lowest byte first.
        while($len >= 12) {
            $a += ($url[$k+0] +($url[$k+1]<<8) +($url[$k+2]<<16) +($url[$k+3]<<24));
            $b += ($url[$k+4] +($url[$k+5]<<8) +($url[$k+6]<<16) +($url[$k+7]<<24));
            $c += ($url[$k+8] +($url[$k+9]<<8) +($url[$k+10]<<16)+($url[$k+11]<<24));
            $mix = $this->mix($a,$b,$c);
            $a = $mix[0]; $b = $mix[1]; $c = $mix[2];
            $k += 12;
            $len -= 12;
        }

        $c += $length;
        // Fold in the remaining 0-11 codes; every case deliberately falls through.
        switch($len){
            case 11: $c+=($url[$k+10]<<24);
            case 10: $c+=($url[$k+9]<<16);
            case 9 : $c+=($url[$k+8]<<8);
            /* the first byte of c is reserved for the length */
            case 8 : $b+=($url[$k+7]<<24);
            case 7 : $b+=($url[$k+6]<<16);
            case 6 : $b+=($url[$k+5]<<8);
            case 5 : $b+=($url[$k+4]);
            case 4 : $a+=($url[$k+3]<<24);
            case 3 : $a+=($url[$k+2]<<16);
            case 2 : $a+=($url[$k+1]<<8);
            case 1 : $a+=($url[$k+0]);
        }
        $mix = $this->mix($a,$b,$c);
        /* report the result */
        return $mix[2];
    }

    /**
     * Converts a string into an array holding the numeric value of each byte.
     *
     * @param string $string
     * @return array one integer per byte; an empty array for an empty string
     */
    function strord($string) {
        $result = array();
        $count = strlen($string);
        for($i=0;$i<$count;$i++) {
            // Square brackets: the {$i} offset form no longer parses on PHP 8.
            $result[$i] = ord($string[$i]);
        }
        return $result;
    }

    /**
     * Requests the rank of $url from www.google.com, stores the matching image
     * tag via pr_image(), and echoes the raw request plus the parsed rank.
     *
     * @param string $url page whose rank is requested
     * @return void
     */
    function printrank($url){
        // The checksum covers the raw "info:<url>" string, before any encoding.
        $ch = "6".$this->GoogleCH($this->strord("info:" . $url));
        $pagerank = '';

        $fp = fsockopen("www.google.com", 80, $errno, $errstr, 30);
        if (!$fp) {
            echo "$errstr ($errno)<br />\n";
            return;
        }

        // urlencode() keeps spaces, '&', '#' and CR/LF inside $url from
        // breaking (or injecting into) the request line.
        $out  = "GET /search?client=navclient-auto&ch=" . $ch . "&features=Rank&q=info:" . urlencode($url) . " HTTP/1.1\r\n" ;
        $out .= "Host: www.google.com\r\n" ;
        $out .= "Connection: Close\r\n\r\n" ;
        fwrite($fp, $out);

        while (!feof($fp)) {
            $data = fgets($fp, 128);
            if ($data === false) {
                break;
            }
            $pos = strpos($data, "Rank_");
            if ($pos !== false) {
                // The rank starts 9 characters after the start of "Rank_";
                // trim() drops the line ending that fgets() keeps.
                $pagerank = trim(substr($data, $pos + 9));
                $this->pr_image($pagerank);
            }
        }
        fclose($fp);

        echo "<br />$out<br />PAGE RANK IS $pagerank";
    }

    /**
     * Builds the <img> tag for a rank and stores it in $this->pr.
     *
     * Create your own images or reuse existing ones; they must be named
     * pr0.gif, pr1.gif, ... pr10.gif inside images/.
     *
     * @param mixed $pagerank whole numbers 0-9 select prN.gif; any other value
     *                        (10 included) selects pr10.gif
     * @return void
     */
    function pr_image($pagerank){
        $level = 10;
        if (is_numeric($pagerank) && $pagerank >= 0 && $pagerank <= 9 && (int) $pagerank == $pagerank) {
            $level = (int) $pagerank;
        }
        // The rank text comes from a remote response, so escape it for the attribute.
        $this->pr = "<img src=\"images/pr" . $level . ".gif\" alt=\"PageRank " . htmlspecialchars((string) $pagerank, ENT_QUOTES) . " out of 10\">" ;
    }

    /**
     * @return string|null the tag built by the last pr_image() call, if any
     */
    function get_pr(){
        return $this->pr;
    }
}

$gpr = new pageRank();
$gpr->printrank("http://www.yahoo.com");
//display image
// Concatenate: "$gpr->get_pr()" inside a string reads a property and prints "()".
echo "PR:'" . $gpr->get_pr() . "\n";
echo "helo";
?>
This looks to me like the message I get if the server firewall is not letting my request through. My host would then specifically allow the url and it works OK. If it is your server that is banned I expect the ban will cover all google datacenters. Is it a dedicated server? It may be that another server user has got the entire IP banned in the past.
I have a number of hosts on shared servers. I'm not making enough cash to justify a dedicated server yet. The one I want to use the PR check on is banned, but another one is not. Also, my home IP address is banned for some reason. My plan is to query the PageRank check on the unbanned server from the banned server. It won't be fast or pretty, but it'll work.
Assuming you have curl setup within PHP, here's something that I just copied from the PHP help file. I haven't actually tested it, so I can't guarantee that it will work, but you definitely should be able to get the idea...

// Fetches $url through $proxy with the given User-Agent and returns whatever
// curl_exec() yields (response headers included, since CURLOPT_HEADER is on).
function curl_string ($url,$user_agent,$proxy){
    $session = curl_init();

    // Applied one by one, in this order.
    $settings = array(
        CURLOPT_PROXY          => $proxy,
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => $user_agent,
        CURLOPT_COOKIEJAR      => "c:\cookie.txt",
        CURLOPT_HEADER         => 1,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_TIMEOUT        => 120,
    );
    foreach ($settings as $option => $value) {
        curl_setopt ($session, $option, $value);
    }

    $response = curl_exec ($session);
    curl_close($session);

    return $response;
}

// Example call: fetch the Google home page through one proxy.
$url_page = "http://www.google.com/";
$user_agent = "Mozilla/4.0";
$proxy = "http://208.25.243.167:8080";

$string = curl_string($url_page,$user_agent,$proxy);
echo $string;
Code (markup): Hope that makes sense...
I would assume so. I don't know. Like I said, I didn't test this code but it would be fairly simple to test it yourself... I've never used the proxy part of curl through PHP, but have used pretty much the rest of it successfully.
I've used cURL too but never got the proxy part working. I tried this code but keep getting either of the 2 errors all the time.

// Fetch a page through a proxy; print any cURL error, then the response.
$session = curl_init();

// Applied one by one, in this order.
// NOTE(review): 1080 is the conventional SOCKS port. If this proxy speaks
// SOCKS rather than HTTP, CURLOPT_PROXYTYPE would be needed - confirm.
$settings = array(
    CURLOPT_HEADER          => TRUE,
    CURLOPT_USERAGENT       => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.1) Gecko/20060111 Firefox/1.5.0.1",
    CURLOPT_RETURNTRANSFER  => TRUE,
    CURLOPT_REFERER         => "",
    CURLOPT_FOLLOWLOCATION  => TRUE,
    CURLOPT_MAXREDIRS       => 5,
    CURLOPT_CONNECTTIMEOUT  => 5,
    CURLOPT_FAILONERROR     => TRUE,
    CURLOPT_COOKIESESSION   => TRUE,
    CURLOPT_COOKIEFILE      => "cookie.txt",
    CURLOPT_COOKIEJAR       => "cookie.txt",
    CURLOPT_HTTPPROXYTUNNEL => TRUE,
    CURLOPT_PROXY           => "http://202.150.105.35:1080",
);
foreach ($settings as $option => $value) {
    curl_setopt ($session, $option, $value);
}

$url = "http://yahoo.com";
curl_setopt ($session, CURLOPT_URL, $url);

$file_contents = curl_exec($session);

// Error code 0 means the transfer went through.
$ErrNo = curl_errno($session);
if ($ErrNo != 0) {
    echo "Error : $ErrNo : ".curl_error($session);
}

echo $file_contents;
curl_close($session);
PHP: I always get Error : 28 : connect() timed out! or Error : 7 : couldn't connect to host.
I don't suppose you're using an authenticating / NTLM proxy by any chance? That is, one that requires a username and password? This is a little more tricky to answer, as for some setups this is transparent. Anyway, from what I can remember, cURL didn't support NTLM at one stage. I don't know whether it does now, but if it doesn't, you might need to tunnel through using the NTLM Authorization Proxy Server from http://ntlmaps.sourceforge.net/ . It effectively operates as a proxy for your proxy...
I don't think Google is blocking it. Different versions of PHP have problems with math and the checksum is not getting calculated correctly.
I don't know anything about different datacentres. And have no control over my PHP versions. I've therefore put the below PHP. If you query it like: http://www.mysite.com/pagerank.php?url=http://www.google.com/ it will get the PR for google. To reduce the risk of ban, I've limited it to 1 request every 10 seconds. That should stop it flooding the server if things get busy. So the application that I'll eventually write will use cached PR's. Anyway, I hope that this script is useful for others in my situation.
<?php
// Seed value for the third hash word in pageRank::GoogleCH().
define('GOOGLE_MAGIC', 0xE6359A60);

/**
 * Queries www.google.com for the toolbar PageRank of a URL, echoes the rank
 * and builds an <img> tag for the matching image (images/pr0.gif ... pr10.gif).
 */
class pageRank{

    /** HTML <img> tag built by pr_image(); unset until a rank has been parsed. */
    var $pr;

    /**
     * Shifts $a right by $b bits without propagating bit 31 (0x80000000).
     *
     * @param int $a value to shift
     * @param int $b number of bit positions
     * @return int shifted value
     */
    function zeroFill($a, $b){
        $z = hexdec('80000000');
        if ($z & $a){
            // Bit 31 is set: shift once, clear that bit, set bit 30 by hand,
            // then perform the remaining $b - 1 shifts.
            $a = ($a>>1);
            $a &= (~$z);
            $a |= 0x40000000;
            $a = ($a>>($b-1));
        }else{
            $a = ($a>>$b);
        }
        return $a;
    }

    /**
     * Scrambles three hash words with nine subtract/xor/shift rounds.
     *
     * @param int $a
     * @param int $b
     * @param int $c
     * @return array the mixed values as array($a, $b, $c)
     */
    function mix($a,$b,$c) {
        $a -= $b; $a -= $c; $a ^= ($this->zeroFill($c,13));
        $b -= $c; $b -= $a; $b ^= ($a<<8);
        $c -= $a; $c -= $b; $c ^= ($this->zeroFill($b,13));
        $a -= $b; $a -= $c; $a ^= ($this->zeroFill($c,12));
        $b -= $c; $b -= $a; $b ^= ($a<<16);
        $c -= $a; $c -= $b; $c ^= ($this->zeroFill($b,5));
        $a -= $b; $a -= $c; $a ^= ($this->zeroFill($c,3));
        $b -= $c; $b -= $a; $b ^= ($a<<10);
        $c -= $a; $c -= $b; $c ^= ($this->zeroFill($b,15));

        return array($a,$b,$c);
    }

    /**
     * Computes the checksum that printrank() sends as the "ch" query parameter.
     *
     * NOTE(review): nothing here masks intermediate values to 32 bits, so the
     * result presumably depends on the platform's integer width - confirm
     * against a known-good checksum before relying on it on 64-bit PHP.
     *
     * @param array    $url    character codes of the string to hash (see strord())
     * @param int|null $length number of entries to hash; defaults to all of $url
     * @param int      $init   initial value of the third hash word
     * @return int the third word of the final mix
     */
    function GoogleCH($url, $length=null, $init=GOOGLE_MAGIC) {
        if(is_null($length)) {
            $length = sizeof($url);
        }
        $a = $b = 0x9E3779B9;
        $c = $init;
        $k = 0;
        $len = $length;

        // Consume the input 12 codes at a time, 4 per word, lowest byte first.
        while($len >= 12) {
            $a += ($url[$k+0] +($url[$k+1]<<8) +($url[$k+2]<<16) +($url[$k+3]<<24));
            $b += ($url[$k+4] +($url[$k+5]<<8) +($url[$k+6]<<16) +($url[$k+7]<<24));
            $c += ($url[$k+8] +($url[$k+9]<<8) +($url[$k+10]<<16)+($url[$k+11]<<24));
            $mix = $this->mix($a,$b,$c);
            $a = $mix[0]; $b = $mix[1]; $c = $mix[2];
            $k += 12;
            $len -= 12;
        }

        $c += $length;
        // Fold in the remaining 0-11 codes; every case deliberately falls through.
        switch($len){
            case 11: $c+=($url[$k+10]<<24);
            case 10: $c+=($url[$k+9]<<16);
            case 9 : $c+=($url[$k+8]<<8);
            /* the first byte of c is reserved for the length */
            case 8 : $b+=($url[$k+7]<<24);
            case 7 : $b+=($url[$k+6]<<16);
            case 6 : $b+=($url[$k+5]<<8);
            case 5 : $b+=($url[$k+4]);
            case 4 : $a+=($url[$k+3]<<24);
            case 3 : $a+=($url[$k+2]<<16);
            case 2 : $a+=($url[$k+1]<<8);
            case 1 : $a+=($url[$k+0]);
        }
        $mix = $this->mix($a,$b,$c);
        /* report the result */
        return $mix[2];
    }

    /**
     * Converts a string into an array holding the numeric value of each byte.
     *
     * @param string $string
     * @return array one integer per byte; an empty array for an empty string
     */
    function strord($string) {
        $result = array();
        $count = strlen($string);
        for($i=0;$i<$count;$i++) {
            // Square brackets: the {$i} offset form no longer parses on PHP 8.
            $result[$i] = ord($string[$i]);
        }
        return $result;
    }

    /**
     * Requests the rank of $url from www.google.com, stores the matching image
     * tag via pr_image(), and echoes the parsed rank (empty if none was found).
     *
     * @param string $url page whose rank is requested
     * @return void
     */
    function printrank($url){
        // The checksum covers the raw "info:<url>" string, before any encoding.
        $ch = "6".$this->GoogleCH($this->strord("info:" . $url));
        $pagerank = '';

        $fp = fsockopen("www.google.com", 80, $errno, $errstr, 30);
        if (!$fp) {
            echo "ERROR: $errstr ($errno)<br />\n";
            return;
        }

        // urlencode() keeps spaces, '&', '#' and CR/LF inside $url from
        // breaking (or injecting into) the request line.
        $out  = "GET /search?client=navclient-auto&ch=" . $ch . "&features=Rank&q=info:" . urlencode($url) . " HTTP/1.1\r\n" ;
        $out .= "Host: www.google.com\r\n" ;
        $out .= "Connection: Close\r\n\r\n" ;
        fwrite($fp, $out);

        while (!feof($fp)) {
            $data = fgets($fp, 128);
            if ($data === false) {
                break;
            }
            $pos = strpos($data, "Rank_");
            if ($pos !== false) {
                // The rank starts 9 characters after the start of "Rank_";
                // trim() drops the line ending that fgets() keeps.
                $pagerank = trim(substr($data, $pos + 9));
                $this->pr_image($pagerank);
            }
        }
        fclose($fp);

        echo $pagerank;
    }

    /**
     * Builds the <img> tag for a rank and stores it in $this->pr.
     *
     * Create your own images or reuse existing ones; they must be named
     * pr0.gif, pr1.gif, ... pr10.gif inside images/.
     *
     * @param mixed $pagerank whole numbers 0-9 select prN.gif; any other value
     *                        (10 included) selects pr10.gif
     * @return void
     */
    function pr_image($pagerank){
        $level = 10;
        if (is_numeric($pagerank) && $pagerank >= 0 && $pagerank <= 9 && (int) $pagerank == $pagerank) {
            $level = (int) $pagerank;
        }
        // The rank text comes from a remote response, so escape it for the attribute.
        $this->pr = "<img src=\"images/pr" . $level . ".gif\" alt=\"PageRank " . htmlspecialchars((string) $pagerank, ENT_QUOTES) . " out of 10\">" ;
    }

    /**
     * @return string|null the tag built by the last pr_image() call, if any
     */
    function get_pr(){
        return $this->pr;
    }
}

// ---------------------------------------------------------------------------
// Endpoint script: pagerank.php?url=<page>
//
// Output is the rank echoed by printrank(), or one of:
//   -1  no usable "url" parameter was supplied
//   -2  the previous lookup was less than 10 seconds ago (throttled)
// ---------------------------------------------------------------------------

// Holds the Unix timestamp of the last lookup that was let through.
$filename = 'c:\hosting\webhost4life\member\samuelhon\wwwroot\personal-finance.tv\pagerank_last_usage.txt';

$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
$do_check = false;

if ($url === '') {
    // Nothing to look up: answer without touching the throttle or the network.
    echo -1;
} else {
    // A missing, unreadable or empty stamp file counts as "never used".
    $timestamp = file_exists($filename) ? (int) file_get_contents($filename) : 0;

    if (abs(time() - $timestamp) < 10) {
        echo -2;
    } else {
        // Only query once the new timestamp has really been written, so a
        // broken stamp file cannot turn the throttle off.
        $FH = fopen($filename, 'w');
        if ($FH) {
            if (fwrite($FH, (string) time())) {
                $do_check = true;
            }
            fclose($FH);
        }
    }
}

if ($do_check) {
    $gpr = new pageRank();
    $gpr->printrank($url);
    //echo $gpr->get_pr();
}// end if
?>