Sweet - thanks for the tips! I only updated the function outside of the for loop. If anyone wants to tear apart the code and integrate the other changes from: http://foliovision.com/2007/11/07/phpp-speed-security-code-optimization/ It would be greatly appreciated - especially removing all the $GLOBALS tags.
<?php
/**
 * Splits the domain name held in $GLOBALS['current_domain'] into dictionary words.
 *
 * Labels found in the `tlds` table are cut off the right-hand end of the
 * domain, hyphens are removed, and the remaining text is broken into words by
 * repeated calls to find_keywords().
 *
 * Globals read:    current_domain, keywords (the first lookup is skipped when
 *                  it is already non-empty)
 * Globals written: no_tld, count, keywords, keywordsnospace, key_full,
 *                  final_keywords
 *
 * @return string The first entry of key_full when a full-length match exists,
 *                otherwise keywords[0], after the stray-"s" clean-up below.
 */
function remove_tld()
{
    // Fix: start from a defined state. With 'count' unset, the first
    // find_keywords() call indexed the arrays with null (which PHP stores
    // under the key ""), so slot 0 was never filled even though the matching
    // loop below reads it.
    if (!isset($GLOBALS['count'])) {
        $GLOBALS['count'] = 0;
    }
    foreach (array('keywords', 'keywordsnospace', 'key_full') as $name) {
        if (!isset($GLOBALS[$name])) {
            $GLOBALS[$name] = array();
        }
    }

    $periods = explode(".", $GLOBALS['current_domain']);
    $strdomain = $GLOBALS['current_domain'];

    // Fix: the original read "y=count($periods);" (missing "$"), a parse error.
    $y = count($periods);

    // Walk the labels right to left (never the left-most one) and cut every
    // label that is listed in `tlds` off the end of the working string.
    for ($x = $y - 1; $x > 0; $x--) {
        // Escaped because the label comes straight from the domain string.
        $tld = mysql_real_escape_string("." . $periods[$x]);
        $result = mysql_query("SELECT * FROM `tlds` WHERE `tld` = '" . $tld . "'");
        // A failed query returns false; treat it as "not a TLD".
        if ($result !== false && mysql_num_rows($result) != 0) {
            $strdomain = substr($strdomain, 0, strlen($strdomain) - (strlen($periods[$x]) + 1));
        }
    }

    $GLOBALS['no_tld'] = str_replace("-", "", $strdomain);

    // Seed the candidate list with every dictionary word that prefixes the name.
    if (empty($GLOBALS['keywords'])) {
        find_keywords($GLOBALS['no_tld'], $GLOBALS['count']);
    }

    // Extend each candidate until it covers the whole name. 'count' can grow
    // while this loop runs because find_keywords() may fork new candidates.
    for ($x = 0; $x < $GLOBALS['count']; $x++) {
        // ">" instead of the original "!=": identical while a candidate is
        // shorter than the name, but it also stops if one ever ends up longer,
        // where "!=" would never become false.
        while (strlen($GLOBALS['no_tld']) > strlen($GLOBALS['keywordsnospace'][$x])) {
            find_keywords(substr($GLOBALS['no_tld'], strlen($GLOBALS['keywordsnospace'][$x])), $x);
        }
        if (!empty($GLOBALS['key_full'])) {
            break;
        }
    }

    if (!empty($GLOBALS['key_full'])) {
        $GLOBALS['final_keywords'] = $GLOBALS['key_full'][0];
    } else {
        $GLOBALS['final_keywords'] = $GLOBALS['keywords'][0];
    }

    // Glue a lone "s" back onto the word before it ("moon s" -> "moons").
    $GLOBALS['final_keywords'] = str_replace(" s ", "s ", $GLOBALS['final_keywords']);
    if (substr($GLOBALS['final_keywords'], strlen($GLOBALS['final_keywords']) - 2) == " s") {
        $GLOBALS['final_keywords'] = substr($GLOBALS['final_keywords'], 0, strlen($GLOBALS['final_keywords']) - 2) . "s";
    }

    return $GLOBALS['final_keywords'];
}

/**
 * Looks up every prefix of $text in the `dictionary` table and records the
 * matches as candidate phrases in the global keyword arrays.
 *
 * Rows are returned longest word first and handled as follows:
 *  - while slot $number is empty, the row becomes a new candidate stored at
 *    index 'count', and both 'count' and $number advance;
 *  - otherwise the first row is appended to slot $number, and every later row
 *    forks a new candidate made of the slot's previous value plus that row.
 * When no row matches (or the query fails), $text itself is appended to slot
 * $number. Any candidate whose space-free form is as long as no_tld is also
 * pushed onto key_full.
 *
 * @param string $text   Text whose prefixes are looked up (lower-cased first).
 * @param int    $number Index of the candidate slot to extend.
 * @return void
 */
function find_keywords($text, $number)
{
    $text = strtolower($text);

    // One equality test per prefix, longest prefix first; each value is
    // escaped because it is derived from the domain string.
    $conditions = array();
    for ($x = 0; $x < strlen($text); $x++) {
        $conditions[] = " `word` = '" . mysql_real_escape_string(substr($text, 0, strlen($text) - $x)) . "'";
    }
    $where = implode(" OR", $conditions);

    $result = mysql_query("SELECT `word` FROM dictionary WHERE" . $where . " ORDER BY CHAR_LENGTH(`word`) DESC");

    if ($result !== false && mysql_num_rows($result) != 0) {
        $z = 0; // position of the current row within this result set
        // Defined up front so the fork branch never reads an unset variable.
        $lastkeys = '';
        $lastkeysnospace = '';
        while ($row = mysql_fetch_row($result)) {
            $row[0] = strtolower($row[0]);
            if (empty($GLOBALS['keywords'][$number])) {
                $GLOBALS['keywords'][$GLOBALS['count']] = $row[0];
                $GLOBALS['keywordsnospace'][$GLOBALS['count']] = $row[0];
                $GLOBALS['count']++;
                $number++;
            } elseif ($z == 0 && !empty($GLOBALS['keywords'][$number])) {
                // Remember the slot before extending it so later rows can fork from it.
                $lastkeys = $GLOBALS['keywords'][$number];
                $lastkeysnospace = $GLOBALS['keywordsnospace'][$number];
                $GLOBALS['keywords'][$number] .= " " . $row[0];
                $GLOBALS['keywordsnospace'][$number] .= $row[0];
                if (strlen($GLOBALS['no_tld']) == strlen($GLOBALS['keywordsnospace'][$number])) {
                    $GLOBALS['key_full'][] = $GLOBALS['keywords'][$number];
                }
            } elseif ($z != 0 && !empty($GLOBALS['keywords'][$number])) {
                $GLOBALS['keywords'][$GLOBALS['count']] = $lastkeys . " " . $row[0];
                $GLOBALS['keywordsnospace'][$GLOBALS['count']] = $lastkeysnospace . $row[0];
                if (strlen($GLOBALS['no_tld']) == strlen($GLOBALS['keywordsnospace'][$GLOBALS['count']])) {
                    $GLOBALS['key_full'][] = $GLOBALS['keywords'][$GLOBALS['count']];
                }
                $GLOBALS['count']++;
            }
            $z++;
        }
    } else {
        // Nothing in the dictionary: keep the leftover text as one word.
        $GLOBALS['keywords'][$number] .= " " . $text;
        $GLOBALS['keywordsnospace'][$number] .= $text;
    }
}

$hostname = "";
$username = "";
$password = "";
$dbname = "";

// NOTE: the mysql_* functions used throughout were removed in PHP 7; running
// this on a current PHP requires porting the calls to mysqli or PDO.
mysql_connect($hostname, $username, $password) OR DIE ("Unable to connect to database! Please try again later.");
mysql_select_db($dbname);

$GLOBALS['current_domain'] = "Iranacrossthemooon.com";
print $GLOBALS['current_domain']." => ".remove_tld($GLOBALS['current_domain'])."\n";
?> PHP:
That's way too much code... Try this one... http://www.phpbyexample.com/example.php This is part of my http_parallel.php class; it's PHP using multi-threaded sockets. It can crawl 500,000 pages an hour using 50 socket streams. It can extract keywords sorted by relevance and build tag clouds; basically, it can index the complete page (links, images, CSS, JavaScript, spam rating, keywords, checks for unsafe scripting), and it can open Flash files and grab the links out of them too. The example will crawl the first (5) links found on the page and extract the keywords from those links! If you want the script, PM me!
Hi dataman! That script is not what I was originally looking for. Once again, I was looking for a script that finds which keywords a domain name contains. For example: DataMan.com => Data Man; Iranacrossthemoon.com => I ran across the moon
This is a snippet of my script, and it runs pretty fast: { // print "$i\t: ".$gurls[$i]."\n"; // print "<span class=\"style1\">Memory Usage is - </span>" .(memory_get_usage(6041952)."<span class=\"style1\"> - in bytes</span>\n"); logstr("log-p.txt",memory_get_usage()."\n"); logstr("log-p.txt","$i\t: ".$gurls[$i]."\n"); unset($links); $links=array(); $links=parseforlinks("http://".$gurls[$i],$regexp); if(!count($links)) break; $linkcnt=0; $dubcnt=0; for($j=0;$j<count($links);$j++) { $caption=''; if($type==1) { $words=$links[$j]; preg_match("/rapidshare\.com\/files\/\d+\/(.+)/",$words,$match); unset($words); $words=$match[1]; unset($match); $words=preg_split("/[_\.\-]/",$words); $lastword=array_pop($words); if($lastword=="html") array_pop($words); $words=implode(" ",$words); $words=preg_replace("/\s{2,}/"," ",$words); $caption=mysql_real_escape_string($words); unset($words); } $url=mysql_real_escape_string($links[$j]); $source=mysql_real_escape_string("http://".$gurls[$i]); $query="INSERT INTO `v2links` VALUES(NULL,'$url','$caption','0000-00-00 00:00:00',0,'','$source','$type')"; PHP: It splits my link up to get the name, e.g. http://www.rapidshare.com/files/41201717/[B]Eminem_-_Eminem_Presents_the_Re-Up__2006_.part2.rar[/B] Code (markup): and then gives it the name "Eminem Eminem Presents the Re Up 2006 part2" Code (markup): Hope it helps. It helps my searches, and then adds each word into my database as new keywords.