I have been creating a sitemap generator with a friend of mine, and it has been going great until now. I had just uploaded the script to my site, and was going to release it, but I found a huge problem. No matter what URL I input, it didn't want to read it. I have attached the code, and you can find a semi-working version at http://programmingseo.com/site.php <?php # The site url to crawl. if(IsSet($_REQUEST['starturl'])){ $siteurl = ($_REQUEST['starturl']); } # The frequency of updating. if(IsSet($_REQUEST['freq'])){ $frequency = ($_REQUEST['freq']); } # Priority of page in relation to other parts of site. if(IsSet($_REQUEST['start_priority'])){ $priority = ($_REQUEST['start_priority']); } # Include last modification date. if(IsSet($_REQUEST['mod_date'])){ $lastmodification = ($_REQUEST['mod_date']); //working on this } # File extensions to include. $extensions = array("htm", "html", "php", "asp", "pdf"); # Try to index dynamic web pages that have a parameter in there url (?). Valid options # are true or false. Use this at your own risk, could capture session info # which possibly could cause problems during the Google index process. if(IsSet($_REQUEST['index_dynpages'])){ $index_dynamic_pages_params = ($_REQUEST['index_dynpages']); } # Gets a URLs path minus the actual filename + query. function getPath($url) { if($GLOBALS['index_dynamic_pages_params'] == true){ $url = explode("?", $url); $url = $url[0]; } $temp = explode("/", $url); $fnsize=strlen($temp[(count($temp) - 1)]); return substr($url, 0, strlen($url) - $fnsize); } # Cleans up a path so that extra / slashes are gone, translated, etc function cleanPath($url) { $new = array(); $url = explode("/", trim($url)); foreach($url as $p){ $p = trim($p); if($p != "" && $p != "."){ if($p == ".."){ if(is_array($new))$new = array_pop($new); }else{ $new = array_merge((array) $new, array($p)); } } } $url = $new[0]."/"; for($i=1; $i < count($new); $i++){ $url .= "/".$new[$i]; } return $url; } # Checks if URL has specified extension, if so returns true function checkExt($url, $ext){ # Strip out parameter info from a script (?) if($GLOBALS['index_dynamic_pages_params'] == true){ $url = explode("?", $url); $url = $url[0]; } $text=substr($url, strlen($url) - (strlen($ext) + 1), strlen($url)); if($text == ".".$ext){ return true; }else{ return false; } } # Retrieve Site URLs function getUrls($url, $string) { $type = "href"; # Regex to chop out urls preg_match_all("|$type\=\"?'?`?([[:alnum:]:?=&@/._-]+)\"?'?`?|i", $string, $matches); $ret[$type] = $matches[1]; # Make all URLS literal (full path) for($i=0; $i<count($ret['href']); $i++){ if(!preg_match( '/^(http|https):\/\//i' , $ret['href'][$i])){ $ret['href'][$i] = getPath($url)."/".$ret['href'][$i]; } $ret['href'][$i] = cleanPath($ret['href'][$i]); } return $ret; } function addUrls($urls) { if(is_array($urls)){ for($i=0; $i < count($urls['href']); $i++){ $skip = 0; # Cycle through to make sure url is unique for($x=0; $x < count($GLOBALS['urls']); $x++){ if($GLOBALS['urls'][$x] == $urls['href'][$i]){ $skip = 1; break; } # Check extension $extgood = 0; foreach($GLOBALS['extensions'] as $ext){ if(checkExt($urls['href'][$i], $ext))$extgood = 1; } # Make sure its in the current website if(!stristr($urls['href'][$i], $siteurl))$skip = 1; if($skip == 0 && $extgood == 1)$GLOBALS['urls'][] = $urls['href'][$i]; } } } } function getNextUrl($oldurl) { if($oldurl == "")return $GLOBALS['urls'][0]; for($i=0; $i < count($GLOBALS['urls']); $i++){ if($GLOBALS['urls'][$i] == $oldurl){ if(isset($GLOBALS['urls'][($i+1)])){ return $GLOBALS['urls'][($i+1)]; }else{ return false; } } } return false; } $urls = array($siteurl); $turl = ""; # Cycle through tree and build a url list while($turl = getNextUrl($turl)){ # Extend script time limit set_time_limit(120); $html=''; # Read html file into memory if($html == file($turl)){ // You're setting $html = $turl, use double equal signs $html = stripslashes(implode($html)); print($turl."\n"); # Get site urls from html and add new unique url to list if needed addUrls(getUrls($turl, $html)); }else{ print("Failed reading URL: $turl\n"); } } function generateSitemap(){ $xml_string = '<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.google.com/schemas/sitemap/0.84" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.google.com/schemas/sitemap/0.84 http://www.google.com/schemas/sitemap/0.84/sitemap.xsd">'; foreach($urls as $url){ $xml_string .= "<url><loc>$urls</loc><changefreq>$frequency.</changefreq><priority>$priority.</priority></url>\n"; } $xml_string .= "</urlset>"; print("\nDumping result to screen...\n<br /><br /><br />\n\n\n"); print('<textarea rows="25" cols="70" style="width:100%">'.$xml_string.'</textarea>'); } ?> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.1 Transitional//EN"> <html> <head> <title>Sitemap Generator - ProgrammingSEO.com</title> <style type="text/css"> a {color:blue;text-decoration:none;} a:hover {color:red;} </style> </head> <body> <h1>Google Sitemap Generator</h1> <?php if(IsSet($_POST['action'])){ if($_POST['action'] == 'Create Sitemap')generateSitemap(); } ?> <h2>Settings</h2> <form action="site.php" method="post"> Site URL: <input type="text" name="starturl" size="30" value ="http://yoursite.com/"/><br/> <br/>Priority : <input type="text" name="start_priority" size="3" value="1.0" /> Priority of page in relation to other parts of your site. A number from 0.1 to 1.0 is acceptable. <br/> <br/>Change Frequency <br/> <input type="radio" name="" value="always" /> Always<br/> <input type="radio" name="" value="hourly" /> Hourly<br/> <input type="radio" name="" value="daily" /> Daily<br/> <input type="radio" name="" value="weekly" /> Weekly<br/> <input type="radio" name="" value="monthly" /> Monthly<br/> <input type="radio" name="" value="yearly" /> Yearly<br/> <input type="radio" name="" value="never" /> Never<br/><br/> <br/>Post the Last Modification Date?<br/> <input type="radio" checked="checked" name="mod_date" value ="true" /> Yes<br/> <input type="radio" name="mod_date" value ="false" /> No <br/> <br/> <br/>Index Dynamic Pages?<small> (May index sessions!)</small><br/> <input type="radio" checked="checked" name="index_dynpages" value ="true" /> Yes<br/> <input type="radio" name="index_dynpages" value ="false" /> No <br/> <br/> <input type="submit" name="action" value="Go!" /> </form> </body> </html> PHP: I greatly appreciate any help, I've spent a few days trying to debug this. I know it's probably something obvious, but I'm pretty new. Thanks again.