I have a script that create static pages from RSS content. RSS titles that contain characters such as !@$#%^&*()_+{}|,./ ' cause url problems when the state pages are created. I've tried using the preg_replace function, but I cannot get it right. Could somebody please tell me how to fix this. I have posted the script below. Thanks
<?php

/**
 * Counts the distinct byte values in a string, ignoring spaces and ASCII
 * letter case.
 *
 * rss2html() uses the result to skip titles that are made of two or fewer
 * distinct characters.
 *
 * @param string $string Text to inspect.
 * @return int Number of different byte values that occur in the text.
 */
function unichar($string)
{
    $normalized = strtolower(str_replace(' ', '', $string));

    // Mode 1 of count_chars() lists only the byte values that actually occur.
    return count(count_chars($normalized, 1));
}

/**
 * Strips every character from a title except ASCII letters, digits, hyphen,
 * space, underscore and dot.
 *
 * Quotes and all other punctuation are removed, which is the same result the
 * original str_replace()/preg_replace() sequence produced.
 *
 * @param string $title Raw feed title.
 * @return string Cleaned title.
 */
function rss_clean_title($title)
{
    return preg_replace('/[^a-z0-9\- _.]/i', '', $title);
}

//include ("config.ini.php");

/**
 * Reads a feed through getrss() and inserts every new item into the
 * `content` table.
 *
 * An item is skipped when its title has two or fewer distinct characters
 * (see unichar()) or when a row with the same link or the same cleaned title
 * already exists. The script stops with die() if an INSERT fails.
 *
 * NOTE(review): this relies on the legacy ext/mysql functions and on a
 * connection opened elsewhere; ext/mysql was removed in PHP 7, so a port to
 * mysqli or PDO prepared statements is the long-term fix.
 *
 * @param string $url       Feed location handed to getrss().
 * @param int    $m         Maximum number of items to process; 0 means all.
 * @param int    $w         Unused here; kept for signature compatibility.
 * @param string $target    Unused here; kept for signature compatibility.
 * @param string $cssprefix Unused here; kept for signature compatibility.
 * @param array  $items     Items to start from; getrss() appends to this list.
 * @param mixed  $sourceid  Value stored in the `SourceID` column.
 * @param mixed  $sectionid Value stored in the `SectionId` column.
 * @param string $sections  Value stored in the `source` column.
 * @param mixed  $status    Value stored in the `publish` column.
 * @return string One "Inserted ...<br>" line per row that was actually added.
 */
function rss2html($url,$m=0,$w=0,$target="_blank",$cssprefix="rsslib",$items,$sourceid,$sectionid,$sections, $status)
{
    getrss($url, $items);

    // Never walk past the end of the list, whatever limit the caller asked for.
    $available = is_array($items) ? count($items) : 0;
    if ($m == 0 || $m > $available) {
        $m = $available;
    }

    $inserted = '';

    for ($i = 0; $i < $m; $i++) {
        $rawTitle    = isset($items[$i][0]) ? $items[$i][0] : '';
        $link        = isset($items[$i][1]) ? $items[$i][1] : '';
        $description = isset($items[$i][2]) ? $items[$i][2] : '';

        // Titles with almost no variety are skipped before touching the database.
        if (unichar($rawTitle) <= 2) {
            continue;
        }

        $titles = rss_clean_title($rawTitle);

        //--------mysql add-on ----------------//
        // The table stores the cleaned title, so the duplicate check has to
        // compare against the cleaned title as well.
        $sqlCheck = "SELECT link FROM `content` where link = '" . mysql_real_escape_string($link)
            . "' OR Title = '" . mysql_real_escape_string($titles) . "'";
        $checkdup = mysql_query($sqlCheck);

        // A failed lookup is treated as "no duplicate", as it was before.
        $num_rows = $checkdup ? mysql_num_rows($checkdup) : 0;
        if ($num_rows > 0) {
            continue;
        }

        $insert = mysql_query(
            "INSERT INTO `content` ( `ID` , `Title` , `description` , `source` ,`link` , `publish`, `SectionId`, `SourceID`, `timestamp` ) VALUES (NULL, '"
            . mysql_real_escape_string($titles) . "', '"
            . mysql_real_escape_string($description) . "', '"
            . mysql_real_escape_string($sections) . "', '"
            . mysql_real_escape_string($link) . "', '"
            . mysql_real_escape_string($status) . "', '"
            . mysql_real_escape_string($sectionid) . "','"
            . mysql_real_escape_string($sourceid) . "','"
            . time() . "')"
        );

        // Compare, do not assign: the old "$insert = true" could never fail.
        if ($insert === false) {
            die('Error: ' . mysql_error());
        }

        $inserted .= "Inserted " . $rawTitle . "...<br>";
        //--------mysql add-on ----------------//
    }

    return $inserted;
} //end function rss2html

// ***********************************
// ***********************************

/**
 * Downloads (or reads from cache) an RSS document and extracts its items.
 *
 * Channel fields are written to the global $channel array: 0 title, 1 link,
 * 2 description, 3 copyright/dc:rights, 4 managingEditor/dc:publisher,
 * 5 pubDate/dc:date. Each item is appended to $items as
 * array(title, link, description), with '' for any field the item lacks.
 *
 * When the global $use_cache is truthy, the document is kept in
 * "$cache_folder/<md5 of url>.rss" and refreshed once it is older than
 * $cache_valid minutes. If the document cannot be read, the function returns
 * without changing $items and without touching an existing cache file.
 *
 * @param string $url   Feed URL or file path.
 * @param array  $items Receives the parsed items (appended, passed by reference).
 * @return void
 */
function getrss($url,&$items)
{
    global $channel;
    global $use_cache,$cache_folder,$cache_valid;

    if ($use_cache) {
        $cache_filename = $cache_folder."/".md5($url).".rss";

        if (file_exists($cache_filename)) {
            $t = filemtime($cache_filename);
            $cache_create = ((!$t) || ($t < time() - 60*$cache_valid));
        } else {
            $cache_create = true;
        }

        if ($cache_create) {
            //cache not valid - create it again
            $simple = file_get_contents($url);
            if ($simple !== false) {
                // Only overwrite the cache once the download has succeeded.
                file_put_contents($cache_filename, $simple);
            }
        } else {
            $simple = file_get_contents($cache_filename);
        }
    } else {
        $simple = file_get_contents($url);
    }

    if ($simple === false) {
        return;
    }

    $p = xml_parser_create();
    xml_parse_into_struct($p, $simple, $vals);
    xml_parser_free($p);

    $type = 0;                    // becomes 1 once the first <item> has been opened
    $tmp  = array("", "", "");    // title, link, description of the item being read
    $id   = 0;                    // nesting level of the channel's direct children

    foreach ($vals as $node) {
        $tag   = $node['tag'];
        // Nodes without text carry no 'value' key at all.
        $value = isset($node['value']) ? $node['value'] : '';

        if (($tag == "CHANNEL") && ($node['type'] == "open")) {
            $id = $node['level'] + 1;
        }

        if (($type == 0) && ($id == $node['level'])) {
            // Direct child of <channel>, seen before any item.
            switch ($tag) {
                case "TITLE":
                    $channel[0] = $value;
                    break;
                case "LINK":
                    $channel[1] = $value;
                    break;
                case "DESCRIPTION":
                    $channel[2] = $value;
                    break;
                case "COPYRIGHT":
                case "DC:RIGHTS":
                    $channel[3] = $value;
                    break;
                case "MANAGINGEDITOR":
                case "DC:PUBLISHER":
                    $channel[4] = $value;
                    break;
                case "PUBDATE":
                case "DC:DATE":
                    $channel[5] = $value;
                    break;
            }
        } else {
            switch ($tag) {
                case "TITLE":
                    $tmp[0] = $value;
                    break;
                case "LINK":
                    $tmp[1] = $value;
                    break;
                case "DESCRIPTION":
                    $tmp[2] = $value;
                    break;
            }
        }

        if ($tag == "ITEM") {
            if (($node['type'] == "open") && ($type == 0)) {
                $type = 1;
            }
            if ($node['type'] == "close") {
                // Item finished: store it and start the next one from blanks.
                $items[] = $tmp;
                $tmp = array("", "", "");
            }
        }
    }
    //print_r($channel);
    //print_r($items);
} // end function getrss
?> PHP:
I assume underscores (_) are OK in filenames? If not, change the first line.
$val = str_replace(' ', '_', $val); //replace spaces with _
$val = str_replace('[', '', $val); //remove [
$val = str_replace(']', '', $val); //remove ]
$val = preg_replace('_[!@$#%^&*()+{}:;\\><~`?|,./\'"]_', '', $val);
Code (markup): -the mole