// What do you mean ignore words that are less than 4 characters long? You want
// to remove them from the string? You could make a function that splits the
// string based on spaces, loops through the array and removes any that are
// less than 4. . . A quick function I came up with:

/**
 * Removes every word shorter than a minimum length from a string.
 *
 * Words are the pieces obtained by splitting on a single space character, so
 * punctuation attached to a word counts towards its length ("There," is 6).
 * Runs of spaces produce empty pieces, which are dropped like any other
 * too-short word. Length is measured in bytes (strlen), not characters.
 *
 * @param string $baseString The text to filter.
 * @param int    $minLength  Shortest word length to keep. Defaults to 4, i.e.
 *                           words of fewer than 4 characters are removed.
 * @return string The kept words joined by single spaces, with no trailing space.
 */
function removeSmallWords($baseString, $minLength = 4)
{
    $kept = array();

    // explode() replaces spliti(), which no longer exists as of PHP 7.
    foreach (explode(' ', (string) $baseString) as $word) {
        if (strlen($word) >= $minLength) {
            $kept[] = $word;
        }
    }

    // Joining the kept words avoids the dangling separator that appending
    // "$word . ' '" in a loop leaves behind.
    return implode(' ', $kept);
}

$subjectString = "Hello There, this is a function test string";
echo removeSmallWords($subjectString); // Outputs "Hello There, this function test string"

// Hope that helps =] ~Todd
/**
 * Collects every HTML/XML tag (opening, closing or self-closing) in a document.
 *
 * @param string $tag Not used by the search; kept so existing callers that
 *                    pass it keep working.
 * @param string $xml Markup to scan.
 * @return array The preg_match_all() result in its default pattern order:
 *               index 0 holds the complete tag strings in document order,
 *               the remaining indexes hold the capture groups.
 */
function get_tags( $tag, $xml ) {
    // NOTE(review): attribute names are matched with \w+ only, so a tag that
    // carries a hyphenated attribute (http-equiv, data-*) does not appear to
    // match at all. Confirm whether that is acceptable before widening it.
    $regex = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/i";

    $matches = array();
    preg_match_all($regex, $xml, $matches);

    return $matches;
}

/**
 * Rewrites a text fragment by substituting each distinct word with the value
 * getLink() returns for it.
 *
 * The fragment is split on whitespace and commas. Each distinct word
 * (compared case-insensitively) is passed to getLink() once; when the result
 * has a different byte length than the word, every whole-word,
 * case-insensitive occurrence in the fragment is replaced by that result.
 * getLink() and log2() are defined elsewhere; presumably getLink() wraps the
 * word in link markup - verify against its definition.
 *
 * @param string $s    Text fragment to rewrite.
 * @param int    $curr Start offset supplied by the caller (by reference);
 *                     only compared with $nxt, never modified here.
 * @param int    $nxt  In/out (by reference): the fragment length on entry;
 *                     adjusted by the net byte-length change of all
 *                     replacements on return.
 * @param string $tag  Tag that precedes the fragment; used for logging only.
 * @return string The rewritten fragment, or $s unchanged when $curr equals $nxt.
 */
function splitHtmlTextOnlyProc($s, &$curr, &$nxt, $tag){
    $orig = $s;
    $str = $s;

    // Nothing to scan. Return the input rather than null so callers always
    // receive a string.
    if (($curr - $nxt) == 0) {
        return $s;
    }

    $words = preg_split("/[\s,]+/", $str);

    // NOTE(review): the "aa" seed means the word "aa" is never processed. It
    // looks like an array-initialisation leftover; kept to preserve behaviour.
    $doneWords = array("aa" => 1);

    // Replacement happens in two steps (word -> placeholder -> link) so the
    // link text is inserted literally by str_replace() instead of being
    // interpreted as a preg_replace() replacement template ($1, \1, ...).
    $kt = "zT1TATAzy";

    foreach ($words as $k) {
        $kLower = strtolower($k);
        if (trim($k) == '') {
            continue;
        }
        if (isset($doneWords[$kLower])) {
            continue;
        }
        $doneWords[$kLower] = 1;

        $k1 = getLink($k);
        $k1len = strlen($k1);
        $klen = strlen($k);
        if ($k1len == $klen) {
            // Same length is treated as "getLink() left the word alone".
            continue;
        }

        $count = 0;
        // The delimiter must be passed to preg_quote(); otherwise a word that
        // contains "/" (for example "and/or") terminates the pattern early.
        $kq = preg_quote($k, '/');
        $s1 = preg_replace("/\b" . $kq . "\b/i", $kt, $str, -1, $count);
        if ($s1 === null) {
            // Regex failure: keep the fragment as it is instead of wiping it.
            continue;
        }

        log2("splitadd1 tag $tag nxt $nxt count $count len k " . $klen . " len k1 " . $k1len . " k $k kq $kq k1\n{$k1}\n---\norig \n{$orig}\n---\n s1\n{$s1}\n---\n\n str \n{$str}\n\n---\n ");

        $str = str_replace($kt, $k1, $s1);

        $lNxt = $nxt;
        $nxt += ($k1len - $klen) * $count;
        log2("splitadd tag $tag lNxt $lNxt nxt $nxt count $count len k " . $klen . " len k1 " . $k1len . " k $k kq $kq k1\n{$k1}\n---\ns1 \n{$s1} \n---\n str\n{$str}\n---\n\n ");
    }

    return $str;
}

/**
 * Tells whether a tag string starts with the given tag prefix as a complete
 * tag name, so that "<a" matches "<a>" and "<a href=...>" but not "<abbr>".
 *
 * @param string $tag    Lower-cased tag string such as '<a href="x">'.
 * @param string $prefix Lower-case prefix such as '<a', '</a' or '</title>'.
 * @return bool
 */
function hvisitTagStartsWith($tag, $prefix)
{
    $prefixLen = strlen($prefix);
    if (strncmp($tag, $prefix, $prefixLen) !== 0) {
        return false;
    }
    if (strlen($tag) === $prefixLen) {
        return true;
    }

    // A word character right after the prefix means the tag name continues.
    return !preg_match('/^\w/', substr($tag, $prefixLen, 1));
}

/**
 * Walks an HTML document tag by tag and rewrites the text that follows each
 * tag through splitHtmlTextOnlyProc().
 *
 * Text is left untouched when it sits inside an anchor (between an <a ...>
 * tag and the next </a>) or directly follows one of the "no-go" tags listed
 * in $nogos. log2() and log4() are loggers defined elsewhere.
 *
 * @param string $html Document to rewrite.
 * @param array  $tags Result of get_tags() for the same document; only
 *                     $tags[0], the list of tag strings in document order,
 *                     is used.
 * @return string The rewritten document.
 */
function hvisit( $html, $tags ) {
    // Declared up front; it is only interpolated into error log lines.
    global $fileNoLog4;

    $ll = "\n";
    $nogos = array("<html", "<head", "<script", "<a", "<meta", "<style", "</head", "</style", "<title", "</title>", "<base", "<iframe");

    if (empty($tags[0])) {
        return $html;
    }

    // Tags are compared in lower case; the document itself is searched
    // case-insensitively with stripos().
    $tags[0] = array_values(array_map('strtolower', $tags[0]));
    $ln1 = count($tags[0]);

    $loc1 = 0;        // cursor: offset just past the most recently located tag
    $ahref = false;   // true while the cursor is inside an anchor element

    foreach ($tags[0] as $i => $tag) {
        $ln_tag = strlen($tag);
        $nextTag = ($i + 1 < $ln1) ? $tags[0][$i + 1] : '';
        $nogo = '';
        $r = false;

        log2("iterate tag $tag loc1 $loc1 ln_tag $ln_tag ");

        // Test the search result BEFORE adding the tag length: false + n
        // would silently become a bogus offset near the start of the document.
        $found = stripos($html, $tag, $loc1);
        if ($found === false) {
            // The cursor is deliberately left where it is. Searching backward
            // could land inside text that was already rewritten or skipped.
            log2 ("<!-- unex err 2 fileNoLog4 $fileNoLog4 -->\n");
            log2("newLoc1 C er loc1 $loc1 fileNoLog4 $fileNoLog4 ");
        } else {
            $loc1 = $found + $ln_tag;
            log2("newLoc1 A loc1 $loc1 ");
        }

        // Track anchor state by tag NAME so that <abbr>, <article>, <aside>
        // and similar tags are not mistaken for <a>.
        if (hvisitTagStartsWith($tag, '</a')) {
            $ahref = false;
            log2("closing a tag 3439");
        } elseif (!$ahref && hvisitTagStartsWith($tag, '<a')) {
            $ahref = true;
        }
        if ($ahref) {
            log2("c1a href $tag nogo2 $nogo r $r loc1 $loc1 $ll");
            continue;
        }

        if ($found === false) {
            // Without a reliable position there is no segment to rewrite.
            continue;
        }

        foreach ($nogos as $candidate) {
            if (hvisitTagStartsWith($tag, $candidate)) {
                $nogo = $candidate;
                $r = 0;
                break;
            }
        }
        if ($r !== false) {
            log2("c1 $tag nogo $nogo r $r $ll");
            log2("newA loc1 $loc1 +ln ");
            log2("c1a $tag nogo2 $nogo r $r loc1 $loc1 $ll");
            continue;
        }

        log2 (" proc3 c2 $tag \nloc1 $loc1 next |" . $nextTag . "| i $i ln1 $ln1 \n");

        // The segment to rewrite runs from the cursor to the next tag, or to
        // the end of the document after the last tag.
        if ($nextTag != '') {
            log2 ("<!-- search end " . $nextTag . "\n");
            $loc2 = stripos($html, $nextTag, $loc1);
            if ($loc2 === false) {
                // Next tag not found ahead of the cursor: use an empty
                // segment. A backward search could only yield a negative
                // length and corrupt the document.
                log2 ("<!-- unex err 3 fileNoLog4 $fileNoLog4 -->\n");
                $loc2 = $loc1;
            }
        } else {
            $loc2 = strlen($html);
        }
        log2 ("\n loc2 match $loc2 before loc1 $loc1 $ll");

        $ln2 = $loc2 - $loc1;
        $start = 0;
        $s3 = (string) substr($html, $loc1, $ln2);
        $lns3 = strlen($s3);

        log4("before loc1 $loc1 loc2 $loc2 ln2 $ln2 \n\n{$html}");
        log4("before sending loc1 $loc1 ln2 $ln2 \n{$tag}\n\n{$s3}");

        $s = splitHtmlTextOnlyProc($s3, $start, $lns3, $tag);

        // Compare content, not length: replacements whose length changes
        // cancel out would otherwise be thrown away.
        if (is_string($s) && $s !== $s3) {
            $html = substr($html, 0, $loc1) . $s . substr($html, $loc2);
            // Step over the inserted text so markup added by the replacement
            // is never matched against the remaining tags.
            $loc1 += strlen($s);
            log2 ("split done with replce after loc1 $loc1 lns3 $lns3 $ll");
            log4("after loc $loc1 loc2 $loc2 ln2 $ln2 lns3 $lns3 html \n\n{$html}");
            log4("after loc $loc1 loc2 $loc2 ln2 $ln2 lns3 $lns3 s \n\n{$s}");
        } else {
            log2 ("split done no replces $ll");
        }
    }

    return $html;
}