Ignore small words

Discussion in 'PHP' started by AT-XE, Jul 24, 2008.

  1. #1
    Hello guys, is there any way of ignoring words with fewer than 4 characters in a string?

    Thanks :)
     
    AT-XE, Jul 24, 2008 IP
  2. ToddMicheau

    ToddMicheau Active Member

    Messages:
    183
    Likes Received:
    11
    Best Answers:
    0
    Trophy Points:
    58
    #2
    What do you mean ignore words that are less than 4 characters long? You want to remove them from the string? You could make a function that splits the string based on spaces, loops through the array and removes any that are less than 4. . .

    A quick function I came up with:

    
    <?
    /**
     * Remove every word shorter than $minLength characters from a string.
     *
     * Words are the pieces of $baseString separated by a single space, so
     * punctuation attached to a word (e.g. "There,") counts towards its length.
     * Length is measured with strlen(), i.e. in bytes.
     *
     * @param string $baseString Text to filter.
     * @param int    $minLength  Minimum length a word needs in order to be kept (default 4).
     * @return string The kept words joined by single spaces, with no trailing space.
     */
    function removeSmallWords($baseString, $minLength = 4){
    	$keptWords = array();
    	// explode() replaces spliti(), which was deprecated in PHP 5.3 and removed in PHP 7.0.
    	foreach (explode(" ", $baseString) as $word) {
    		// Empty pieces produced by consecutive spaces have length 0, so any
    		// $minLength of 1 or more drops them as well.
    		if (strlen($word) >= $minLength) {
    			$keptWords[] = $word;
    		}
    	}
    	// implode() avoids the dangling separator that repeated concatenation left behind.
    	return implode(" ", $keptWords);
    }
    
    $subjectString = "Hello There, this is a function test string";
    echo removeSmallWords($subjectString);
    
    // Outputs "Hello There, this function test string"
    
    ?>
    
    PHP:
    Hope that helps =]
    ~Todd
     
    ToddMicheau, Jul 24, 2008 IP
  3. tgkprog

    tgkprog Peon

    Messages:
    28
    Likes Received:
    1
    Best Answers:
    0
    Trophy Points:
    0
    #3
    
    /**
     * Find every HTML/XML tag (opening, closing or self-closing) in a string.
     *
     * @param string $tag Unused; kept so that existing callers keep working.
     * @param string $xml Markup to scan.
     * @return array The preg_match_all() result in its default pattern order:
     *               index 0 lists the complete tag strings in document order,
     *               the remaining indexes list the pattern's capture groups.
     */
    function get_tags( $tag, $xml ) {
       // Tag name, then zero or more attributes (bare, or with a double-quoted,
       // single-quoted or unquoted value), then an optional "/" before ">".
       $regex = "/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/i";

       $matches = array();
       preg_match_all($regex, $xml, $matches);

       return $matches;
     }
    
    
    /**
     * Replace the words of a text fragment with whatever getLink() returns for them.
     *
     * The fragment is split on whitespace and commas. Each distinct word (compared
     * case-insensitively) is passed to getLink() once. When the returned string has a
     * different length than the word, every whole-word, case-insensitive occurrence of
     * that word in the fragment is replaced by it.
     *
     * Relies on getLink() and log2(), which are defined outside this block.
     *
     * @param string $s     Text fragment to process.
     * @param int    &$curr Start offset; only compared with $nxt to detect an empty range.
     * @param int    &$nxt  End offset; increased by the length difference of every replacement made.
     * @param string $tag   Tag preceding the fragment; used in log messages only.
     * @return string The fragment with replacements applied ($s unchanged when the range is empty).
     */
    function splitHtmlTextOnlyProc($s, &$curr, &$nxt, $tag){
    	$orig = $s;
    	$str = $s;
    
    	// Empty range: hand the text back untouched instead of returning null.
    	if(($curr - $nxt) == 0)return $s;
    
    	$words = preg_split("/[\s,]+/", $str);
    	// NOTE(review): "aa" is pre-seeded as already handled, so the word "aa" is never
    	// passed to getLink(). Kept from the original; presumably only meant to
    	// initialise the array - confirm.
    	$doneWords = array("aa" => 1);
    
    	foreach($words as $k ){
    		if(trim($k)=='')continue;
    
    		// Handle each word once, regardless of case.
    		$kLower = strtolower($k);
    		if(isset($doneWords[$kLower]))continue;
    		$doneWords[$kLower]=1;
    
    		$k1 = getLink($k);
    		$k1len = strlen($k1);
    		$klen = strlen($k);
    		// Same length is taken to mean getLink() left the word alone.
    		if($k1len == $klen)continue;
    
    		$count = 0;
    		// Pass the delimiter so that a "/" inside the word (e.g. "and/or") is escaped
    		// too; without it the pattern is invalid and preg_replace() returns null.
    		$kq = preg_quote($k, '/');
    		// Two-step replacement through a placeholder - presumably so that "$" or "\"
    		// sequences in the link are not interpreted as backreferences.
    		$kt = "zT1TATAzy";
    		$s1 =  preg_replace ( "/\b" . $kq . "\b/i", $kt , $str, -1, $count);
    		if($s1 === null){
    			// The regex engine failed; keep the text gathered so far rather than wiping it.
    			continue;
    		}
    
    		log2("splitadd1 tag $tag nxt $nxt count  $count len k " . $klen  . " len k1 "  . $k1len . " k $k kq $kq k1\n{$k1}\n---\norig \n{$orig}\n---\n s1\n{$s1}\n---\n\n str \n{$str}\n\n---\n ");
    
    		$str = str_replace($kt, $k1, $s1);
    
    		// Let the caller know how much the fragment grew or shrank.
    		$lNxt= $nxt ;
    		$nxt += ($k1len  - $klen ) * $count;
    		log2("splitadd tag $tag lNxt $lNxt nxt $nxt  count  $count len k " . $klen  . " len k1 "  . $k1len . " k $k kq $kq k1\n{$k1}\n---\ns1 \n{$s1} \n---\n str\n{$str}\n---\n\n ");
    	}
    
    	return $str;
    
    }
    
    /**
     * Walk the tags of an HTML document and run the text that follows each eligible
     * tag through splitHtmlTextOnlyProc(), splicing any changed text back into the
     * document.
     *
     * The text after a tag is left alone while inside an anchor (from a tag containing
     * "<a" until a closing "</a>") and after any tag that matches an entry of $nogos.
     *
     * Relies on log2(), log4() and the global $fileNoLog4, all defined outside this block.
     *
     * @param string $html Document to process.
     * @param array  $tags Tag matches; only $tags[0] (the list of tag strings) is read.
     *                     Presumably the return value of get_tags() - confirm with caller.
     * @return string The document with the replaced text spliced in.
    */
    
    function hvisit( $html, $tags ) {
    
    	 // Line terminator appended to some log messages.
    	 $ll="\n";
    	 // NOTE(review): $spl is never used in this function.
    	 global $spl;
    	 // Tags containing any of these strings do not get their following text processed.
    	 $nogos = array("<html", "<head", "<script", "<a", "<meta", "<style", "</head", "</style", "<title", "</title>", "<base", "<iframe");
    
    	 //log2 ("Tags \n ");
    	 //print_r($tags[0]);
    
    	 // Lower-case every tag string in place.
    	 $i = 0;
    	 foreach($tags[0] as $tag){
    		 $tags[0][$i] = strtolower($tags[0][$i]);
    		 // "\n<!-- $i $tag \n" . $tags[0][$i] . "-->" ;
    		 $i++;
    	 }
    
    
    
    	 // $loc1: offset in $html just past the current tag.
    	 // $loc2: offset where the next tag starts (end of the text to process).
    	 // $oldLoc1: $loc1 as it was after the previously handled tag.
    	 // $ahref: true while the walk is inside an anchor element.
    	 $i = -1;
    	 $loc1 = 0;
    	 $loc2 = 0;
    	 $oldLoc1= $loc1;
    	 $ln1 = count($tags[0]);
    	 $ln1_1 = $ln1 - 1;
    	 $ahref = false;
    	 foreach($tags[0] as $tag){
    		$i++;
    
    		// The unary plus has no effect on the integer strlen() returns.
    		$ln_tag = + strlen($tag);
    		log2("iterate tag $tag loc1 $loc1 ln_tag  $ln_tag ");
    		// NOTE(review): when stripos() returns false the addition turns it into
    		// $ln_tag, so the `$loc1===false` checks further down can never succeed.
    		$loc1 = stripos($html, $tag, $loc1) + $ln_tag;
    		log2("newLoc1 A loc1 $loc1 ");
    
    		// Detect a closing anchor tag.
    		// NOTE(review): `== NULL` / `!= null` are loose comparisons, so offset 0 is
    		// treated like "not found"; a plain "</a>" is caught by the $tag0 comparison instead.
    		$aa1 = stripos($tag, "</a>");
    		if($aa1 ===  false || $aa1 == NULL)$aa1 = stripos($tag, "</a ");
    		$tag0 = str_replace(' ', '', $tag);
    		//log2(" tag0 $tag0");
    		if(($aa1 !== false && $aa1 != null) || $tag0 == '</a>'){
    			//closing a tag
    			$ahref = false;
    			log2("closing a tag 3439");
    		}
    		else{
    			// Not inside an anchor yet: check whether this tag opens one. This is a
    			// substring test, so any tag containing "<a" (e.g. "<abbr") also matches.
    			if(!$ahref)$aa1 = stripos($tag, '<a');
    		}
    		// Inside an anchor (or just entered one): skip the text after this tag.
    		if($ahref || ($aa1 !==  false && $aa1 !== NULL)){
    			$ahref = true;
    
    			// NOTE(review): $nogo and $r are only assigned by the loop below, so here
    			// they are undefined or left over from an earlier iteration.
    			log2("c1a href $tag nogo2 $nogo r $r loc1  $loc1 $ll");
    			continue;
    		}
    
    		// Look for a no-go string in the tag; on a match, move the offsets past the tag.
    		foreach($nogos as $nogo){
    			$r = stripos($tag, $nogo);
    
    			if($r !== false && $r !== NULL){
    				log2("c1 $tag nogo $nogo r $r $ll");
    
    				//$loc1 += $ln_tag;
    				log2("newA loc1 $loc1 +ln ");
    				$loc2 = $loc1;
    				$oldLoc1= $loc1;
    				break;
    
    			}
    		}
    		// $r still holds the result of the last stripos() above: a match means skip this tag.
    		if($r !== false && $r !== NULL){
    
    			log2("c1a $tag nogo2 $nogo r $r loc1  $loc1 $ll");
    			continue;
    		}
    
    
    		// NOTE(review): for the last tag, index $i + 1 does not exist in $tags[0].
    		log2 (" proc3 c2 $tag \nloc1 $loc1 next |" . $tags[0][$i + 1] . "| i $i ln1 $ln1 \n");
    		// Fallbacks for a tag that was not found (see the note where $loc1 is computed).
    		if($loc1===false){
    			$loc1 = stripos($html, $tag, $oldLoc1) + $ln_tag;
    			log2 ("<!-- unex err fileNoLog4 $fileNoLog4  -->\n");
    			log2("newLoc1 B er loc1 $loc1 ");
    		}
    		if($loc1===false){
    			log2 ("<!-- unex err 2 fileNoLog4 $fileNoLog4  -->\n");
    			$loc1 =$oldLoc1;
    			log2("newLoc1 C er loc1 $loc1 fileNoLog4 $fileNoLog4  ");
    		}
    
    		// NOTE(review): declared only here, after the log calls above that already
    		// interpolate $fileNoLog4.
    		global $fileNoLog4;
    
    		// Find where the next tag starts, searching from just before $loc1; fall back
    		// to searching from $oldLoc1, then to the end of the document.
    		if($i < $ln1_1 && $tags[0][$i + 1] != ''){
    			log2 ("<!-- search end " . $tags[0][$i + 1] . "\n");
    			$loc2 = stripos($html, $tags[0][$i + 1], $loc1-1);
    			if($loc2=== false){
    				$loc2 = stripos($html, $tags[0][$i + 1], $oldLoc1);
    				log2 ("<!-- unex err 3 fileNoLog4 $fileNoLog4 -->\n");
    			}
    			if($loc2=== false){
    				$loc2 = strlen($html);
    				log2 ("<!-- unex err 4 fileNoLog4 $fileNoLog4 -->\n");
    			}
    		}else{
    			// Last tag (or an empty next tag): the text runs to the end of the document.
    			$loc2 = strlen($html);
    		}
    		log2 ("\n loc2 match $loc2 before loc1 $loc1 $ll");
    		$ln2 = $loc2 - $loc1;
    
    		// Extract the text between this tag and the next one and process it.
    		// $lns3 is passed by reference and adjusted by splitHtmlTextOnlyProc();
    		// $ln3a keeps the original length so a change can be detected afterwards.
    		$start= 0;
    		$s3 = substr($html, $loc1, $ln2);
    		$lns3 = strlen($s3);
    		$ln3a = $lns3 ;
    		log4("before loc1 $loc1 loc2 $loc2 ln2 $ln2 \n\n{$html}");
    		log4("before sending loc1 $loc1 ln2 $ln2 \n{$tag}\n\n{$s3}");
    		$s = splitHtmlTextOnlyProc($s3, $start , $lns3, $tag);
    		if($lns3 != $ln3a ){
    			// The length changed: splice the processed text in place of the original.
    			$html = substr($html, 0, $loc1) . $s . substr($html, $loc2);
    
    			//$loc1 = $loc1 + ln2 - $ln3a;
    			//$loc1 += strlen($s) - $lns3;
    			// Advance past the spliced-in text, using the updated length.
    			$loc1 = $loc1 +  $lns3;
    			log2 ("split done with replce after loc1 $loc1 lns3 $lns3  $ll");
    			log4("after loc $loc1 loc2 $loc2 ln2 $ln2 lns3 $lns3 html \n\n{$html}");
    			log4("after loc $loc1 loc2 $loc2 ln2 $ln2 lns3 $lns3 s \n\n{$s}");
    		}else{
    			log2 ("split done no replces $ll");
    
    		}
    		$oldLoc1= $loc1;
    
    
    	 }
    	 return $html;
    
    }
    
    
    PHP:
     
    tgkprog, Jul 24, 2008 IP
  4. AT-XE

    AT-XE Peon

    Messages:
    676
    Likes Received:
    15
    Best Answers:
    0
    Trophy Points:
    0
    #4
    Thank you all very much, guys!
     
    AT-XE, Jul 26, 2008 IP