1. Advertising
    y u no do it?

    Advertising (learn more)

    Advertise virtually anything here, with CPM banner ads, CPM email ads and CPC contextual links. You can target relevant areas of the site and show ads based on geographical location of the user if you wish.

    Starts at just $1 per CPM or $0.10 per CPC.

I want to join arrays

Discussion in 'PHP' started by gilgalbiblewheel, Jul 1, 2018.

  1. #1
    How do I join the $showstrongs, $showstrongs2 and $showstrongs3 into the $total_etymologyStrongs array?
    SEMrush
    
    <?php
    // Purpose: starting from one sample description, find every "(H123)" / "(G123)"
    // reference in it, download the lexicon page for each reference, and repeat one
    // level deeper, collecting descriptions and references along the way.
    // Raise the execution limit to 90 seconds; the loops below download pages one by one.
    set_time_limit(90);
    // HTML fragments printed before ($string1) and after ($string2) each dump in the output section.
    $string1 = "";
    $string2 = "";
    
    $string1 .=  "-----<br /><br />\n";
    $string1 .=  "stripatag <pre style=\"color: red; font-weight: bold;\">";
    
    $string2 .=  "</pre> ";
    $string2 .=  "<br /><br />\n";
    // Accumulators for the references and the descriptions found so far.
    $all_etymologyStrongs = array();
    $all_description_blb = array();
    // Seed description: sample markup containing the references "(H9)" and "(H11)".
    $blbdescription = "<span style=\"color: red; font-weight: bold;\"> The same as <a class=\"nowrap\" href=\"lexicon.cfm?strongs=H9&amp;t=KJV\" title=\"English: lost thing, that which was lost\"><span class=\"Hb\">אֲבֵדָה</span> (H9)</a>, incorrectly written for <a class=\"nowrap\" href=\"lexicon.cfm?strongs=H11&amp;t=KJV\" title=\"English: destruction\"><span class=\"Hb\">אֲבַדּוֹן</span> (H11)</a> </span>";
    //'/\([H|G]{1}([0-9]+)\)/'
    // With PREG_SET_ORDER every match is its own array: [0] is the full "(H9)", [1] is the captured "H9".
    // Note that the character class [H|G] also accepts a literal "|".
    preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs, PREG_SET_ORDER);
    array_push($all_description_blb, $blbdescription);
    // array_push() adds the whole match list as ONE nested element; it does not merge the matches in.
    array_push($all_etymologyStrongs, $showstrongs);
    $total_description = array();
    $total_etymologyStrongs = array();
    $total_etym_desc = array();
    // Level 1: visit the lexicon page of every reference found in the seed description.
    for($a=0;$a<count($showstrongs);$a++){
       // NOTE(review): both accumulators are emptied on every pass, so whatever was pushed
       // before this iteration (including the seed values above) is gone afterwards.
       $all_etymologyStrongs = array();
       $all_description_blb = array();
       $file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$showstrongs[$a][1]."&t=KJV";
       // The return value is not checked; file_get_contents() gives false on failure.
       $file = file_get_contents($file_link);
       // Collect every <div ...>...</div> pair (non-greedy, case-insensitive, dot matches newlines).
       preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file, $divout, PREG_SET_ORDER);
       // Inner HTML of the div at index 78 is taken as the description.
       // NOTE(review): presumably tied to the page layout at the time of writing - confirm.
       $blbdescription = $divout[78][2];
       preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs2, PREG_SET_ORDER);
       array_push($all_description_blb, $blbdescription);
       array_push($all_etymologyStrongs, $showstrongs2);
       // Level 2: same steps for every reference found on the level-1 page.
       for($b=0;$b<count($showstrongs2);$b++){
         $file_link2 = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$showstrongs2[$b][1]."&t=KJV";
         $file2 = file_get_contents($file_link2);
         // $divout is reused here and overwritten with the divs of the level-2 page.
         preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file2, $divout, PREG_SET_ORDER);
         $blbdescription = $divout[78][2];
         preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs3, PREG_SET_ORDER);
         array_push($all_description_blb, $blbdescription);
         array_push($all_etymologyStrongs, $showstrongs3);
       }
    }
    // Drop duplicate descriptions and renumber the keys from 0.
    $all_description_blb = array_values(array_unique($all_description_blb));
    // Join the descriptions with "|" and escape quotes/backslashes.
    $string_description_blb = addslashes(implode("|", $all_description_blb));
    
    // NOTE(review): the elements here are arrays (match lists), while array_unique() compares
    // values as strings by default, so this is unlikely to de-duplicate by content - confirm.
    array_push($total_etymologyStrongs, array_unique($all_etymologyStrongs));//implode("|", )
    array_push($total_etym_desc, $string_description_blb);
    ?>
    
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>Test: Strip a Tag 3</title>
    </head>
    
    <body>
    <?php
    
    // Dump the seed matches first and the collected result second, each wrapped
    // in the $string1 / $string2 HTML fragments.
    foreach (array($showstrongs, $total_etymologyStrongs) as $dumpTarget) {
        echo $string1;
        var_dump($dumpTarget);
        echo $string2;
    }
    
    
    ?>
    </body>
    </html>
    
    PHP:
     
    gilgalbiblewheel, Jul 1, 2018 IP
    SEMrush
  2. sarahk

    sarahk iTamer Staff

    Messages:
    25,583
    Likes Received:
    3,572
    Best Answers:
    103
    Trophy Points:
    665
    #2
    I started to have a look but have run out of time.

    I suspect array_merge is the command you need but I'm totally confused about what your goal actually is.

    
    <?php
    //set_time_limit(90);
    
    /**
     * Debug helper: prints a var_export() dump of $var followed by a horizontal rule.
     *
     * @param mixed $var value to display
     */
    function debug($var){
    
      var_export($var);
      echo '<hr>';
    }
    // Accumulators. They are filled across ALL levels below and only read after the loops.
    $all_etymologyStrongs = $all_description_blb = $total_description = $total_etymologyStrongs = $total_etym_desc = array();
    
    // Seed description: sample markup containing the references "(H9)" and "(H11)".
    $blbdescription = "<span style='color: red; font-weight: bold;'> The same as <a class='nowrap' href='lexicon.cfm?strongs=H9&amp;t=KJV' title='English: lost thing, that which was lost'><span class='Hb'>אֲבֵדָה</span> (H9)</a>, incorrectly written for <a class='nowrap' href='lexicon.cfm?strongs=H11&amp;t=KJV' title='English: destruction'><span class='Hb'>אֲבַדּוֹן</span> (H11)</a> </span>";
    //'/\([H|G]{1}([0-9]+)\)/'
    // PREG_SET_ORDER: every match is its own array, [0] => "(H9)", [1] => "H9".
    preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs, PREG_SET_ORDER);
    // $blbdescription is a string, so append it; array_merge() only accepts arrays.
    $all_description_blb[] = $blbdescription;
    $all_etymologyStrongs = array_merge($all_etymologyStrongs, $showstrongs);
    debug($blbdescription);
    debug($showstrongs);
    
    // Level 1: follow every reference found in the seed description.
    foreach($showstrongs as $v){
        // The accumulators are NOT reset here: resetting them on each pass would throw away
        // everything gathered so far, which is the opposite of joining the results.
        $file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$v[1]."&t=KJV";
        $file = file_get_contents($file_link);
        if ($file === false) continue; // download failed, nothing to parse
        //debug($file);
    
        preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file, $divout, PREG_SET_ORDER);
        // The description is expected in the div at index 78; skip pages laid out differently.
        if (!isset($divout[78][2])) continue;
        $blbdescription = $divout[78][2];
    
        debug($blbdescription);
        preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs2, PREG_SET_ORDER);
        //blbdescription isn't an array
        $all_description_blb[] = $blbdescription;
        debug('showstrongs');
        debug($showstrongs2);
        $all_etymologyStrongs = array_merge($all_etymologyStrongs, $showstrongs2);
       
        // Level 2: follow every reference found on the level-1 page.
        foreach($showstrongs2 as $b){
            // $b is the match itself ([0] => "(H9)", [1] => "H9"), not an index into $showstrongs2.
            $file_link2 = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$b[1]."&t=KJV";
            $file2 = file_get_contents($file_link2);
            if ($file2 === false) continue; // download failed
            preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file2, $divout, PREG_SET_ORDER);
            if (!isset($divout[78][2])) continue; // unexpected page layout
            $blbdescription = $divout[78][2];
            preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs3, PREG_SET_ORDER);
            // array_merge() returns a new array, so its result has to be stored;
            // the description is a string and is appended instead.
            $all_description_blb[] = $blbdescription;
            $all_etymologyStrongs = array_merge($all_etymologyStrongs, $showstrongs3);
        }
    }
    // Drop duplicate descriptions and renumber the keys from 0.
    $all_description_blb = array_values(array_unique($all_description_blb));
    $string_description_blb = addslashes(implode("|", $all_description_blb));
    // The elements are arrays, so compare them with SORT_REGULAR; the default string
    // comparison cannot tell one match array from another.
    array_push($total_etymologyStrongs, array_values(array_unique($all_etymologyStrongs, SORT_REGULAR)));//implode("|", )
    array_push($total_etym_desc, $string_description_blb);
    ?>
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
        <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
            <title>
                Test: Strip a Tag 3
            </title>
        </head>
        <body>
            <?php
    debug($showstrongs);
    debug($total_etymologyStrongs);
    
            ?>
        </body>
    </html>
    PHP:
     
    sarahk, Jul 2, 2018 IP
  3. gilgalbiblewheel

    gilgalbiblewheel Well-Known Member

    Messages:
    435
    Likes Received:
    0
    Best Answers:
    0
    Trophy Points:
    101
    #3
    my goal is to collect the Strong's number in the following descriptions and in turn look for the link and get the descriptions and more strong's numbers until there are no more Strong's numbers or there is a repetition of Strong's numbers. That's why I created a for loop within a for loop. But perhaps you have a quicker method. I had gone from for $a = 0 - $d = 0, 4 levels of loops. But maybe it can go more without repeating links?
     
    gilgalbiblewheel, Jul 2, 2018 IP
  4. deathshadow

    deathshadow Acclaimed Member

    Messages:
    9,157
    Likes Received:
    1,708
    Best Answers:
    239
    Trophy Points:
    515
    #4
    Frankly, you're doing a bunch of string matching nonsense (on some really horrifyingly bad markup) over what should probably be done with something like DOMDocument.

    Load the page with DOMDocument::loadHTMLFile(), then just use the JS-like getElement(s)By... methods to go through the document.
     
    deathshadow, Jul 3, 2018 IP
  5. gilgalbiblewheel

    gilgalbiblewheel Well-Known Member

    Messages:
    435
    Likes Received:
    0
    Best Answers:
    0
    Trophy Points:
    101
    #5
    I'm going to look into it. I think I have looked at it in the past and since it didn't give me the results I chose this way. I'll get back to you.
     
    gilgalbiblewheel, Jul 4, 2018 IP
  6. gilgalbiblewheel

    gilgalbiblewheel Well-Known Member

    Messages:
    435
    Likes Received:
    0
    Best Answers:
    0
    Trophy Points:
    101
    #6
    ok I'm looking into the following:
    
    <?php
    // try this html listing example for all nodes / includes a few getElementsByTagName options:
    // Purpose: load a page into a DOMDocument and print, for each selected element,
    // its node name followed by the nodeValue of each direct child.
    
    //$file = $DOCUMENT_ROOT. "test.html";
    // NOTE(review): $DOCUMENT_ROOT is not assigned anywhere in this snippet; it is
    // concatenated in front of an absolute URL here.
    $file = $DOCUMENT_ROOT. "https://www.blueletterbible.org/kjv/gen/1/1/s_1001";
    $doc = new DOMDocument();
    $doc->loadHTMLFile($file);
    
    // example 1:
    $elements = $doc->getElementsByTagName('*');
    // example 2:
    // This assignment replaces the list from example 1, so only <html> elements are iterated below.
    $elements = $doc->getElementsByTagName('html');
    // example 3:
    //$elements = $doc->getElementsByTagName('body');
    // example 4:
    //$elements = $doc->getElementsByTagName('table');
    // example 5:
    //$elements = $doc->getElementsByTagName('div');
    
    if (!is_null($elements)) {
      // Outer loop: one line of output per selected element.
      foreach ($elements as $element) {
      echo "<br/>". $element->nodeName. ": ";
    
      // Inner loop: only the direct children are visited, not deeper descendants.
      $nodes = $element->childNodes;
      foreach ($nodes as $node) {
      echo $node->nodeValue. "\n";
      }
      }
    }
    ?>
    
    PHP:
    And the result is:
     
    gilgalbiblewheel, Jul 4, 2018 IP
  7. deathshadow

    deathshadow Acclaimed Member

    Messages:
    9,157
    Likes Received:
    1,708
    Best Answers:
    239
    Trophy Points:
    515
    #7
    You never defined $DOCUMENT_ROOT, of course it's undefined. You can't include a variable you haven't set the value on... Just get rid of that part.

    $file = 'https://www.blueletterbible.org/kjv/gen/1/1/s_1001';

    Its choking on the HTML 5 fields is a bit more of a wonk: you might have to make it either load the DTD -- which I'm not sure works with an HTML 5 doctype -- or just suppress the warnings. I dislike turning warnings off, but when parsing bad HTML it's often the only choice...

    Hence why I'd also axe the variables for nothing and turn this:

    
    //$file = $DOCUMENT_ROOT. "test.html";
    $file = $DOCUMENT_ROOT. "https://www.blueletterbible.org/kjv/gen/1/1/s_1001";
    $doc = new DOMDocument();
    $doc->loadHTMLFile($file);
    
    Code (markup):
    Into this:

    
    // Load the remote page straight into a DOMDocument, without intermediate variables.
    $doc = new DOMDocument();
    // The second argument is the libxml options bitmask.
    // NOTE(review): LIBXML_NOWARNING is meant to silence parser warnings - confirm it
    // also covers the messages raised for HTML 5 tags.
    $doc->loadHTMLFile(
      'https://www.blueletterbible.org/kjv/gen/1/1/s_1001',
      LIBXML_NOWARNING
    );
    
    Code (markup):
    Though honestly that site is such a train wreck of invalid markup with missing tags, tag soup, div for nothing, endless pointless classes for nothing, and a COMPLETE lack of anything remotely resembling semantics, I'm shocked a browser can process it much less try to do string or DOMDocument...

    ... at which point I'd be trying to access whatever database is underlying that site instead of trying to make sense of its (bloated nonsensical) HTML... unless of course that's not your site in which case this would be a bit ... hinky.

    Also your loop actually should either throw errors, or not even output anything -- nodes of type 1, "elements", have no nodeValue. As such the HTML tag -- the only tag you actually grab since you overwrite the "*" result -- has no nodeValue to output. It's a tag, nodeType == 1, DOMElement. What you want to do is walk its children to find any nodeType 3, DOMText.

    Relevant manual pages:
    http://php.net/manual/en/class.domnode.php
    http://php.net/manual/en/dom.constants.php

    A routine to pull any textnodes from inside an element AND all its children would go something like this:

    
    /**
     * Collects the value of every text node inside $element, including those of
     * all nested children, in document order.
     *
     * @param DOMNode $element the element whose subtree is searched
     * @return array|false list of text node values, or false when $element is not an element
     */
    function walkForText($element) {
    	if ($element->nodeType !== XML_ELEMENT_NODE) return false; // invalid element
    	$texts = [];
    	$walk = $element->firstChild;
    	// Explicit stepping instead of "$walk = a || b || c": in PHP "||" yields a
    	// boolean rather than the first truthy operand, and "." is string
    	// concatenation, not property access.
    	while ($walk) {
    		if ($walk->nodeType === XML_TEXT_NODE) $texts[] = $walk->nodeValue;
    		if ($walk->firstChild) {
    			$walk = $walk->firstChild; // descend first
    			continue;
    		}
    		// leaf reached: climb until a node with a next sibling is found,
    		// but never climb out of $element
    		while ($walk && !$walk->isSameNode($element) && !$walk->nextSibling) {
    			$walk = $walk->parentNode;
    		}
    		$walk = ($walk && !$walk->isSameNode($element)) ? $walk->nextSibling : null;
    	}
    	return $texts;
    }
    
    Code (markup):
    DOM walking takes a bit of practice to grasp, but it's ridiculously powerful when used properly. Fast too since you're not spending as much time on slower memory-hungry routines like the various getElementsBy... be you working in PHP with DOMDocument, or on the DOM client side in JavaScript.

    It's kind of sad MOST people talking about using the DOM have no idea what it is or how to use it. See fans of things like jQuery and React where they TALK about it helping them use the DOM, when they aren't using it at all! MORE so when you get into idiocy like the "Virtual DOM" and the LIES that dupe people who don't know any better into using it.
     
    Last edited: Jul 4, 2018
    deathshadow, Jul 4, 2018 IP
  8. gilgalbiblewheel

    gilgalbiblewheel Well-Known Member

    Messages:
    435
    Likes Received:
    0
    Best Answers:
    0
    Trophy Points:
    101
    #8
    I rely on the class attribute in the tags. How is that done?
     
    gilgalbiblewheel, Jul 4, 2018 IP
  9. deathshadow

    deathshadow Acclaimed Member

    Messages:
    9,157
    Likes Received:
    1,708
    Best Answers:
    239
    Trophy Points:
    515
    #9
    DOMNodes -- if nodeType 1 -- will have an 'attributes' traversable of type DOMNamedNodeMap.

    http://php.net/manual/en/class.domnamednodemap.php

    So if you have the target element -- like a DIV -- pointed at in a variable, let's call it "$element" you should be able to access its classes as

    $element->attributes->getNamedItem('class');

    One fun way of handling this type of things when node walking is, well... let's say you were looking for... lemme open that page and pick a section. Here we go, let's say you were looking for these:

    <div class="columns tablet-8 small-10 tablet-order-3 small-order-2">
    Code (markup):
    (gah, those presentational classes are so bad...)

    My above example of walking the DOM could be modified to walk the entire document and when it finds those DIV, instead of wasting memory making an array perform a callback on that element instead.

    Mind you this is raw, untested...
    
    /**
     * Walks every descendant of $element in document order and calls $callback
     * for each element whose tag name is $tagName and whose class attribute is
     * exactly $class.
     *
     * @param DOMNode  $element  element or document to search inside
     * @param string   $tagName  tag name to look for (case-insensitive)
     * @param string   $class    full value of the class attribute to match
     * @param callable $callback called with each matching DOMElement
     * @return false|null false when $element is neither an element nor a document
     */
    function walkDOMForTagAndClass($element, $tagName, $class, $callback) {
    	// documents are accepted as well, since the walk is normally started from a DOMDocument
    	if (!in_array(
    		$element->nodeType,
    		[XML_ELEMENT_NODE, XML_DOCUMENT_NODE, XML_HTML_DOCUMENT_NODE],
    		true
    	)) return false; // invalid element
    	// we force case as XML vs. SGML are inconsistent on this
    	$tagName = strtoupper($tagName); 
    	$walk = $element->firstChild;
    	// Explicit stepping instead of "$walk = a || b || c": in PHP "||" yields a
    	// boolean rather than the first truthy operand, and "." is string
    	// concatenation, not property access.
    	while ($walk) {
    		if (
    			($walk->nodeType === XML_ELEMENT_NODE) &&
    			(strtoupper($walk->nodeName) === $tagName) &&
    			// compare the attribute's string value; getNamedItem() would hand back a DOMAttr object
    			($walk->getAttribute('class') === $class)
    		) $callback($walk);
    		if ($walk->firstChild) {
    			$walk = $walk->firstChild; // descend first
    			continue;
    		}
    		// leaf reached: climb until a node with a next sibling is found,
    		// but never climb out of $element
    		while ($walk && !$walk->isSameNode($element) && !$walk->nextSibling) {
    			$walk = $walk->parentNode;
    		}
    		$walk = ($walk && !$walk->isSameNode($element)) ? $walk->nextSibling : null;
    	}
    }
    
    Code (markup):
    Then you would just:

    
    // Usage sketch: ask the walker to invoke the closure for each <div> under $doc
    // whose class attribute is exactly the string given as third argument.
    walkDOMForTagAndClass(
    	$doc,
    	'div',
    	'columns tablet-8 small-10 tablet-order-3 small-order-2',
    	// $node is the element the walker passes to the callback
    	function($node) {
    		// do whatever it is you want with the matches here.
    	}
    );
    
    Code (markup):
    May be typos or other minor bugs in that, untested but should give you the general concepts. Nice thing is said routine would be re-usable for matches, and you could swap out $doc (your DOMDocument) for any other DOMNode variable, including the resulting $node from walkDOMForTagAndClass to search for other tags and classes inside there.

    A more robust version would probably detect if $class or $tagName are empty, properly handle the possibility of classes being out of order or if more classes than those included are present, but for your purposes that would/should do.
     
    deathshadow, Jul 4, 2018 IP
  10. deathshadow

    deathshadow Acclaimed Member

    Messages:
    9,157
    Likes Received:
    1,708
    Best Answers:
    239
    Trophy Points:
    515
    #10
    Oh and before anyone chimes in, NO, this is not a typo:

    
    } while (
    	$walk = $walk->firstChild || $walk->nextSibling || (
    		$walk->parentNode == $element ? false : $walk->parentNode.nextSibling
    	)
    );
    
    Code (markup):
    it's =, not ==. Test on assignment.
     
    deathshadow, Jul 4, 2018 IP
  11. gilgalbiblewheel

    gilgalbiblewheel Well-Known Member

    Messages:
    435
    Likes Received:
    0
    Best Answers:
    0
    Trophy Points:
    101
    #11
    Ok where am I going wrong (I'm not used to the "->" and "=>" since I don't know what they represent or do):
    
    <?php
    //should come back to here
    /**
     * Walks every descendant of $element in document order and calls $callback
     * for each element whose tag name is $tagName and whose class attribute is
     * exactly $class.
     *
     * @param DOMNode  $element  element or document to search inside
     * @param string   $tagName  tag name to look for (case-insensitive)
     * @param string   $class    full value of the class attribute to match
     * @param callable $callback called with each matching DOMElement
     * @return false|null false when $element is neither an element nor a document
     */
    function walkDOMForTagAndClass($element, $tagName, $class, $callback) {
       // documents are accepted as well, since the walk is started from a DOMDocument
       if (!in_array(
         $element->nodeType,
         [XML_ELEMENT_NODE, XML_DOCUMENT_NODE, XML_HTML_DOCUMENT_NODE],
         true
       )) return false; // invalid element
       // we force case as XML vs. SGML are inconsistent on this
       $tagName = strtoupper($tagName);
       $walk = $element->firstChild;
       // Explicit stepping instead of "$walk = a || b || c": in PHP "||" yields a
       // boolean rather than the first truthy operand, and "." is string
       // concatenation, not property access ("->" is).
       while ($walk) {
         if (
           ($walk->nodeType === XML_ELEMENT_NODE) &&
           (strtoupper($walk->nodeName) === $tagName) &&
           // compare the attribute's string value; getNamedItem() would hand back a DOMAttr object
           ($walk->getAttribute('class') === $class)
         ) $callback($walk);
         if ($walk->firstChild) {
           $walk = $walk->firstChild; // descend first
           continue;
         }
         // leaf reached: climb until a node with a next sibling is found,
         // but never climb out of $element
         while ($walk && !$walk->isSameNode($element) && !$walk->nextSibling) {
           $walk = $walk->parentNode;
         }
         $walk = ($walk && !$walk->isSameNode($element)) ? $walk->nextSibling : null;
       }
    }
    // Fetch the lexicon entry for H1 and parse it into a DOM tree.
    $doc = new DOMDocument();
    $file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV";
    $doc->loadHTMLFile($file);

    // Placeholder callback for the walker; it receives each matching element.
    // (An earlier attempt matched 'columns tablet-8 small-10 tablet-order-3 small-order-2'.)
    $onMatch = function($file) {
      // do whatever it is you want with the matches here.
    };
    walkDOMForTagAndClass($doc, 'div', 'nocrumbs', $onMatch);

    // Rewrite every anchor inside <body>: force target="_blank" and append
    // "/newurl" to the existing href. Each attribute is removed before it is
    // set again, as in the index-based version of this loop.
    $xpath = new DOMXPath($doc);
    foreach ($xpath->evaluate("/html/body//a") as $anchor) {
      $rewritten = $anchor->getAttribute('href')."/newurl";

      $anchor->removeAttribute('target');
      $anchor->setAttribute("target", "_blank");

      $anchor->removeAttribute('href');
      $anchor->setAttribute("href", $rewritten);
    }

    // Serialise the modified document and print it.
    $file = $doc->saveHTML();
    echo $file;
    ?>
    
    Code (markup):
     
    Last edited: Jul 4, 2018
    gilgalbiblewheel, Jul 4, 2018 IP
  12. gilgalbiblewheel

    gilgalbiblewheel Well-Known Member

    Messages:
    435
    Likes Received:
    0
    Best Answers:
    0
    Trophy Points:
    101
    #12
    I haven't been able to get through this code.
     
    gilgalbiblewheel, Jul 6, 2018 IP