How do I join the $showstrongs, $showstrongs2 and $showstrongs3 into the $total_etymologyStrongs array? <?php set_time_limit(90); $string1 = ""; $string2 = ""; $string1 .= "-----<br /><br />\n"; $string1 .= "stripatag <pre style=\"color: red; font-weight: bold;\">"; $string2 .= "</pre> "; $string2 .= "<br /><br />\n"; $all_etymologyStrongs = array(); $all_description_blb = array(); $blbdescription = "<span style=\"color: red; font-weight: bold;\"> The same as <a class=\"nowrap\" href=\"lexicon.cfm?strongs=H9&t=KJV\" title=\"English: lost thing, that which was lost\"><span class=\"Hb\">אֲבֵדָה</span> (H9)</a>, incorrectly written for <a class=\"nowrap\" href=\"lexicon.cfm?strongs=H11&t=KJV\" title=\"English: destruction\"><span class=\"Hb\">אֲבַדּוֹן</span> (H11)</a> </span>"; //'/\([H|G]{1}([0-9]+)\)/' preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs, PREG_SET_ORDER); array_push($all_description_blb, $blbdescription); array_push($all_etymologyStrongs, $showstrongs); $total_description = array(); $total_etymologyStrongs = array(); $total_etym_desc = array(); for($a=0;$a<count($showstrongs);$a++){ $all_etymologyStrongs = array(); $all_description_blb = array(); $file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$showstrongs[$a][1]."&t=KJV"; $file = file_get_contents($file_link); preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file, $divout, PREG_SET_ORDER); $blbdescription = $divout[78][2]; preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs2, PREG_SET_ORDER); array_push($all_description_blb, $blbdescription); array_push($all_etymologyStrongs, $showstrongs2); for($b=0;$b<count($showstrongs2);$b++){ $file_link2 = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$showstrongs2[$b][1]."&t=KJV"; $file2 = file_get_contents($file_link2); preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file2, $divout, PREG_SET_ORDER); $blbdescription = $divout[78][2]; preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs3, PREG_SET_ORDER); array_push($all_description_blb, $blbdescription); array_push($all_etymologyStrongs, $showstrongs3); } } $all_description_blb = array_values(array_unique($all_description_blb)); $string_description_blb = addslashes(implode("|", $all_description_blb)); array_push($total_etymologyStrongs, array_unique($all_etymologyStrongs));//implode("|", ) array_push($total_etym_desc, $string_description_blb); ?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <title>Test: Strip a Tag 3</title> </head> <body> <?php echo $string1; var_dump($showstrongs); echo $string2; echo $string1; var_dump($total_etymologyStrongs); echo $string2; ?> </body> </html> PHP:
I started to have a look but have run out of time. I suspect array_merge is the command you need but I'm totally confused about what your goal actually is. <?php //set_time_limit(90); function debug($var){ var_export($var); echo '<hr>'; } $all_etymologyStrongs = $all_description_blb = $total_description = $total_etymologyStrongs = $total_etym_desc = array(); $blbdescription = "<span style='color: red; font-weight: bold;'> The same as <a class='nowrap' href='lexicon.cfm?strongs=H9&t=KJV' title='English: lost thing, that which was lost'><span class='Hb'>אֲבֵדָה</span> (H9)</a>, incorrectly written for <a class='nowrap' href='lexicon.cfm?strongs=H11&t=KJV' title='English: destruction'><span class='Hb'>אֲבַדּוֹן</span> (H11)</a> </span>"; //'/\([H|G]{1}([0-9]+)\)/' preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs, PREG_SET_ORDER); $all_description_blb = array_merge($all_description_blb, $blbdescription); $all_etymologyStrongs = array_merge($all_etymologyStrongs, $showstrongs); debug($blbdescription); debug($showstrongs); foreach($showstrongs as $v){ $all_etymologyStrongs = $all_description_blb = array(); $file_link = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$v[1]."&t=KJV"; $file = file_get_contents($file_link); //debug($file); preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file, $divout, PREG_SET_ORDER); $blbdescription = $divout[78][2]; debug($blbdescription); preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs2, PREG_SET_ORDER); //blbdescription isn't an array $all_description_blb[] = $blbdescription; debug('showstrongs'); debug($showstrongs2); $all_etymologyStrongs = array_merge($all_etymologyStrongs, $showstrongs2); foreach($showstrongs2 as $b){ $file_link2 = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=".$showstrongs2[$b][1]."&t=KJV"; $file2 = file_get_contents($file_link2); preg_match_all("#<\b(div)\b[^>]*>(.*?)</\b(div)\b>#si", $file2, $divout, PREG_SET_ORDER); $blbdescription = $divout[78][2]; preg_match_all('/\(([H|G]{1}[0-9]+)\)/', $blbdescription, $showstrongs3, PREG_SET_ORDER); array_merge($all_description_blb, $blbdescription); array_merge($all_etymologyStrongs, $showstrongs3); } } $all_description_blb = array_values(array_unique($all_description_blb)); $string_description_blb = addslashes(implode("|", $all_description_blb)); array_push($total_etymologyStrongs, array_unique($all_etymologyStrongs));//implode("|", ) array_push($total_etym_desc, $string_description_blb); ?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <title> Test: Strip a Tag 3 </title> </head> <body> <?php debug($showstrongs); debug($total_etymologyStrongs); ?> </body> </html> PHP:
my goal is to collect the Strong's number in the following descriptions and in turn look for the link and get the descriptions and more strong's numbers until there are no more Strong's numbers or there is a repetition of Strong's numbers. That's why I created a for loop within a for loop. But perhaps you have a quicker method. I had gone from for $a = 0 - $d = 0, 4 levels of loops. But maybe it can go more without repeating links?
Frankly, you're doing a bunch of string matching nonsense (on some really horrifyingly bad markup) over what should probably be done with something like DOMDocument. DOMDocument.loadHTMLFile, then just use the JS-like getElement(s)Bywhatever methodology to go through the document.
I'm going to look into it. I think I have looked at it in the past and since it didn't give me the results I chose this way. I'll get back to you.
ok I'm looking into the following: <?php // try this html listing example for all nodes / includes a few getElementsByTagName options: //$file = $DOCUMENT_ROOT. "test.html"; $file = $DOCUMENT_ROOT. "https://www.blueletterbible.org/kjv/gen/1/1/s_1001"; $doc = new DOMDocument(); $doc->loadHTMLFile($file); // example 1: $elements = $doc->getElementsByTagName('*'); // example 2: $elements = $doc->getElementsByTagName('html'); // example 3: //$elements = $doc->getElementsByTagName('body'); // example 4: //$elements = $doc->getElementsByTagName('table'); // example 5: //$elements = $doc->getElementsByTagName('div'); if (!is_null($elements)) { foreach ($elements as $element) { echo "<br/>". $element->nodeName. ": "; $nodes = $element->childNodes; foreach ($nodes as $node) { echo $node->nodeValue. "\n"; } } } ?> PHP: And the result is:
You never defined $DOCUMENT_ROOT, of course it's undefined. You can't include a variable you haven't set the value on... Just get rid of that part. $file = 'https://www.blueletterbible.org/kjv/gen/1/1/s_1001'; It choking on the HTML 5 fields is a bit more of a wonk, you might have to make it either load the DTD -- which I'm not sure works with a HTML 5 doctype -- or just suppress the warnings. I dislike turning warnings off, but when parsing bad HTML it's often the only choice... Hence why I'd also axe the variables for nothing and turn this: //$file = $DOCUMENT_ROOT. "test.html"; $file = $DOCUMENT_ROOT. "https://www.blueletterbible.org/kjv/gen/1/1/s_1001"; $doc = new DOMDocument(); $doc->loadHTMLFile($file); Code (markup): Into this: $doc = new DOMDocument(); $doc->loadHTMLFile( 'https://www.blueletterbible.org/kjv/gen/1/1/s_1001', LIBXML_NOWARNING ); Code (markup): Though honestly that site is such a train wreck of invalid markup with missing tags, tag soup, div for nothing, endless pointless classes for nothing, and a COMPLETE lack of anything remotely resembling semantics, I'm shocked a browser can process it much less try to do string or DOMDocument... ... at which point I'd be trying to access whatever database is underlying that site instead of trying to make sense of its (bloated nonsensical) HTML... unless of course that's not your site in which case this would be a bit ... hinky. Also your loop actually should either throw errors, or not even output anything -- nodes of type 1, "elements", have no nodeValue. As such THE HTML tag -- the only tag you actually grab since you overwrite the "*" result -- has no nodeValue to output. It's a tag, nodetype == 1, DOMElement. What you want to do is walk it's children to find any nodetype 3, DOMText. Relevant manual pages: http://php.net/manual/en/class.domnode.php http://php.net/manual/en/dom.constants.php A routine to pull any textnodes from inside an element AND all its children would go something like this: function walkForText($element) { if ($element->nodeType !== 1) return false; // invalid element $texts = []; if ($walk = $element->firstChild) do { if ($walk->nodeType == 3) $texts[] = $walk.nodeValue; } while ( $walk = $walk->firstChild || $walk->nextSibling || ( $walk->parentNode == $element ? false : $walk->parentNode.nextSibling ) ); return $texts; } Code (markup): DOM walking takes a bit of practice to grasp, but it's ridiculously powerful when used properly. Fast too since you're not spending as much time on slower memory-hungry routines like the various getElementsBy... be you working in PHP with DOMDocument, or on the DOM client side in JavaScript. It's kind of sad MOST people talking about using the DOM have no idea what it is or how to use it. See fans of things like jQuery and React where they TALK about it helping them use the DOM, when they aren't using it at all! MORE so when you get into idiocy like the "Virtual DOM" and the LIES that dupe people who don't know any better into using it.
DOMNodes -- if nodeType 1 -- will have an 'attributes' traversable of type DOMNamedNodeMap. http://php.net/manual/en/class.domnamednodemap.php So if you have the target element -- like a DIV -- pointed at in a variable, let's call it "$element" you should be able to access its classes as $element->attributes->getNamedItem('class'); One fun way of handling this type of things when node walking is, well... let's say you were looking for... lemme open that page and pick a section. Here we go, let's say you were looking for these: <div class="columns tablet-8 small-10 tablet-order-3 small-order-2"> Code (markup): [em]gah, those presentational classes are so bad...[/em] My above example of walking the DOM could be modified to walk the entire document and when it finds those DIV, instead of wasting memory making an array perform a callback on that element instead. Mind you this is raw, untested... function walkDOMForTagAndClass($element, $tagName, $class, $callback) { if ($element->nodeType !== 1) return false; // invalid element // we force case as XML vs. SGML are inconsistent on ths $tagName = strtoupper($tagName); if ($walk = $element->firstChild) do { if ( ($walk->nodeType == 1) && (strtoupper($walk->nodeName) == $tagName) && ($walk->attributes->getNamedItem('class') == $class) ) $callback($walk); } while ( $walk = $walk->firstChild || $walk->nextSibling || ( $walk->parentNode == $element ? false : $walk->parentNode.nextSibling ) ); } Code (markup): Then you would just: walkDOMForTagAndClass( $doc, 'div', 'columns tablet-8 small-10 tablet-order-3 small-order-2', function($node) { // do whatever it is you want with the matches here. } ); Code (markup): May be typos or other minor bugs in that, untested but should give you the general concepts. Nice thing is said routine would be re-usable for matches, and you could swap out $doc (your DOMDocument) for any other DOMNode variable, including the resulting $node from walkDOMForTagAndClass to search for other tags and classes inside there. A more robust version would probably detect if $class or $tagName are empty, properly handle the possibility of classes being out of order or if more classes than those included are present, but for your purposes that would/should do.
Oh and before anyone chimes in, NO, this is not a typo: } while ( $walk = $walk->firstChild || $walk->nextSibling || ( $walk->parentNode == $element ? false : $walk->parentNode.nextSibling ) ); Code (markup): it's =, not ==. Test on assignment.
Ok where am I going wrong (I'm not used to the "->" and "=>" since I don't know what they represent or do): <?php //should come back to here function walkDOMForTagAndClass($element, $tagName, $class, $callback) { if ($element->nodeType !== 1) return false; // invalid element // we force case as XML vs. SGML are inconsistent on ths $tagName = strtoupper($tagName); if ($walk = $element->firstChild) do { if ( ($walk->nodeType == 1) && (strtoupper($walk->nodeName) == $tagName) && ($walk->attributes->getNamedItem('class') == $class) ) $callback($walk); } while ( $walk = $walk->firstChild || $walk->nextSibling || ( $walk->parentNode == $element ? false : $walk->parentNode.nextSibling ) ); } $file = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV"; $doc = new DOMDocument(); $doc->loadHTMLFile($file); walkDOMForTagAndClass( $doc, 'div', //'columns tablet-8 small-10 tablet-order-3 small-order-2', 'nocrumbs', function($file) { // do whatever it is you want with the matches here. } ); /*$html = "https://www.blueletterbible.org/lang/lexicon/lexicon.cfm?Strongs=H1&t=KJV"; $dom = new DOMDocument(); $dom->loadHTML($html);*/ //Evaluate Anchor tag in HTML $xpath = new DOMXPath($doc); $hrefs = $xpath->evaluate("/html/body//a"); for ($i = 0; $i < $hrefs->length; $i++) { $href = $hrefs->item($i); $url = $href->getAttribute('href'); //remove and set target attribute $href->removeAttribute('target'); $href->setAttribute("target", "_blank"); $newURL=$url."/newurl"; //remove and set href attribute $href->removeAttribute('href'); $href->setAttribute("href", $newURL); } // save html $file=$doc->saveHTML(); echo $file; ?> Code (markup):