diff options
author | Andres Rey <[email protected]> | 2016-11-27 20:42:20 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2016-11-27 20:42:20 +0000 |
commit | 0a8077f9e2a3f531a087b3ec5c07cf16ba5497ae (patch) | |
tree | 747bec38ef5d1fc897371cee7e58dadcf173fb5d | |
parent | 7481a75b39db90d88158e25f27a35196460eb5bc (diff) |
Fixed node traversal while cleaningConditionally
-rw-r--r-- | src/HTMLParser.php | 12 |
1 file changed, 8 insertions, 4 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 863723a..7ebf97d 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -631,7 +631,7 @@ class HTMLParser */ public function _cleanExtraParagraphs(DOMDocument $article) { - foreach($article->getElementsByTagName('p') as $paragraph){ + foreach ($article->getElementsByTagName('p') as $paragraph) { $imgCount = $paragraph->getElementsByTagName('img')->length; $embedCount = $paragraph->getElementsByTagName('embed')->length; $objectCount = $paragraph->getElementsByTagName('object')->length; @@ -639,7 +639,7 @@ class HTMLParser $iframeCount = $paragraph->getElementsByTagName('iframe')->length; $totalCount = $imgCount + $embedCount + $objectCount + $iframeCount; - if($totalCount === 0 && !trim($paragraph->textContent)){ + if ($totalCount === 0 && !trim($paragraph->textContent)) { // TODO must be done via readability $paragraph->parentNode->removeChild($paragraph); } @@ -668,8 +668,12 @@ class HTMLParser * without effecting the traversal. */ - // TODO Check for node shifting and if the removal function is working as expected - foreach ($article->getElementsByTagName($tag) as $node) { + $DOMNodeList = $article->getElementsByTagName($tag); + $length = $DOMNodeList->length; + for ($i = 0; $i < $length; $i++) { + + $node = $DOMNodeList->item($length - 1 - $i); + $node = new Readability($node); $weight = $node->getClassWeight(); |