From 0a8077f9e2a3f531a087b3ec5c07cf16ba5497ae Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Sun, 27 Nov 2016 20:42:20 +0000 Subject: Fixed node traversal while cleaningConditionally --- src/HTMLParser.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 863723a..7ebf97d 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -631,7 +631,7 @@ class HTMLParser */ public function _cleanExtraParagraphs(DOMDocument $article) { - foreach($article->getElementsByTagName('p') as $paragraph){ + foreach ($article->getElementsByTagName('p') as $paragraph) { $imgCount = $paragraph->getElementsByTagName('img')->length; $embedCount = $paragraph->getElementsByTagName('embed')->length; $objectCount = $paragraph->getElementsByTagName('object')->length; @@ -639,7 +639,7 @@ class HTMLParser $iframeCount = $paragraph->getElementsByTagName('iframe')->length; $totalCount = $imgCount + $embedCount + $objectCount + $iframeCount; - if($totalCount === 0 && !trim($paragraph->textContent)){ + if ($totalCount === 0 && !trim($paragraph->textContent)) { // TODO must be done via readability $paragraph->parentNode->removeChild($paragraph); } @@ -668,8 +668,12 @@ class HTMLParser * without effecting the traversal. */ - // TODO Check for node shifting and if the removal function is working as expected - foreach ($article->getElementsByTagName($tag) as $node) { + $DOMNodeList = $article->getElementsByTagName($tag); + $length = $DOMNodeList->length; + for ($i = 0; $i < $length; $i++) { + + $node = $DOMNodeList->item($length - 1 - $i); + $node = new Readability($node); $weight = $node->getClassWeight(); -- cgit v1.2.3