From 0940c192d8f9c74e734a6a90c553f3510aa99b40 Mon Sep 17 00:00:00 2001
From: Andres Rey
Date: Thu, 9 Nov 2017 18:41:46 +0000
Subject: Scan nodes in reverse in removing functions. In other words: Node shifting is a bitch

---
 src/HTMLParser.php | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 1345bc1..1223891 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -1215,7 +1215,12 @@ class HTMLParser
      */
     public function _cleanExtraParagraphs(DOMDocument $article)
     {
-        foreach ($article->getElementsByTagName('p') as $paragraph) {
+        $paragraphs = $article->getElementsByTagName('p');
+        $length = $paragraphs->length;
+
+        for ($i = 0; $i < $length; $i++) {
+            $paragraph = $paragraphs->item($length - 1 - $i);
+
             $imgCount = $paragraph->getElementsByTagName('img')->length;
             $embedCount = $paragraph->getElementsByTagName('embed')->length;
             $objectCount = $paragraph->getElementsByTagName('object')->length;
--
cgit v1.2.3