diff options
author | Andres Rey <[email protected]> | 2017-11-09 18:41:46 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-09 18:41:46 +0000 |
commit | 0940c192d8f9c74e734a6a90c553f3510aa99b40 (patch) | |
tree | c7ead4e0b1ddc50e4890be5ae93e9a54dd3dfe23 /src/HTMLParser.php | |
parent | 6437e87b77eec9870ac042298731c8cf34fc8b74 (diff) |
Scan nodes in reverse order in the functions that remove nodes.
In other words: removing a node shifts the nodes that follow it, so a forward scan skips elements.
Diffstat (limited to 'src/HTMLParser.php')
-rw-r--r-- | src/HTMLParser.php | 7 |
1 file changed, 6 insertions, 1 deletion
diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 1345bc1..1223891 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -1215,7 +1215,12 @@ class HTMLParser */ public function _cleanExtraParagraphs(DOMDocument $article) { - foreach ($article->getElementsByTagName('p') as $paragraph) { + $paragraphs = $article->getElementsByTagName('p'); + $length = $paragraphs->length; + + for ($i = 0; $i < $length; $i++) { + $paragraph = $paragraphs->item($length - 1 - $i); + $imgCount = $paragraph->getElementsByTagName('img')->length; $embedCount = $paragraph->getElementsByTagName('embed')->length; $objectCount = $paragraph->getElementsByTagName('object')->length; |