summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2017-11-09 18:41:46 +0000
committer: Andres Rey <[email protected]> 2017-11-09 18:41:46 +0000
commit: 0940c192d8f9c74e734a6a90c553f3510aa99b40 (patch)
tree: c7ead4e0b1ddc50e4890be5ae93e9a54dd3dfe23 /src
parent: 6437e87b77eec9870ac042298731c8cf34fc8b74 (diff)
Scan nodes in reverse in removing functions.
In other words: Node shifting is a bitch
Diffstat (limited to 'src')
-rw-r--r-- src/HTMLParser.php 7
1 files changed, 6 insertions, 1 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 1345bc1..1223891 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -1215,7 +1215,12 @@ class HTMLParser
*/
public function _cleanExtraParagraphs(DOMDocument $article)
{
- foreach ($article->getElementsByTagName('p') as $paragraph) {
+ $paragraphs = $article->getElementsByTagName('p');
+ $length = $paragraphs->length;
+
+ for ($i = 0; $i < $length; $i++) {
+ $paragraph = $paragraphs->item($length - 1 - $i);
+
$imgCount = $paragraph->getElementsByTagName('img')->length;
$embedCount = $paragraph->getElementsByTagName('embed')->length;
$objectCount = $paragraph->getElementsByTagName('object')->length;