From 422081a1003d3b5b668ddf7fb513a26f09b71b46 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Thu, 9 Nov 2017 19:20:15 +0000 Subject: Remove extra brs between p nodes after processing the article --- src/HTMLParser.php | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 6da5828..d98ac68 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -273,7 +273,7 @@ class HTMLParser * (If we go the other way around we need to search for previous nodes forcing the creation of new functions * that will be used only here) */ - foreach(iterator_to_array($dom->getElementsByTagName('br')) as $br){ + foreach (iterator_to_array($dom->getElementsByTagName('br')) as $br) { $next = $br->nextSibling; /* @@ -1057,16 +1057,12 @@ class HTMLParser $this->_cleanReadabilityTags($article); - // TODO: Check if we can actually remove this. Removes BRs from text nodes. -// $brs = $article->getElementsByTagName('br'); -// $length = $brs->length; -// for ($i = 0; $i < $length; $i++) { -// $node = $brs->item($length - 1 - $i); -// $next = $node->nextSibling; -// if ($next && $next->nodeType === XML_TEXT_NODE) { -// $node->parentNode->removeChild($node); -// } -// } + foreach (iterator_to_array($article->getElementsByTagName('br')) as $br) { + $next = $br->nextSibling; + if ($next && $next->nodeName === 'p') { + $br->parentNode->removeChild($br); + } + } return $article; } -- cgit v1.2.3