diff options
author | Andres Rey <[email protected]> | 2017-11-09 19:20:15 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-09 19:20:15 +0000 |
commit | 422081a1003d3b5b668ddf7fb513a26f09b71b46 (patch) | |
tree | 3048c838426061f30ee1a9b9c3a737c7f4c5832a /src/HTMLParser.php | |
parent | 9c8febc1e381aa642c634b757b4c32e12f444ba6 (diff) |
Remove extra brs between p nodes after processing the article
Diffstat (limited to 'src/HTMLParser.php')
-rw-r--r-- | src/HTMLParser.php | 18 |
1 files changed, 7 insertions, 11 deletions
```diff
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 6da5828..d98ac68 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -273,7 +273,7 @@ class HTMLParser
          * (If we go the other way around we need to search for previous nodes forcing the creation of new functions
          * that will be used only here)
          */
-        foreach(iterator_to_array($dom->getElementsByTagName('br')) as $br){
+        foreach (iterator_to_array($dom->getElementsByTagName('br')) as $br) {
             $next = $br->nextSibling;
 
             /*
@@ -1057,16 +1057,12 @@ class HTMLParser
 
         $this->_cleanReadabilityTags($article);
 
-        // TODO: Check if we can actually remove this. Removes BRs from text nodes.
-//        $brs = $article->getElementsByTagName('br');
-//        $length = $brs->length;
-//        for ($i = 0; $i < $length; $i++) {
-//            $node = $brs->item($length - 1 - $i);
-//            $next = $node->nextSibling;
-//            if ($next && $next->nodeType === XML_TEXT_NODE) {
-//                $node->parentNode->removeChild($node);
-//            }
-//        }
+        foreach (iterator_to_array($article->getElementsByTagName('br')) as $br) {
+            $next = $br->nextSibling;
+            if ($next && $next->nodeName === 'p') {
+                $br->parentNode->removeChild($br);
+            }
+        }
 
         return $article;
     }
```