summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andres Rey <[email protected]> 2017-11-09 19:20:15 +0000
committer Andres Rey <[email protected]> 2017-11-09 19:20:15 +0000
commit 422081a1003d3b5b668ddf7fb513a26f09b71b46 (patch)
tree 3048c838426061f30ee1a9b9c3a737c7f4c5832a
parent 9c8febc1e381aa642c634b757b4c32e12f444ba6 (diff)
Remove extra brs between p nodes after processing the article
-rw-r--r-- src/HTMLParser.php 18
1 file changed, 7 insertions, 11 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 6da5828..d98ac68 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -273,7 +273,7 @@ class HTMLParser
* (If we go the other way around we need to search for previous nodes forcing the creation of new functions
* that will be used only here)
*/
- foreach(iterator_to_array($dom->getElementsByTagName('br')) as $br){
+ foreach (iterator_to_array($dom->getElementsByTagName('br')) as $br) {
$next = $br->nextSibling;
/*
@@ -1057,16 +1057,12 @@ class HTMLParser
$this->_cleanReadabilityTags($article);
- // TODO: Check if we can actually remove this. Removes BRs from text nodes.
-// $brs = $article->getElementsByTagName('br');
-// $length = $brs->length;
-// for ($i = 0; $i < $length; $i++) {
-// $node = $brs->item($length - 1 - $i);
-// $next = $node->nextSibling;
-// if ($next && $next->nodeType === XML_TEXT_NODE) {
-// $node->parentNode->removeChild($node);
-// }
-// }
+ foreach (iterator_to_array($article->getElementsByTagName('br')) as $br) {
+ $next = $br->nextSibling;
+ if ($next && $next->nodeName === 'p') {
+ $br->parentNode->removeChild($br);
+ }
+ }
return $article;
}