summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2016-12-22 21:24:19 -0300
committer: Andres Rey <[email protected]> 2016-12-22 21:24:19 -0300
commit e9737116493b2dd2893a9a5ac44d0e768dc363bd (patch)
tree b2c093dca54fae939c7ae506f783c8a3f6a83f91 /src
parent f5fb536ffd0f247c0ee5f633089c15f66fbba481 (diff)
Node shifting is a bitch
Diffstat (limited to 'src')
-rw-r--r-- src/HTMLParser.php 15
1 files changed, 13 insertions, 2 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 43cf505..58da4b7 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -228,8 +228,11 @@ class HTMLParser
*/
private function prepDocument()
{
- foreach ($this->dom->getElementsByTagName('br') as $br) {
+ $brs = $this->dom->getElementsByTagName('br');
+ $length = $brs->length;
+ for ($i = 0; $i < $length; $i++) {
/** @var \DOMNode $br */
+ $br = $brs->item($length - 1 - $i);
$next = $br->nextSibling;
/*
@@ -733,7 +736,15 @@ class HTMLParser
$this->_cleanReadabilityTags($article);
- // TODO Remove extra BR nodes that have a P sibling.
+ $brs = $article->getElementsByTagName('br');
+ $length = $brs->length;
+ for ($i = 0; $i < $length; $i++) {
+ $node = $brs->item($length - 1 - $i);
+ $next = $node->nextSibling;
+ if ($next && $next->nodeName === 'p') {
+ $node->parentNode->removeChild($node);
+ }
+ }
return $article;
}