summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndres Rey <[email protected]>2017-11-11 19:35:36 +0000
committerAndres Rey <[email protected]>2017-11-11 19:35:49 +0000
commit0e9d83dfea6982426f73b15a5f1d943164cf409d (patch)
tree1938fd30adc33e6950e4a1d6d5a609c77ccafdea /src
parent539668e447940d0b8ea12b863916ce367137c9a6 (diff)
Remove nodes when there's only one DOMText node with no text
Diffstat (limited to 'src')
-rw-r--r--src/Readability.php8
1 files changed, 7 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php
index 922e60f..74cb71f 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -530,7 +530,13 @@ class Readability extends Element implements ReadabilityInterface
return ($this->node instanceof \DOMElement &&
mb_strlen(trim($this->node->textContent)) === 0 &&
($this->node->childNodes->length === 0 ||
- $this->node->childNodes->length === $this->node->getElementsByTagName('br')->length + $this->node->getElementsByTagName('hr')->length
+ $this->node->childNodes->length === $this->node->getElementsByTagName('br')->length + $this->node->getElementsByTagName('hr')->length ||
+ /*
+ * Special DOMDocument case: When there's an empty tag with a space inside, like "<h3> </h3>", the
+ * previous check will fail because DOMElement will report one node inside (a DOMText), and this
+ * doesn't happen in JS. So here we check if we have exactly one node, and that node is a DOMText one.
+ */
+ ($this->node->childNodes->length === 1 && $this->node->childNodes->item(0)->nodeType === XML_TEXT_NODE)
));
}
}