diff options
author | Andres Rey <[email protected]> | 2017-11-11 19:35:36 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-11 19:35:49 +0000 |
commit | 0e9d83dfea6982426f73b15a5f1d943164cf409d (patch) | |
tree | 1938fd30adc33e6950e4a1d6d5a609c77ccafdea /src/Readability.php | |
parent | 539668e447940d0b8ea12b863916ce367137c9a6 (diff) |
Remove nodes when there's only one DOMText node with no text
Diffstat (limited to 'src/Readability.php')
-rw-r--r-- | src/Readability.php | 8 |
1 files changed, 7 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php index 922e60f..74cb71f 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -530,7 +530,13 @@ class Readability extends Element implements ReadabilityInterface return ($this->node instanceof \DOMElement && mb_strlen(trim($this->node->textContent)) === 0 && ($this->node->childNodes->length === 0 || - $this->node->childNodes->length === $this->node->getElementsByTagName('br')->length + $this->node->getElementsByTagName('hr')->length + $this->node->childNodes->length === $this->node->getElementsByTagName('br')->length + $this->node->getElementsByTagName('hr')->length || + /* + * Special DOMDocument case: When there's an empty tag with a space inside, like "<h3> </h3>", the + * previous if will fail because DOMElement will say that it has one node inside (A DOMText) and this + * in JS doesn't happen. So here we check if we have exactly one node, and that node is a DOMText one. + */ + ($this->node->childNodes->length === 1 && $this->node->childNodes->item(0)->nodeType === XML_TEXT_NODE) )); } } |