diff options
-rw-r--r-- | src/HTMLParser.php | 15 |
-rw-r--r-- | src/Readability.php | 19 |
2 files changed, 24 insertions, 10 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 022f616..27e376d 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -318,7 +318,7 @@ class HTMLParser
                  * algorithm with DIVs with are, in practice, paragraphs.
                  */
                 if ($this->hasSinglePNode($node)) {
-                    $pNode = $node->getChildren()[0];
+                    $pNode = $node->getChildren(true)[0];
                     $node->replaceChild($pNode);
                     $node = $pNode;
                 } elseif (!$this->hasSingleChildBlockElement($node)) {
@@ -877,17 +877,12 @@ class HTMLParser
     private function hasSinglePNode(Readability $node)
     {
         // There should be exactly 1 element child which is a P:
-        if ($node->hasChildren()) {
-            $children = $node->getChildren();
-
-            if (count($children) === 1) {
-                if ($children[0]->tagNameEqualsTo('p')) {
-                    return true;
-                }
-            }
+        // And there should be no text nodes with real content (param true on ->getChildren)
+        if (count($children = $node->getChildren(true)) !== 1 || !$children[0]->tagNameEqualsTo('p')) {
+            return false;
         }
 
-        return false;
+        return true;
     }
 
     private function hasSingleChildBlockElement(Readability $node)
diff --git a/src/Readability.php b/src/Readability.php
index e3122dd..9255bc6 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -476,4 +476,23 @@ class Readability extends Element implements ReadabilityInterface
         }
         return false;
     }
+
+    /**
+     * @param bool $filterEmptyDOMText Filter empty DOMText nodes?
+     * @return array
+     */
+    public function getChildren($filterEmptyDOMText = false)
+    {
+        $ret = array();
+        /** @var \DOMNode $node */
+        foreach ($this->node->childNodes as $node) {
+            if ($filterEmptyDOMText && $node->nodeName === '#text' && !trim($node->nodeValue)) {
+                continue;
+            }
+
+            $ret[] = new static($node);
+        }
+
+        return $ret;
+    }
 }