diff options
author | Andres Rey <[email protected]> | 2017-11-30 20:09:59 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-30 20:09:59 +0000 |
commit | 269cae7d0faad6748ffa1454cf03e45fa63e524c (patch) | |
tree | 968c8db7f9b8de3166225328c9dec0041b599cd9 /src | |
parent | fc4834c588c61ab42b89e3324fa01d467f4a8d00 (diff) |
Import functions from NodeUtility to NodeClassTrait
Diffstat (limited to 'src')
-rw-r--r-- | src/NodeClass/NodeClassTrait.php | 65 | ||||
-rw-r--r-- | src/NodeUtility.php | 66 | ||||
-rw-r--r-- | src/Readability.php | 4 |
3 files changed, 68 insertions, 67 deletions
diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php index 1446fb6..403cd58 100644 --- a/src/NodeClass/NodeClassTrait.php +++ b/src/NodeClass/NodeClassTrait.php @@ -22,6 +22,23 @@ trait NodeClassTrait private $initialized = false; /** + * @var array + */ + private $divToPElements = [ + 'a', + 'blockquote', + 'dl', + 'div', + 'img', + 'ol', + 'p', + 'pre', + 'table', + 'ul', + 'select', + ]; + + /** * initialized getter * * @return bool @@ -263,6 +280,54 @@ trait NodeClassTrait } /** + * Checks if the current node has a single child and if that child is a P node. + * Useful to convert <div><p> nodes to a single <p> node and avoid confusing the scoring system since div with p + * tags are, in practice, paragraphs. + * + * @param DOMNode $node + * + * @return bool + */ + public function hasSinglePNode() + { + // There should be exactly 1 element child which is a P: + if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== 'p') { + return false; + } + + // And there should be no text nodes with real content (param true on ->getChildren) + foreach ($children as $child) { + /** @var $child DOMNode */ + if ($child->nodeType === XML_TEXT_NODE && !preg_match('/\S$/', $child->getTextContent())) { + return false; + } + } + + return true; + } + + /** + * @param $node DOMNode + * @return bool + */ + public function hasSingleChildBlockElement() + { + $result = false; + if ($this->hasChildNodes()) { + foreach ($this->getChildren() as $child) { + if (in_array($child->nodeName, $this->divToPElements)) { + $result = true; + } else { + // If any of the hasSingleChildBlockElement calls return true, return true then. + $result = ($result || $child->hasSingleChildBlockElement()); + } + } + } + + return $result; + } + + /** * Returns the children of the current node. * * @param bool $filterEmptyDOMText Filter empty DOMText nodes? diff --git a/src/NodeUtility.php b/src/NodeUtility.php index 07bc8da..d0796dd 100644 --- a/src/NodeUtility.php +++ b/src/NodeUtility.php @@ -1,6 +1,7 @@ <?php namespace andreskrey\Readability; + use andreskrey\Readability\NodeClass\DOMDocument; use andreskrey\Readability\NodeClass\DOMElement; use andreskrey\Readability\NodeClass\DOMNode; @@ -13,23 +14,6 @@ class NodeUtility { /** - * @var array - */ - private static $divToPElements = [ - 'a', - 'blockquote', - 'dl', - 'div', - 'img', - 'ol', - 'p', - 'pre', - 'table', - 'ul', - 'select', - ]; - - /** * Collection of regexps to check the node usability * * @var array @@ -177,52 +161,4 @@ class NodeUtility return ($originalNode) ? $originalNode->nextSibling : $originalNode; } - - /** - * Checks if the current node has a single child and if that child is a P node. - * Useful to convert <div><p> nodes to a single <p> node and avoid confusing the scoring system since div with p - * tags are, in practice, paragraphs. - * - * @param DOMNode $node - * - * @return bool - */ - public static function hasSinglePNode($node) - { - // There should be exactly 1 element child which is a P: - if (count($children = $node->getChildren(true)) !== 1 || $children[0]->nodeName !== 'p') { - return false; - } - - // And there should be no text nodes with real content (param true on ->getChildren) - foreach ($children as $child) { - /** @var $child DOMNode */ - if ($child->nodeType === XML_TEXT_NODE && !preg_match('/\S$/', $child->getTextContent())) { - return false; - } - } - - return true; - } - - /** - * @param $node DOMNode - * @return bool - */ - public static function hasSingleChildBlockElement($node) - { - $result = false; - if ($node->hasChildNodes()) { - foreach ($node->getChildren() as $child) { - if (in_array($child->nodeName, self::$divToPElements)) { - $result = true; - } else { - // If any of the hasSingleChildBlockElement calls return true, return true then. - $result = ($result || self::hasSingleChildBlockElement($child)); - } - } - } - - return $result; - } } diff --git a/src/Readability.php b/src/Readability.php index 1cd7c1f..a259f5f 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -543,12 +543,12 @@ class Readability * safely converted into plain P elements to avoid confusing the scoring * algorithm with DIVs with are, in practice, paragraphs. */ - if (NodeUtility::hasSinglePNode($node)) { + if ($node->hasSinglePNode()) { $pNode = $node->getChildren(true)[0]; $node->parentNode->replaceChild($pNode, $node); $node = $pNode; $elementsToScore[] = $node; - } elseif (!NodeUtility::hasSingleChildBlockElement($node)) { + } elseif (!$node->hasSingleChildBlockElement()) { $node = NodeUtility::setNodeTag($node, 'p'); $elementsToScore[] = $node; } else { |