summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndres Rey <[email protected]>2017-11-30 20:09:59 +0000
committerAndres Rey <[email protected]>2017-11-30 20:09:59 +0000
commit269cae7d0faad6748ffa1454cf03e45fa63e524c (patch)
tree968c8db7f9b8de3166225328c9dec0041b599cd9 /src
parentfc4834c588c61ab42b89e3324fa01d467f4a8d00 (diff)
Import functions from NodeUtility to NodeClassTrait
Diffstat (limited to 'src')
-rw-r--r--src/NodeClass/NodeClassTrait.php65
-rw-r--r--src/NodeUtility.php66
-rw-r--r--src/Readability.php4
3 files changed, 68 insertions, 67 deletions
diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php
index 1446fb6..403cd58 100644
--- a/src/NodeClass/NodeClassTrait.php
+++ b/src/NodeClass/NodeClassTrait.php
@@ -22,6 +22,23 @@ trait NodeClassTrait
private $initialized = false;
/**
+ * @var string[] Node names that hasSingleChildBlockElement() looks for among a node's descendants
+ */
+ private $divToPElements = [
+ 'a',
+ 'blockquote',
+ 'dl',
+ 'div',
+ 'img',
+ 'ol',
+ 'p',
+ 'pre',
+ 'table',
+ 'ul',
+ 'select',
+ ];
+
+ /**
* initialized getter
*
* @return bool
@@ -263,6 +280,54 @@ trait NodeClassTrait
}
/**
+ * Checks if the current node has a single child and if that child is a P node.
+ * Useful to convert <div><p> nodes to a single <p> node and avoid confusing the scoring system since div with p
+ * tags are, in practice, paragraphs.
+ *
+ * @return bool
+ */
+    public function hasSinglePNode()
+    {
+        // getChildren(true) is asked to filter out empty text nodes before the children are counted.
+        $children = $this->getChildren(true);
+
+        // There should be exactly 1 child and it must be a P. No per-child text scan is needed after
+        // this check: when it holds, the only child is the P element itself, so there is no text node
+        // left to inspect. Text nodes with real content are presumably returned by getChildren(true)
+        // and therefore already break the count (assumption - confirm against getChildren).
+        return count($children) === 1 && $children[0]->nodeName === 'p';
+    }
+
+    /**
+     * Checks whether any descendant of the current node is one of the divToPElements tags.
+     *
+     * Despite the name, this is not limited to a single child: each child is tested by node name, and a
+     * child that does not match is searched recursively.
+     *
+     * @return bool True as soon as one matching descendant is found, false if there are none.
+     */
+    public function hasSingleChildBlockElement()
+    {
+        if (!$this->hasChildNodes()) {
+            return false;
+        }
+
+        foreach ($this->getChildren() as $child) {
+            // Stop at the first hit: once a match is found the answer cannot change back to false.
+            if (in_array($child->nodeName, $this->divToPElements, true) || $child->hasSingleChildBlockElement()) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+ /**
* Returns the children of the current node.
*
* @param bool $filterEmptyDOMText Filter empty DOMText nodes?
diff --git a/src/NodeUtility.php b/src/NodeUtility.php
index 07bc8da..d0796dd 100644
--- a/src/NodeUtility.php
+++ b/src/NodeUtility.php
@@ -1,6 +1,7 @@
<?php
namespace andreskrey\Readability;
+
use andreskrey\Readability\NodeClass\DOMDocument;
use andreskrey\Readability\NodeClass\DOMElement;
use andreskrey\Readability\NodeClass\DOMNode;
@@ -13,23 +14,6 @@ class NodeUtility
{
/**
- * @var array
- */
- private static $divToPElements = [
- 'a',
- 'blockquote',
- 'dl',
- 'div',
- 'img',
- 'ol',
- 'p',
- 'pre',
- 'table',
- 'ul',
- 'select',
- ];
-
- /**
* Collection of regexps to check the node usability
*
* @var array
@@ -177,52 +161,4 @@ class NodeUtility
return ($originalNode) ? $originalNode->nextSibling : $originalNode;
}
-
- /**
- * Checks if the current node has a single child and if that child is a P node.
- * Useful to convert <div><p> nodes to a single <p> node and avoid confusing the scoring system since div with p
- * tags are, in practice, paragraphs.
- *
- * @param DOMNode $node
- *
- * @return bool
- */
- public static function hasSinglePNode($node)
- {
- // There should be exactly 1 element child which is a P:
- if (count($children = $node->getChildren(true)) !== 1 || $children[0]->nodeName !== 'p') {
- return false;
- }
-
- // And there should be no text nodes with real content (param true on ->getChildren)
- foreach ($children as $child) {
- /** @var $child DOMNode */
- if ($child->nodeType === XML_TEXT_NODE && !preg_match('/\S$/', $child->getTextContent())) {
- return false;
- }
- }
-
- return true;
- }
-
- /**
- * @param $node DOMNode
- * @return bool
- */
- public static function hasSingleChildBlockElement($node)
- {
- $result = false;
- if ($node->hasChildNodes()) {
- foreach ($node->getChildren() as $child) {
- if (in_array($child->nodeName, self::$divToPElements)) {
- $result = true;
- } else {
- // If any of the hasSingleChildBlockElement calls return true, return true then.
- $result = ($result || self::hasSingleChildBlockElement($child));
- }
- }
- }
-
- return $result;
- }
}
diff --git a/src/Readability.php b/src/Readability.php
index 1cd7c1f..a259f5f 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -543,12 +543,12 @@ class Readability
* safely converted into plain P elements to avoid confusing the scoring
* algorithm with DIVs with are, in practice, paragraphs.
*/
- if (NodeUtility::hasSinglePNode($node)) {
+ if ($node->hasSinglePNode()) {
$pNode = $node->getChildren(true)[0];
$node->parentNode->replaceChild($pNode, $node);
$node = $pNode;
$elementsToScore[] = $node;
- } elseif (!NodeUtility::hasSingleChildBlockElement($node)) {
+ } elseif (!$node->hasSingleChildBlockElement()) {
$node = NodeUtility::setNodeTag($node, 'p');
$elementsToScore[] = $node;
} else {