summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/NodeClass/NodeClassTrait.php 103
-rw-r--r-- src/Readability.php 2
2 files changed, 54 insertions, 51 deletions
diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php
index 403cd58..a1382d1 100644
--- a/src/NodeClass/NodeClassTrait.php
+++ b/src/NodeClass/NodeClassTrait.php
@@ -6,7 +6,6 @@ use andreskrey\Readability\NodeUtility;
trait NodeClassTrait
{
-
/**
* Content score of the node. Used to determine the value of the content
*
@@ -237,6 +236,55 @@ trait NodeClassTrait
}
/**
+ * Returns the children of the current node.
+ *
+ * @param bool $filterEmptyDOMText Filter empty DOMText nodes?
+ *
+ * @return array
+ */
+    public function getChildren($filterEmptyDOMText = false)
+    {
+        // Snapshot the child node list into a plain PHP array.
+        $children = iterator_to_array($this->childNodes);
+        if (!$filterEmptyDOMText) {
+            return $children;
+        }
+
+        // Appending to a fresh array keeps the keys sequential (0..n-1) once
+        // whitespace-only text nodes have been dropped.
+        $kept = [];
+        foreach ($children as $child) {
+            if ($child->nodeName === '#text' && !mb_strlen(trim($child->nodeValue))) {
+                continue;
+            }
+            $kept[] = $child;
+        }
+
+        return $kept;
+    }
+
+    /**
+     * Count the rows and columns of this table, honouring rowspan/colspan.
+     *
+     * Every <tr> found below this node adds its "rowspan" value to the row
+     * total, and every <td> found below that row adds its "colspan" value to
+     * the row's width. A missing, non-numeric or non-positive span counts as 1.
+     * The reported column count is the width of the widest row.
+     *
+     * @return array Map with integer 'rows' and 'columns' entries.
+     */
+    public function getRowAndColumnCount()
+    {
+        $rows = $columns = 0;
+        $trs = $this->getElementsByTagName('tr');
+        foreach ($trs as $tr) {
+            /** @var \DOMElement $tr */
+            // In PHP `$rowspan || 1` is a boolean (always true, i.e. 1), so the
+            // attribute value was silently ignored. Cast it and fall back to 1.
+            $rows += max(1, (int) $tr->getAttribute('rowspan'));
+
+            // Now look for column-related info
+            $columnsInThisRow = 0;
+            $cells = $tr->getElementsByTagName('td');
+            foreach ($cells as $cell) {
+                /** @var \DOMElement $cell */
+                // Same boolean pitfall as for rowspan above.
+                $columnsInThisRow += max(1, (int) $cell->getAttribute('colspan'));
+            }
+            $columns = max($columns, $columnsInThisRow);
+        }
+
+        return ['rows' => $rows, 'columns' => $columns];
+    }
+
+
+ /**
* Creates a new node based on the text content of the original node.
*
* @param $originalNode DOMElement
@@ -307,7 +355,9 @@ trait NodeClassTrait
}
/**
- * @param $node DOMNode
+ * Check if the current element has a single child block element.
+ * Block elements are the ones defined in the divToPElements array.
+ *
* @return bool
*/
public function hasSingleChildBlockElement()
@@ -319,6 +369,7 @@ trait NodeClassTrait
$result = true;
} else {
// If any of the hasSingleChildBlockElement calls return true, return true then.
+ /** @var $child DOMElement */
$result = ($result || $child->hasSingleChildBlockElement());
}
}
@@ -328,26 +379,6 @@ trait NodeClassTrait
}
/**
- * Returns the children of the current node.
- *
- * @param bool $filterEmptyDOMText Filter empty DOMText nodes?
- *
- * @return array
- */
- public function getChildren($filterEmptyDOMText = false)
- {
- $ret = iterator_to_array($this->childNodes);
- if ($filterEmptyDOMText) {
- // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
- $ret = array_values(array_filter($ret, function ($node) {
- return $node->nodeName !== '#text' || mb_strlen(trim($node->nodeValue));
- }));
- }
-
- return $ret;
- }
-
- /**
* Determines if a node has no content or it is just a bunch of dividing lines and/or whitespace.
*
* @return bool
@@ -372,32 +403,4 @@ trait NodeClassTrait
);
}
-
- /**
- * Return an array indicating how many rows and columns this table has.
- *
- * @return array
- */
- public function _getRowAndColumnCount()
- {
- $rows = $columns = 0;
- $trs = $this->getElementsByTagName('tr');
- foreach ($trs as $tr) {
- /** @var \DOMElement $tr */
- $rowspan = $tr->getAttribute('rowspan');
- $rows += ($rowspan || 1);
-
- // Now look for column-related info
- $columnsInThisRow = 0;
- $cells = $tr->getElementsByTagName('td');
- foreach ($cells as $cell) {
- /** @var \DOMElement $cell */
- $colspan = $cell->getAttribute('colspan');
- $columnsInThisRow += ($colspan || 1);
- }
- $columns = max($columns, $columnsInThisRow);
- }
-
- return ['rows' => $rows, 'columns' => $columns];
- }
}
diff --git a/src/Readability.php b/src/Readability.php
index a259f5f..730c327 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -1115,7 +1115,7 @@ class Readability
continue;
}
- $sizeInfo = $table->_getRowAndColumnCount();
+ $sizeInfo = $table->getRowAndColumnCount();
if ($sizeInfo['rows'] >= 10 || $sizeInfo['columns'] > 4) {
$table->readabilityDataTable = true;
continue;