From 43960986dc59fc59b7439af120ac6f4e76459a81 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Thu, 30 Nov 2017 20:20:51 +0000 Subject: Minor clean up and reorganization --- src/NodeClass/NodeClassTrait.php | 103 ++++++++++++++++++++------------------- src/Readability.php | 2 +- 2 files changed, 54 insertions(+), 51 deletions(-) diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php index 403cd58..a1382d1 100644 --- a/src/NodeClass/NodeClassTrait.php +++ b/src/NodeClass/NodeClassTrait.php @@ -6,7 +6,6 @@ use andreskrey\Readability\NodeUtility; trait NodeClassTrait { - /** * Content score of the node. Used to determine the value of the content * @@ -236,6 +235,55 @@ trait NodeClassTrait return $nodeValue; } + /** + * Returns the children of the current node. + * + * @param bool $filterEmptyDOMText Filter empty DOMText nodes? + * + * @return array + */ + public function getChildren($filterEmptyDOMText = false) + { + $ret = iterator_to_array($this->childNodes); + if ($filterEmptyDOMText) { + // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number + $ret = array_values(array_filter($ret, function ($node) { + return $node->nodeName !== '#text' || mb_strlen(trim($node->nodeValue)); + })); + } + + return $ret; + } + + /** + * Return an array indicating how many rows and columns this table has. + * + * @return array + */ + public function getRowAndColumnCount() + { + $rows = $columns = 0; + $trs = $this->getElementsByTagName('tr'); + foreach ($trs as $tr) { + /** @var \DOMElement $tr */ + $rowspan = $tr->getAttribute('rowspan'); + $rows += ($rowspan || 1); + + // Now look for column-related info + $columnsInThisRow = 0; + $cells = $tr->getElementsByTagName('td'); + foreach ($cells as $cell) { + /** @var \DOMElement $cell */ + $colspan = $cell->getAttribute('colspan'); + $columnsInThisRow += ($colspan || 1); + } + $columns = max($columns, $columnsInThisRow); + } + + return ['rows' => $rows, 'columns' => $columns]; + } + + /** * Creates a new node based on the text content of the original node. * @@ -307,7 +355,9 @@ trait NodeClassTrait } /** - * @param $node DOMNode + * Check if the current element has a single child block element. + * Block elements are the ones defined in the divToPElements array. + * * @return bool */ public function hasSingleChildBlockElement() @@ -319,6 +369,7 @@ trait NodeClassTrait $result = true; } else { // If any of the hasSingleChildBlockElement calls return true, return true then. + /** @var $child DOMElement */ $result = ($result || $child->hasSingleChildBlockElement()); } } @@ -327,26 +378,6 @@ trait NodeClassTrait return $result; } - /** - * Returns the children of the current node. - * - * @param bool $filterEmptyDOMText Filter empty DOMText nodes? - * - * @return array - */ - public function getChildren($filterEmptyDOMText = false) - { - $ret = iterator_to_array($this->childNodes); - if ($filterEmptyDOMText) { - // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number - $ret = array_values(array_filter($ret, function ($node) { - return $node->nodeName !== '#text' || mb_strlen(trim($node->nodeValue)); - })); - } - - return $ret; - } - /** * Determines if a node has no content or it is just a bunch of dividing lines and/or whitespace. * @@ -372,32 +403,4 @@ trait NodeClassTrait ); } - - /** - * Return an array indicating how many rows and columns this table has. - * - * @return array - */ - public function _getRowAndColumnCount() - { - $rows = $columns = 0; - $trs = $this->getElementsByTagName('tr'); - foreach ($trs as $tr) { - /** @var \DOMElement $tr */ - $rowspan = $tr->getAttribute('rowspan'); - $rows += ($rowspan || 1); - - // Now look for column-related info - $columnsInThisRow = 0; - $cells = $tr->getElementsByTagName('td'); - foreach ($cells as $cell) { - /** @var \DOMElement $cell */ - $colspan = $cell->getAttribute('colspan'); - $columnsInThisRow += ($colspan || 1); - } - $columns = max($columns, $columnsInThisRow); - } - - return ['rows' => $rows, 'columns' => $columns]; - } } diff --git a/src/Readability.php b/src/Readability.php index a259f5f..730c327 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1115,7 +1115,7 @@ class Readability continue; } - $sizeInfo = $table->_getRowAndColumnCount(); + $sizeInfo = $table->getRowAndColumnCount(); if ($sizeInfo['rows'] >= 10 || $sizeInfo['columns'] > 4) { $table->readabilityDataTable = true; continue; -- cgit v1.2.3