From 86823e6cb1b1ec4a21824bb36b53a3d810d18203 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Wed, 29 Nov 2017 23:53:43 +0000 Subject: Check for class weighting before doing it --- src/NodeClass/NodeClassTrait.php | 14 ++++---------- src/Readability.php | 14 +++++++++++--- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php index 24ecebc..b24dd4c 100644 --- a/src/NodeClass/NodeClassTrait.php +++ b/src/NodeClass/NodeClassTrait.php @@ -44,9 +44,10 @@ trait NodeClassTrait /** * Initializer. Calculates the current score of the node and returns a full Readability object. * - * @return self + * @param $weightClasses bool Weight classes? + * @return static */ - public function initializeNode() + public function initializeNode($weightClasses) { if (!$this->isInitialized()) { $contentScore = 0; @@ -84,7 +85,7 @@ trait NodeClassTrait break; } - $this->contentScore = $contentScore + $this->getClassWeight(); + $this->contentScore = $contentScore + ($weightClasses ? $this->getClassWeight() : 0); $this->initialized = true; } @@ -176,17 +177,10 @@ trait NodeClassTrait /** * Calculates the weight of the class/id of the current element. * - * @todo check for flag that lets this function run or not - * * @return int */ public function getClassWeight() { - // TODO To implement. How to get config from html parser from readability -// if ($this->getConfig()->getOption('weightClasses')) { -// return 0; -// } -// $weight = 0; // Look for a special classname diff --git a/src/Readability.php b/src/Readability.php index 55583ac..53bc32d 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -773,7 +773,7 @@ class Readability /** @var $ancestor DOMElement */ foreach ($ancestors as $level => $ancestor) { if (!$ancestor->isInitialized()) { - $ancestor->initializeNode(); + $ancestor->initializeNode($this->configuration->getWeightClasses()); $candidates[] = $ancestor; } @@ -1262,7 +1262,10 @@ class Readability continue; } - $weight = $node->getClassWeight(); + $weight = 0; + if ($this->configuration->getWeightClasses()) { + $weight = $node->getClassWeight(); + } if ($weight < 0) { NodeUtility::removeNode($node); @@ -1367,7 +1370,12 @@ class Readability $headers = $article->getElementsByTagName('h' . $headerIndex); /** @var $header DOMElement */ foreach ($headers as $header) { - if ($header->getClassWeight() < 0) { + $weight = 0; + if ($this->configuration->getWeightClasses()) { + $weight = $header->getClassWeight(); + } + + if ($weight < 0) { NodeUtility::removeNode($header); } } -- cgit v1.2.3