From 18b61354ce2d0c2133b82b54254a9239ec5a5fe1 Mon Sep 17 00:00:00 2001
From: Andres Rey
Date: Sun, 2 Sep 2018 20:41:39 +0100
Subject: Check for visible nodes before parsing

---
 src/Readability.php | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/Readability.php')

diff --git a/src/Readability.php b/src/Readability.php
index 2cad398..f913d80 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -626,6 +626,12 @@ class Readability
 
             $matchString = $node->getAttribute('class') . ' ' . $node->getAttribute('id');
 
+            if (!$node->isProbablyVisible()) {
+                $this->logger->debug(sprintf('[Get Nodes] Removing hidden node... Match string was: \'%s\'', $matchString));
+                $node = NodeUtility::removeAndGetNext($node);
+                continue;
+            }
+
             // Check to see if this node is a byline, and remove it if it is.
             if ($this->checkByline($node, $matchString)) {
                 $this->logger->debug(sprintf('[Get Nodes] Found byline, removing... Node content was: \'%s\'', substr($node->nodeValue, 0, 128)));
-- 
cgit v1.2.3