From 4fe7aa2a39c12f9dd4bddc8699f76e9bc3eb4b4f Mon Sep 17 00:00:00 2001
From: "FiveFilters.org"
Date: Wed, 25 Aug 2021 03:06:54 +0200
Subject: exclude additional elements based on their role

https://github.com/mozilla/readability/commit/d5eea06a0095b3138dbd1f6233f656d690200509
---
 src/Readability.php | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/Readability.php b/src/Readability.php
index 2027db3..1089769 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -124,6 +124,11 @@ class Readability
         'pre',
     ];
 
+    /**
+     * @var array
+     */
+    private $unlikelyRoles = ['menu', 'menubar', 'complementary', 'navigation', 'alert', 'alertdialog', 'dialog'];
+
     /**
      * @var array
      */
@@ -896,8 +901,8 @@ class Readability
                     }
                 }
 
-                if ($node->getAttribute('role') === 'complementary') {
-                    $this->logger->debug(sprintf('Removing complementary content - %s', $matchString));
+                if (in_array($node->getAttribute('role'), $this->unlikelyRoles)) {
+                    $this->logger->debug(sprintf('Removing content with role %s - %s', $node->getAttribute('role'), $matchString));
                     $node = NodeUtility::removeAndGetNext($node);
                     continue;
                 }
-- 
cgit v1.2.3