summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author FiveFilters.org <[email protected]> 2021-08-25 03:06:54 +0200
committer FiveFilters.org <[email protected]> 2021-08-25 03:06:54 +0200
commit 4fe7aa2a39c12f9dd4bddc8699f76e9bc3eb4b4f (patch)
tree 108319b92ae8d8b8141b23632edb10feac0ce2ca /src
parent 46ada6d063648ef2d1b1bbb5e9b1e56cd179cdab (diff)
exclude additional elements based on their role
https://github.com/mozilla/readability/commit/d5eea06a0095b3138dbd1f6233f656d690200509
Diffstat (limited to 'src')
-rw-r--r-- src/Readability.php 9
1 files changed, 7 insertions, 2 deletions
diff --git a/src/Readability.php b/src/Readability.php
index 2027db3..1089769 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -127,6 +127,11 @@ class Readability
/**
* @var array
*/
+ private $unlikelyRoles = ['menu', 'menubar', 'complementary', 'navigation', 'alert', 'alertdialog', 'dialog'];
+
+ /**
+ * @var array
+ */
private $alterToDIVExceptions = [
'div',
'article',
@@ -896,8 +901,8 @@ class Readability
}
}
- if ($node->getAttribute('role') === 'complementary') {
- $this->logger->debug(sprintf('Removing complementary content - %s', $matchString));
+ if (in_array($node->getAttribute('role'), $this->unlikelyRoles)) {
+ $this->logger->debug(sprintf('Removing content with role %s - %s', $node->getAttribute('role'), $matchString));
$node = NodeUtility::removeAndGetNext($node);
continue;
}