diff options
author | FiveFilters.org <[email protected]> | 2021-08-25 03:06:54 +0200 |
---|---|---|
committer | FiveFilters.org <[email protected]> | 2021-08-25 03:06:54 +0200 |
commit | 4fe7aa2a39c12f9dd4bddc8699f76e9bc3eb4b4f (patch) | |
tree | 108319b92ae8d8b8141b23632edb10feac0ce2ca | |
parent | 46ada6d063648ef2d1b1bbb5e9b1e56cd179cdab (diff) |
exclude additional elements based on their role
https://github.com/mozilla/readability/commit/d5eea06a0095b3138dbd1f6233f656d690200509
-rw-r--r-- | src/Readability.php | 9 | ||||
-rw-r--r-- | test/test-pages/nytimes-1/expected.html | 13 | ||||
-rw-r--r-- | test/test-pages/nytimes-2/expected.html | 13 |
3 files changed, 9 insertions, 26 deletions
```diff
diff --git a/src/Readability.php b/src/Readability.php
index 2027db3..1089769 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -127,6 +127,11 @@ class Readability
     /**
      * @var array
      */
+    private $unlikelyRoles = ['menu', 'menubar', 'complementary', 'navigation', 'alert', 'alertdialog', 'dialog'];
+
+    /**
+     * @var array
+     */
     private $alterToDIVExceptions = [
         'div',
         'article',
@@ -896,8 +901,8 @@ class Readability
                 }
             }
 
-            if ($node->getAttribute('role') === 'complementary') {
-                $this->logger->debug(sprintf('Removing complementary content - %s', $matchString));
+            if (in_array($node->getAttribute('role'), $this->unlikelyRoles)) {
+                $this->logger->debug(sprintf('Removing content with role %s - %s', $node->getAttribute('role'), $matchString));
                 $node = NodeUtility::removeAndGetNext($node);
                 continue;
             }
diff --git a/test/test-pages/nytimes-1/expected.html b/test/test-pages/nytimes-1/expected.html
index a18a21e..4151659 100644
--- a/test/test-pages/nytimes-1/expected.html
+++ b/test/test-pages/nytimes-1/expected.html
@@ -68,18 +68,7 @@
 </main>
 
 
- <section id="site-index">
-
-
- <nav id="site-index-navigation" role="navigation">
-
-
-
-
- </nav>
-
-
- </section>
+
 
 
 
diff --git a/test/test-pages/nytimes-2/expected.html b/test/test-pages/nytimes-2/expected.html
index e0e48ff..b8ca26b 100644
--- a/test/test-pages/nytimes-2/expected.html
+++ b/test/test-pages/nytimes-2/expected.html
@@ -71,18 +71,7 @@
 </main>
 
 
- <section id="site-index">
-
-
- <nav id="site-index-navigation" role="navigation">
-
-
-
-
- </nav>
-
-
- </section>
+
 
 
 
```