summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author FiveFilters.org <[email protected]> 2021-08-19 13:49:24 +0200
committer FiveFilters.org <[email protected]> 2021-08-19 13:49:24 +0200
commit d6ca859088f48563d48792158c9312eb5aa36f62 (patch)
tree cfe93a9265d8a23bda2653f309d3ede664b1a74a
parent a34a91b5466fef43b041ccb984e8a4beee951b2e (diff)
Remove aria-hidden="true" nodes
https://github.com/mozilla/readability/commit/60f470c4bb618a7987d5701b1228ec5ff49f2773 (but changes based on current version of Readability.js)
-rw-r--r-- src/Nodes/NodeTrait.php 7
-rw-r--r-- test/test-pages/remove-aria-hidden/expected-metadata.json 8
-rw-r--r-- test/test-pages/remove-aria-hidden/expected.html 7
-rw-r--r-- test/test-pages/remove-aria-hidden/source.html 19
4 files changed, 38 insertions, 3 deletions
diff --git a/src/Nodes/NodeTrait.php b/src/Nodes/NodeTrait.php
index 690c91a..cdd28bf 100644
--- a/src/Nodes/NodeTrait.php
+++ b/src/Nodes/NodeTrait.php
@@ -508,13 +508,14 @@ trait NodeTrait
* In the original JS project they check if the node has the style display=none, which unfortunately
* in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none".
*
- * Might be a good idea to check for classes or other attributes like 'aria-hidden'
- *
* @return bool
*/
public function isProbablyVisible()
{
- return !preg_match('/display:( )?none/', $this->getAttribute('style')) && !$this->hasAttribute('hidden');
+ return !preg_match('/display:( )?none/i', $this->getAttribute('style')) &&
+ !$this->hasAttribute('hidden') &&
+ //check for "fallback-image" so that wikimedia math images are displayed
+ (!$this->hasAttribute('aria-hidden') || $this->getAttribute('aria-hidden') !== 'true' || ($this->hasAttribute('class') && mb_strpos($this->getAttribute('class'), 'fallback-image') !== false));
}
/**
diff --git a/test/test-pages/remove-aria-hidden/expected-metadata.json b/test/test-pages/remove-aria-hidden/expected-metadata.json
new file mode 100644
index 0000000..b026cb4
--- /dev/null
+++ b/test/test-pages/remove-aria-hidden/expected-metadata.json
@@ -0,0 +1,8 @@
+{
+ "Author": null,
+ "Direction": null,
+ "Excerpt": "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n tempor incididunt ut labore et dolore magna aliqua.",
+ "Image": null,
+ "Title": "Remove aria-hidden elements test",
+ "SiteName": null
+} \ No newline at end of file
diff --git a/test/test-pages/remove-aria-hidden/expected.html b/test/test-pages/remove-aria-hidden/expected.html
new file mode 100644
index 0000000..8d2152d
--- /dev/null
+++ b/test/test-pages/remove-aria-hidden/expected.html
@@ -0,0 +1,7 @@
+<div>
+ <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+ tempor incididunt ut labore et dolore magna aliqua.</p>
+ <p>Ut enim ad minim veniam,
+ quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+ consequat.</p>
+ </div> \ No newline at end of file
diff --git a/test/test-pages/remove-aria-hidden/source.html b/test/test-pages/remove-aria-hidden/source.html
new file mode 100644
index 0000000..1c558e6
--- /dev/null
+++ b/test/test-pages/remove-aria-hidden/source.html
@@ -0,0 +1,19 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8"/>
+ <title>Remove aria-hidden elements test</title>
+</head>
+<body>
+ <article>
+ <h1>Lorem</h1>
+ <div>
+ <p><span aria-hidden="true">**WRONG**</span>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
+ tempor incididunt ut labore et dolore magna aliqua.</p>
+ <p>Ut enim ad minim veniam,
+ quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+ consequat.</p>
+ </div>
+ </article>
+</body>
+</html>