summary | refs | log | tree | commit | diff
path: root/src/Readability.php
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2018-03-10 17:40:39 +0000
committer: Andres Rey <[email protected]> 2018-03-10 17:40:39 +0000
commit: c2774e5df70f4933702c479d6356db2e0757e724 (patch)
tree: 9500040cedaceef546891c540060fad8d818d0f6 /src/Readability.php
parent: f50a8f68414e608fa07f24e4e1238b3d1eb2678b (diff)
Add _cleanClasses function
Diffstat (limited to 'src/Readability.php')
-rw-r--r-- src/Readability.php 24
1 files changed, 24 insertions, 0 deletions
diff --git a/src/Readability.php b/src/Readability.php
index 91e703c..9a29313 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -1480,6 +1480,28 @@ class Readability
}
/**
+ * Removes the class="" attribute from every element in the given
+ * subtree.
+ *
+ * Readability.js has a special filter to avoid cleaning the classes that the algorithm adds. We don't add classes
+ * here so no need to filter those.
+ *
+ * @param DOMDocument|DOMNode $node
+ *
+ * @return void
+ **/
+ public function _cleanClasses($node)
+ {
+     // Only element nodes carry attributes. In the standard DOM extension the
+     // document node, text nodes and comments do not define getAttribute() or
+     // removeAttribute(), so they are skipped here and only traversed below.
+     // hasAttribute() is used (rather than comparing the value to '') so that
+     // an empty class="" attribute is stripped as well.
+     if ($node instanceof \DOMElement && $node->hasAttribute('class')) {
+         $node->removeAttribute('class');
+     }
+
+     // Walk the direct children with a dedicated cursor instead of
+     // overwriting the $node parameter; each child cleans its own subtree.
+     for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
+         $this->_cleanClasses($child);
+     }
+ }
+
+ /**
* @param DOMDocument $article
*
* @return DOMDocument
@@ -1532,6 +1554,8 @@ class Readability
}
}
+ $this->_cleanClasses($article);
+
return $article;
}