diff options
-rw-r--r-- | src/Nodes/NodeTrait.php | 4 | ||||
-rw-r--r-- | src/Readability.php | 24 |
2 files changed, 28 insertions, 0 deletions
diff --git a/src/Nodes/NodeTrait.php b/src/Nodes/NodeTrait.php index 5a3cd7f..bb848ab 100644 --- a/src/Nodes/NodeTrait.php +++ b/src/Nodes/NodeTrait.php @@ -7,6 +7,10 @@ use andreskrey\Readability\Nodes\DOM\DOMElement; use andreskrey\Readability\Nodes\DOM\DOMNode; use andreskrey\Readability\Nodes\DOM\DOMText; + +/** + * @method bool removeAttribute($name) + */ trait NodeTrait { /** diff --git a/src/Readability.php b/src/Readability.php index 91e703c..9a29313 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1480,6 +1480,28 @@ class Readability } /** + * Recursively removes every non-empty class attribute from the given node + * and all of its descendants; a node whose class reads as '' is left untouched. + * + * Readability.js has a special filter to avoid cleaning the classes that the algorithm adds. We don't add classes + * here, so there is no need to filter those. + * + * @param DOMDocument|DOMNode $node Root of the subtree to clean; its children are visited via firstChild/nextSibling + * + * @return void + */ + public function _cleanClasses($node) + { + if ($node->getAttribute('class') !== '') { + $node->removeAttribute('class'); + } + + for ($node = $node->firstChild; $node !== null; $node = $node->nextSibling) { + $this->_cleanClasses($node); + } + } + + /** * @param DOMDocument $article * * @return DOMDocument @@ -1532,6 +1554,8 @@ class Readability } } + $this->_cleanClasses($article); + return $article; } |