diff options
author | Andres Rey <[email protected]> | 2017-11-07 20:55:21 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-07 20:55:21 +0000 |
commit | 840cac3acba875d98b245f9d512ef6eebd8f50fd (patch) | |
tree | 5b546834dc0fd390d3f7bab2a936770b8944d0f2 /src | |
parent | e03d5591b499158a85e21614b1fd5a7f511e5fa8 (diff) |
Clean style attributes inside tags
Diffstat (limited to 'src')
-rw-r--r-- | src/HTMLParser.php | 34 |
1 files changed, 32 insertions, 2 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 27dd8e5..273dab9 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -1001,6 +1001,9 @@ class HTMLParser
      */
     public function prepArticle(DOMDocument $article)
     {
+        $this->_cleanStyles($article);
+        $this->_clean($article, 'style');
+
         // Check for data tables before we continue, to avoid removing items in
         // those tables, which will often be isolated even though they're
         // visually linked to other content-ful elements (text, images, etc.).
@@ -1014,8 +1017,6 @@ class HTMLParser
         $this->_clean($article, 'h1');
         $this->_clean($article, 'footer');
 
-        // Readability.js cleans styles on prepDocument but we do it here.
-        $this->_clean($article, 'style');
 
         /*
          * If there is only one h2 and its text content substantially equals article title,
@@ -1176,6 +1177,35 @@ class HTMLParser
     }
 
     /**
+     * Remove the style attribute on every e and under.
+     * TODO: To be moved to Readability
+     *
+     * @param $node \DOMDocument|\DOMNode
+     **/
+    public function _cleanStyles($node)
+    {
+        $cur = $node->firstChild;
+
+        // Remove any root styles, if we're able.
+        //TODO Check if we actually need to check for the method to exist
+        if (method_exists($cur, 'removeAttribute')) {
+            $cur->removeAttribute('style');
+        }
+
+        // Go until there are no more child nodes
+        while ($cur !== null) {
+            if ($cur->nodeType === XML_ELEMENT_NODE) {
+                // Remove style attribute(s) :
+                $cur->removeAttribute('style');
+
+                $this->_cleanStyles($cur);
+            }
+
+            $cur = $cur->nextSibling;
+        }
+    }
+
+    /**
      * TODO To be moved to Readability.
      *
      * @param DOMDocument $article