diff options
author | Andres Rey <[email protected]> | 2016-12-24 12:43:56 -0300 |
---|---|---|
committer | Andres Rey <[email protected]> | 2016-12-24 12:43:56 -0300 |
commit | 2b70f70e380c6cda03f9b8d8fd9e418b496d8ca0 (patch) | |
tree | 195311aaa53a4c723b3ae06ea9368d5f4a58c2cc /src | |
parent | dd353da76f26f1822278279f5aa81ed43625c300 (diff) |
Added a call to clean style tags and refactored the _clean function to traverse the DOM backwards
Diffstat (limited to 'src')
-rw-r--r-- | src/HTMLParser.php | 9 |
1 file changed, 8 insertions, 1 deletion
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 9157c79..7ee0594 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -792,6 +792,9 @@ class HTMLParser
         $this->_clean($article, 'h1');
         $this->_clean($article, 'footer');
 
+        // Readability.js cleans styles on prepDocument but we do it here.
+        $this->_clean($article, 'style');
+
         // If there is only one h2, they are probably using it as a header
         // and not a subheader, so remove it since we already have a header.
         if ($article->getElementsByTagName('h2')->length === 1) {
@@ -957,7 +960,11 @@ class HTMLParser
     {
         $isEmbed = in_array($tag, ['object', 'embed', 'iframe']);
 
-        foreach ($article->getElementsByTagName($tag) as $item) {
+        $DOMNodeList = $article->getElementsByTagName($tag);
+        $length = $DOMNodeList->length;
+        for ($i = 0; $i < $length; $i++) {
+            $item = $DOMNodeList->item($length - 1 - $i);
+
             // Allow youtube and vimeo videos through as people usually want to see those.
             if ($isEmbed) {
                 $attributeValues = [];