From 2b70f70e380c6cda03f9b8d8fd9e418b496d8ca0 Mon Sep 17 00:00:00 2001
From: Andres Rey
Date: Sat, 24 Dec 2016 12:43:56 -0300
Subject: Added function to clean Style tags and refactored the _clean function to traverse the DOM backwards

---
 src/HTMLParser.php | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 9157c79..7ee0594 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -792,6 +792,9 @@ class HTMLParser
         $this->_clean($article, 'h1');
         $this->_clean($article, 'footer');
 
+        // Readability.js cleans styles on prepDocument but we do it here.
+        $this->_clean($article, 'style');
+
         // If there is only one h2, they are probably using it as a header
         // and not a subheader, so remove it since we already have a header.
         if ($article->getElementsByTagName('h2')->length === 1) {
@@ -957,7 +960,11 @@ class HTMLParser
     {
         $isEmbed = in_array($tag, ['object', 'embed', 'iframe']);
 
-        foreach ($article->getElementsByTagName($tag) as $item) {
+        $DOMNodeList = $article->getElementsByTagName($tag);
+        $length = $DOMNodeList->length;
+        for ($i = 0; $i < $length; $i++) {
+            $item = $DOMNodeList->item($length - 1 - $i);
+
             // Allow youtube and vimeo videos through as people usually want to see those.
             if ($isEmbed) {
                 $attributeValues = [];
--
cgit v1.2.3