summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2016-12-24 12:43:56 -0300
committer: Andres Rey <[email protected]> 2016-12-24 12:43:56 -0300
commit: 2b70f70e380c6cda03f9b8d8fd9e418b496d8ca0 (patch)
tree: 195311aaa53a4c723b3ae06ea9368d5f4a58c2cc /src
parent: dd353da76f26f1822278279f5aa81ed43625c300 (diff)
Added function to clean Style tags and refactored the _clean function to traverse the DOM backwards
Diffstat (limited to 'src')
-rw-r--r-- src/HTMLParser.php 9
1 file changed, 8 insertions, 1 deletion
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 9157c79..7ee0594 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -792,6 +792,9 @@ class HTMLParser
$this->_clean($article, 'h1');
$this->_clean($article, 'footer');
+ // Readability.js cleans styles on prepDocument but we do it here.
+ $this->_clean($article, 'style');
+
// If there is only one h2, they are probably using it as a header
// and not a subheader, so remove it since we already have a header.
if ($article->getElementsByTagName('h2')->length === 1) {
@@ -957,7 +960,11 @@ class HTMLParser
{
$isEmbed = in_array($tag, ['object', 'embed', 'iframe']);
- foreach ($article->getElementsByTagName($tag) as $item) {
+ $DOMNodeList = $article->getElementsByTagName($tag);
+ $length = $DOMNodeList->length;
+ for ($i = 0; $i < $length; $i++) {
+ $item = $DOMNodeList->item($length - 1 - $i);
+
// Allow youtube and vimeo videos through as people usually want to see those.
if ($isEmbed) {
$attributeValues = [];