diff options
author | Andres Rey <[email protected]> | 2017-05-20 13:24:18 +0100 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-05-20 13:24:18 +0100 |
commit | c0f2df985a0770c058bcd1c0aed5cd6b220672b8 (patch) | |
tree | 279e6d35400120e41cafc0af531a2985fd85cf60 /src | |
parent | f21bca6fd42ecfa96146002580c06c4dd455cb6e (diff) |
Move the removeScripts and prepDocument functions inside the loadHTML function. Performance will suffer (as the system has to reparse the HTML every time it cycles), but it is the only solution AFAIK.
Diffstat (limited to 'src')
-rw-r--r-- | src/HTMLParser.php | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 13d7817..ef849a8 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -118,10 +118,6 @@ class HTMLParser { $this->dom = $this->loadHTML($html); - $this->removeScripts(); - - $this->prepDocument(); - $this->metadata = $this->getMetadata(); $this->metadata['image'] = $this->getMainImage(); @@ -223,6 +219,10 @@ class HTMLParser $dom->loadHTML('<?xml encoding="UTF-8">' . $html); $dom->encoding = 'UTF-8'; + $this->removeScripts($dom); + + $this->prepDocument($dom); + return $dom; } @@ -236,13 +236,15 @@ class HTMLParser /** * Removes all the scripts of the html. + * + * @param DOMDocument $dom */ - private function removeScripts() + private function removeScripts(DOMDocument $dom) { $toRemove = ['script', 'noscript']; foreach ($toRemove as $tag) { - while ($script = $this->dom->getElementsByTagName($tag)) { + while ($script = $dom->getElementsByTagName($tag)) { if ($script->item(0)) { $script->item(0)->parentNode->removeChild($script->item(0)); } else { @@ -252,12 +254,14 @@ class HTMLParser } } - /* + /** * Prepares the document for parsing + * + * @param DOMDocument $dom */ - private function prepDocument() + private function prepDocument(DOMDocument $dom) { - $brs = $this->dom->getElementsByTagName('br'); + $brs = $dom->getElementsByTagName('br'); $length = $brs->length; for ($i = 0; $i < $length; $i++) { /** @var \DOMNode $br */ @@ -289,7 +293,7 @@ class HTMLParser */ if ($replaced) { - $p = $this->dom->createElement('p'); + $p = $dom->createElement('p'); $br->parentNode->replaceChild($p, $br); $next = $p->nextSibling; @@ -311,7 +315,7 @@ class HTMLParser } // Replace font tags with span - $fonts = $this->dom->getElementsByTagName('font'); + $fonts = $dom->getElementsByTagName('font'); $length = $fonts->length; for ($i = 0; $i < $length; $i++) { $font = $fonts->item($length - 1 - $i); |