From c0f2df985a0770c058bcd1c0aed5cd6b220672b8 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Sat, 20 May 2017 13:24:18 +0100 Subject: Move the removeScripts and prepDocument functions inside the loadHTML function. Performance will suffer (as the system has to reparse the html every time it cycles) but it is the only solution AFAIK. --- src/HTMLParser.php | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 13d7817..ef849a8 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -118,10 +118,6 @@ class HTMLParser { $this->dom = $this->loadHTML($html); - $this->removeScripts(); - - $this->prepDocument(); - $this->metadata = $this->getMetadata(); $this->metadata['image'] = $this->getMainImage(); @@ -223,6 +219,10 @@ class HTMLParser $dom->loadHTML('' . $html); $dom->encoding = 'UTF-8'; + $this->removeScripts($dom); + + $this->prepDocument($dom); + return $dom; } @@ -236,13 +236,15 @@ class HTMLParser /** * Removes all the scripts of the html. 
+ * + * @param DOMDocument $dom */ - private function removeScripts() + private function removeScripts(DOMDocument $dom) { $toRemove = ['script', 'noscript']; foreach ($toRemove as $tag) { - while ($script = $this->dom->getElementsByTagName($tag)) { + while ($script = $dom->getElementsByTagName($tag)) { if ($script->item(0)) { $script->item(0)->parentNode->removeChild($script->item(0)); } else { @@ -252,12 +254,14 @@ class HTMLParser } } - /* + /** * Prepares the document for parsing + * + * @param DOMDocument $dom */ - private function prepDocument() + private function prepDocument(DOMDocument $dom) { - $brs = $this->dom->getElementsByTagName('br'); + $brs = $dom->getElementsByTagName('br'); $length = $brs->length; for ($i = 0; $i < $length; $i++) { /** @var \DOMNode $br */ @@ -289,7 +293,7 @@ class HTMLParser */ if ($replaced) { - $p = $this->dom->createElement('p'); + $p = $dom->createElement('p'); $br->parentNode->replaceChild($p, $br); $next = $p->nextSibling; @@ -311,7 +315,7 @@ class HTMLParser } // Replace font tags with span - $fonts = $this->dom->getElementsByTagName('font'); + $fonts = $dom->getElementsByTagName('font'); $length = $fonts->length; for ($i = 0; $i < $length; $i++) { $font = $fonts->item($length - 1 - $i); -- cgit v1.2.3