summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2017-05-20 13:24:18 +0100
committer: Andres Rey <[email protected]> 2017-05-20 13:24:18 +0100
commit: c0f2df985a0770c058bcd1c0aed5cd6b220672b8 (patch)
tree: 279e6d35400120e41cafc0af531a2985fd85cf60 /src
parent: f21bca6fd42ecfa96146002580c06c4dd455cb6e (diff)
Move the calls to removeScripts and prepDocument inside the loadHTML function. Performance will suffer (as the system has to reparse the HTML every time it cycles), but it is the only solution AFAIK.
Diffstat (limited to 'src')
-rw-r--r--src/HTMLParser.php26
1 files changed, 15 insertions, 11 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 13d7817..ef849a8 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -118,10 +118,6 @@ class HTMLParser
{
$this->dom = $this->loadHTML($html);
- $this->removeScripts();
-
- $this->prepDocument();
-
$this->metadata = $this->getMetadata();
$this->metadata['image'] = $this->getMainImage();
@@ -223,6 +219,10 @@ class HTMLParser
$dom->loadHTML('<?xml encoding="UTF-8">' . $html);
$dom->encoding = 'UTF-8';
+ $this->removeScripts($dom);
+
+ $this->prepDocument($dom);
+
return $dom;
}
@@ -236,13 +236,15 @@ class HTMLParser
/**
* Removes all the scripts of the html.
+ *
+ * @param DOMDocument $dom
*/
- private function removeScripts()
+ private function removeScripts(DOMDocument $dom)
{
$toRemove = ['script', 'noscript'];
foreach ($toRemove as $tag) {
- while ($script = $this->dom->getElementsByTagName($tag)) {
+ while ($script = $dom->getElementsByTagName($tag)) {
if ($script->item(0)) {
$script->item(0)->parentNode->removeChild($script->item(0));
} else {
@@ -252,12 +254,14 @@ class HTMLParser
}
}
- /*
+ /**
* Prepares the document for parsing
+ *
+ * @param DOMDocument $dom
*/
- private function prepDocument()
+ private function prepDocument(DOMDocument $dom)
{
- $brs = $this->dom->getElementsByTagName('br');
+ $brs = $dom->getElementsByTagName('br');
$length = $brs->length;
for ($i = 0; $i < $length; $i++) {
/** @var \DOMNode $br */
@@ -289,7 +293,7 @@ class HTMLParser
*/
if ($replaced) {
- $p = $this->dom->createElement('p');
+ $p = $dom->createElement('p');
$br->parentNode->replaceChild($p, $br);
$next = $p->nextSibling;
@@ -311,7 +315,7 @@ class HTMLParser
}
// Replace font tags with span
- $fonts = $this->dom->getElementsByTagName('font');
+ $fonts = $dom->getElementsByTagName('font');
$length = $fonts->length;
for ($i = 0; $i < $length; $i++) {
$font = $fonts->item($length - 1 - $i);