From 1fb1351943e27ca9d690a9e87c70ef4e1812a8ba Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Sun, 11 Dec 2016 23:53:52 +0000 Subject: Added backupdom property, which will hold the original HTML in case it's needed to create a fake top candidate --- src/HTMLParser.php | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 27e376d..d262519 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -16,6 +16,11 @@ class HTMLParser */ private $dom = null; + /** + * @var DOMDocument + */ + private $backupdom = null; + /** * @var array */ @@ -154,6 +159,9 @@ class HTMLParser { $this->dom->loadHTML($html); $this->dom->encoding = 'UTF-8'; + + // In case we need the original HTML to create a fake top candidate + $this->backupdom = clone $this->dom; } /** @@ -465,7 +473,7 @@ class HTMLParser $topCandidate = new DOMDocument(); $topCandidate->appendChild($topCandidate->createElement('div', '')); - $kids = $this->dom->getElementsByTagName('body')->item(0)->childNodes; + $kids = $this->backupdom->getElementsByTagName('body')->item(0)->childNodes; // Cannot be foreached, don't ask me why. for ($i = 0; $i < $kids->length; $i++) { @@ -473,7 +481,7 @@ class HTMLParser $topCandidate->firstChild->appendChild($import); } - // Readability must be created using firstChild to grab de DOMElement instead of the DOMDocument. + // Readability must be created using firstChild to grab the DOMElement instead of the DOMDocument. $topCandidate = new Readability($topCandidate->firstChild); $topCandidate->initializeNode(); -- cgit v1.2.3