From 8e6dcdcdb48695fae6a6e57a8d4ddd3762c3c47a Mon Sep 17 00:00:00 2001
From: Andres Rey <andreskrey@gmail.com>
Date: Fri, 1 Dec 2017 21:23:40 +0000
Subject: Move load function below parse function

---
 src/Readability.php | 88 ++++++++++++++++++++++++++---------------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

(limited to 'src')

diff --git a/src/Readability.php b/src/Readability.php
index f2617a4..c8c7c05 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -104,50 +104,6 @@ class Readability
         $this->configuration = $configuration;
     }
 
-    /**
-     * Creates a DOM Document object and loads the provided HTML on it.
-     *
-     * Used for the first load of Readability and subsequent reloads (when disabling flags and rescanning the text)
-     * Previous versions of Readability used this method one time and cloned the DOM to keep a backup. This caused bugs
-     * because cloning the DOM object keeps a relation between the clone and the original one, doing changes in both
-     * objects and ruining the backup.
-     *
-     * @param string $html
-     *
-     * @return DOMDocument
-     */
-    private function loadHTML($html)
-    {
-        // To avoid having a gazillion of errors on malformed HTMLs
-        libxml_use_internal_errors(true);
-
-        $dom = new DOMDocument('1.0', 'utf-8');
-
-        if (!$this->configuration->getSubstituteEntities()) {
-            // Keep the original HTML entities
-            $dom->substituteEntities = false;
-        }
-
-        if ($this->configuration->getNormalizeEntities()) {
-            // Replace UTF-8 characters with the HTML Entity equivalent. Useful to fix html with mixed content
-            $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
-        }
-
-        if ($this->configuration->getSummonCthulhu()) {
-            $html = preg_replace('/<script\b[^>]*>([\s\S]*?)<\/script>/', '', $html);
-        }
-
-        // Prepend the XML tag to avoid having issues with special characters. Should be harmless.
-        $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
-        $dom->encoding = 'UTF-8';
-
-        $this->removeScripts($dom);
-
-        $this->prepDocument($dom);
-
-        return $dom;
-    }
-
     /**
      * Main parse function.
      *
@@ -213,6 +169,50 @@ class Readability
         return true;
     }
 
+    /**
+     * Builds a fresh DOMDocument from the provided HTML, then runs removeScripts() and prepDocument() on it.
+     *
+     * Used for the first load of Readability and for every reload (when disabling flags and rescanning the text).
+     * The document is always rebuilt from the HTML string instead of cloned: a cloned DOM keeps a relation with
+     * the original one, so changes ended up in both objects and ruined the backup.
+     *
+     * @param string $html
+     *
+     * @return DOMDocument
+     */
+    private function loadHTML($html)
+    {
+        // Buffer libxml errors internally to avoid throwing a gazillion of errors on malformed HTML
+        libxml_use_internal_errors(true);
+
+        $dom = new DOMDocument('1.0', 'utf-8');
+
+        if (!$this->configuration->getSubstituteEntities()) {
+            // Keep the original HTML entities
+            $dom->substituteEntities = false;
+        }
+
+        if ($this->configuration->getNormalizeEntities()) {
+            // Replace UTF-8 characters with the HTML Entity equivalent. Useful to fix html with mixed content
+            $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
+        }
+
+        if ($this->configuration->getSummonCthulhu()) {
+            // /i: HTML tag names are case-insensitive. On a PCRE error (null result) keep the HTML unchanged
+            $stripped = preg_replace('/<script\b[^>]*>([\s\S]*?)<\/script>/i', '', $html);
+            $html = ($stripped === null) ? $html : $stripped;
+        }
+
+        // Prepend the XML tag to avoid having issues with special characters. Should be harmless.
+        $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
+        $dom->encoding = 'UTF-8';
+
+        $this->removeScripts($dom);
+        $this->prepDocument($dom);
+
+        return $dom;
+    }
+
     /**
      * Tries to guess relevant info from metadata of the html. Sets the results in the Readability properties.
      */
-- 
cgit v1.2.3