Move load function below parse function

author: Andres Rey <[email protected]> 2017-12-01 21:23:40 +0000
committer: Andres Rey <[email protected]> 2017-12-01 21:23:40 +0000
commit: 8e6dcdcdb48695fae6a6e57a8d4ddd3762c3c47a (patch)
tree: fee37562fef901e3009c889f54469ee1fb0f41b7 /src
parent: 7c8ee690e3c33c6a25670fba203ae14d1e1bea6e (diff)
1 files changed, 44 insertions, 44 deletions
diff --git a/src/Readability.php b/src/Readability.php
index f2617a4..c8c7c05 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -105,50 +105,6 @@ class Readability
     }
 
     /**
-     * Creates a DOM Document object and loads the provided HTML on it.
-     *
-     * Used for the first load of Readability and subsequent reloads (when disabling flags and rescanning the text)
-     * Previous versions of Readability used this method one time and cloned the DOM to keep a backup. This caused bugs
-     * because cloning the DOM object keeps a relation between the clone and the original one, doing changes in both
-     * objects and ruining the backup.
-     *
-     * @param string $html
-     *
-     * @return DOMDocument
-     */
-    private function loadHTML($html)
-    {
-        // To avoid having a gazillion of errors on malformed HTMLs
-        libxml_use_internal_errors(true);
-
-        $dom = new DOMDocument('1.0', 'utf-8');
-
-        if (!$this->configuration->getSubstituteEntities()) {
-            // Keep the original HTML entities
-            $dom->substituteEntities = false;
-        }
-
-        if ($this->configuration->getNormalizeEntities()) {
-            // Replace UTF-8 characters with the HTML Entity equivalent. Useful to fix html with mixed content
-            $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
-        }
-
-        if ($this->configuration->getSummonCthulhu()) {
-            $html = preg_replace('/<script\b[^>]*>([\s\S]*?)<\/script>/', '', $html);
-        }
-
-        // Prepend the XML tag to avoid having issues with special characters. Should be harmless.
-        $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
-        $dom->encoding = 'UTF-8';
-
-        $this->removeScripts($dom);
-
-        $this->prepDocument($dom);
-
-        return $dom;
-    }
-
-    /**
      * Main parse function.
      *
      * @param $html
@@ -214,6 +170,50 @@ class Readability
     }
 
     /**
+     * Creates a DOM Document object and loads the provided HTML on it.
+     *
+     * Used for the first load of Readability and subsequent reloads (when disabling flags and rescanning the text)
+     * Previous versions of Readability used this method one time and cloned the DOM to keep a backup. This caused bugs
+     * because cloning the DOM object keeps a relation between the clone and the original one, doing changes in both
+     * objects and ruining the backup.
+     *
+     * @param string $html
+     *
+     * @return DOMDocument
+     */
+    private function loadHTML($html)
+    {
+        // To avoid throwing a gazillion of errors on malformed HTMLs
+        libxml_use_internal_errors(true);
+
+        $dom = new DOMDocument('1.0', 'utf-8');
+
+        if (!$this->configuration->getSubstituteEntities()) {
+            // Keep the original HTML entities
+            $dom->substituteEntities = false;
+        }
+
+        if ($this->configuration->getNormalizeEntities()) {
+            // Replace UTF-8 characters with the HTML Entity equivalent. Useful to fix html with mixed content
+            $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
+        }
+
+        if ($this->configuration->getSummonCthulhu()) {
+            $html = preg_replace('/<script\b[^>]*>([\s\S]*?)<\/script>/', '', $html);
+        }
+
+        // Prepend the XML tag to avoid having issues with special characters. Should be harmless.
+        $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
+        $dom->encoding = 'UTF-8';
+
+        $this->removeScripts($dom);
+
+        $this->prepDocument($dom);
+
+        return $dom;
+    }
+
+    /**
      * Tries to guess relevant info from metadata of the html. Sets the results in the Readability properties.
      */
     private function getMetadata()
author	Andres Rey <[email protected]>	2017-12-01 21:23:40 +0000
committer	Andres Rey <[email protected]>	2017-12-01 21:23:40 +0000
commit	8e6dcdcdb48695fae6a6e57a8d4ddd3762c3c47a (patch)
tree	fee37562fef901e3009c889f54469ee1fb0f41b7 /src
parent	7c8ee690e3c33c6a25670fba203ae14d1e1bea6e (diff)