4 files changed, 29 insertions, 18 deletions
diff --git a/src/HTML5/Data.php b/src/HTML5/Data.php
index 497345f..a7c865c 100644
--- a/src/HTML5/Data.php
+++ b/src/HTML5/Data.php
@@ -1,9 +1,14 @@
 <?php
 
+namespace HTML5;
+
 // warning: this file is encoded in UTF-8!
 
-class HTML5_Data
-{
+
+/**
+ * Character data.
+ */
+class Data {
 
     // at some point this should be moved to a .ser file. Another
     // possible optimization is to give UTF-8 bytes, not Unicode
@@ -61,6 +66,8 @@ class HTML5_Data
     }
 
     public static function getNamedCharacterReferences() {
+        // Warning: the references are unserialized from disk at runtime, so
+        // the opcode cache cannot cache the entity table.
         if (!self::$namedCharacterReferences) {
             self::$namedCharacterReferences = unserialize(
                 file_get_contents(dirname(__FILE__) . '/named-character-references.ser'));
diff --git a/src/HTML5/InputStream.php b/src/HTML5/InputStream.php
index f98b427..d3bd8ac 100644
--- a/src/HTML5/InputStream.php
+++ b/src/HTML5/InputStream.php
@@ -1,4 +1,5 @@
 <?php
+namespace HTML5;
 
 /*
 
@@ -29,7 +30,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 // /* */ indicates verbatim text from the HTML 5 specification
 // // indicates regular comments
 
-class HTML5_InputStream {
+class InputStream {
     /**
      * The string data we're parsing.
      */
diff --git a/src/HTML5/Parser.php b/src/HTML5/Parser.php
index 5f9ca56..892ab54 100644
--- a/src/HTML5/Parser.php
+++ b/src/HTML5/Parser.php
@@ -1,14 +1,16 @@
 <?php
+namespace HTML5;
 
-require_once dirname(__FILE__) . '/Data.php';
-require_once dirname(__FILE__) . '/InputStream.php';
-require_once dirname(__FILE__) . '/TreeBuilder.php';
-require_once dirname(__FILE__) . '/Tokenizer.php';
+# Classes are loaded by the autoloader; the explicit requires below are disabled.
+#require_once dirname(__FILE__) . '/Data.php';
+#require_once dirname(__FILE__) . '/InputStream.php';
+#require_once dirname(__FILE__) . '/TreeBuilder.php';
+#require_once dirname(__FILE__) . '/Tokenizer.php';
 
 /**
  * Outwards facing interface for HTML5.
  */
-class HTML5_Parser
+class Parser
 {
     /**
      * Parses a full HTML document.
@@ -17,7 +19,7 @@ class HTML5_Parser
      * @return Parsed HTML as DOMDocument
      */
     static public function parse($text, $builder = null) {
-        $tokenizer = new HTML5_Tokenizer($text, $builder);
+        $tokenizer = new Tokenizer($text, $builder);
         $tokenizer->parse();
         return $tokenizer->save();
     }
@@ -29,7 +31,7 @@ class HTML5_Parser
      * @return Parsed HTML as DOMDocument
      */
     static public function parseFragment($text, $context = null, $builder = null) {
-        $tokenizer = new HTML5_Tokenizer($text, $builder);
+        $tokenizer = new Tokenizer($text, $builder);
         $tokenizer->parseFragment($context);
         return $tokenizer->save();
     }
diff --git a/src/HTML5/Tokenizer.php b/src/HTML5/Tokenizer.php
index 0af0716..e27b16a 100644
--- a/src/HTML5/Tokenizer.php
+++ b/src/HTML5/Tokenizer.php
@@ -1,4 +1,5 @@
 <?php
+namespace HTML5;
 
 /*
 
@@ -33,7 +34,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 // all flags are in hyphenated form
 
-class HTML5_Tokenizer {
+class Tokenizer {
     /**
      * Points to an InputStream object.
      */
@@ -85,8 +86,8 @@ class HTML5_Tokenizer {
      * @param $data Data to parse
      */
     public function __construct($data, $builder = null) {
-        $this->stream = new HTML5_InputStream($data);
-        if (!$builder) $this->tree = new HTML5_TreeBuilder;
+        $this->stream = new InputStream($data);
+        if (!$builder) $this->tree = new TreeBuilder;
         else $this->tree = $builder;
         $this->content_model = self::PCDATA;
     }
@@ -2208,13 +2209,13 @@ class HTML5_Tokenizer {
                 row with that number in the first column, and return a
                 character token for the Unicode character given in the
                 second column of that row. */
-                $new_codepoint = HTML5_Data::getRealCodepoint($codepoint);
+                $new_codepoint = Data::getRealCodepoint($codepoint);
                 if ($new_codepoint) {
                     $this->emitToken(array(
                         'type' => self::PARSEERROR,
                         'data' => 'illegal-windows-1252-entity'
                     ));
-                    return HTML5_Data::utf8chr($new_codepoint);
+                    return Data::utf8chr($new_codepoint);
                 } else {
                     /* Otherwise, if the number is greater than 0x10FFFF, then 
                      * this is a parse error. Return a U+FFFD REPLACEMENT 
@@ -2253,7 +2254,7 @@ class HTML5_Tokenizer {
                             'data' => 'illegal-codepoint-for-numeric-entity'
                         ));
                     }
-                    return HTML5_Data::utf8chr($codepoint);
+                    return Data::utf8chr($codepoint);
                 }
             }
 
@@ -2267,7 +2268,7 @@ class HTML5_Tokenizer {
             // What we actually do here is consume as much as we can while it
             // matches the start of one of the identifiers in the first column.
 
-            $refs = HTML5_Data::getNamedCharacterReferences();
+            $refs = Data::getNamedCharacterReferences();
             
             // Get the longest string which is the start of an identifier
             // ($chars) as well as the longest identifier which matches ($id)
@@ -2342,7 +2343,7 @@ class HTML5_Tokenizer {
             /* Otherwise, return a character token for the character
             corresponding to the character reference name (as given
             by the second column of the named character references table). */
-            return HTML5_Data::utf8chr($codepoint) . substr($chars, strlen($id));
+            return Data::utf8chr($codepoint) . substr($chars, strlen($id));
         }
     }