summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5/Data.php 11
-rw-r--r-- src/HTML5/InputStream.php 3
-rw-r--r-- src/HTML5/Parser.php 16
-rw-r--r-- src/HTML5/Tokenizer.php 17
4 files changed, 29 insertions, 18 deletions
diff --git a/src/HTML5/Data.php b/src/HTML5/Data.php
index 497345f..a7c865c 100644
--- a/src/HTML5/Data.php
+++ b/src/HTML5/Data.php
@@ -1,9 +1,14 @@
<?php
+namespace HTML5;
+
// warning: this file is encoded in UTF-8!
-class HTML5_Data
-{
+
+/**
+ * Character data.
+ */
+class Data {
// at some point this should be moved to a .ser file. Another
// possible optimization is to give UTF-8 bytes, not Unicode
@@ -61,6 +66,8 @@ class HTML5_Data
}
public static function getNamedCharacterReferences() {
+ // Danger Will Robinson: This will prevent the opcode cache from
+ // caching the entity references.
if (!self::$namedCharacterReferences) {
self::$namedCharacterReferences = unserialize(
file_get_contents(dirname(__FILE__) . '/named-character-references.ser'));
diff --git a/src/HTML5/InputStream.php b/src/HTML5/InputStream.php
index f98b427..d3bd8ac 100644
--- a/src/HTML5/InputStream.php
+++ b/src/HTML5/InputStream.php
@@ -1,4 +1,5 @@
<?php
+namespace HTML5;
/*
@@ -29,7 +30,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// /* */ indicates verbatim text from the HTML 5 specification
// // indicates regular comments
-class HTML5_InputStream {
+class InputStream {
/**
* The string data we're parsing.
*/
diff --git a/src/HTML5/Parser.php b/src/HTML5/Parser.php
index 5f9ca56..892ab54 100644
--- a/src/HTML5/Parser.php
+++ b/src/HTML5/Parser.php
@@ -1,14 +1,16 @@
<?php
+namespace HTML5;
-require_once dirname(__FILE__) . '/Data.php';
-require_once dirname(__FILE__) . '/InputStream.php';
-require_once dirname(__FILE__) . '/TreeBuilder.php';
-require_once dirname(__FILE__) . '/Tokenizer.php';
+# Use autoloader
+#require_once dirname(__FILE__) . '/Data.php';
+#require_once dirname(__FILE__) . '/InputStream.php';
+#require_once dirname(__FILE__) . '/TreeBuilder.php';
+#require_once dirname(__FILE__) . '/Tokenizer.php';
/**
* Outwards facing interface for HTML5.
*/
-class HTML5_Parser
+class Parser
{
/**
* Parses a full HTML document.
@@ -17,7 +19,7 @@ class HTML5_Parser
* @return Parsed HTML as DOMDocument
*/
static public function parse($text, $builder = null) {
- $tokenizer = new HTML5_Tokenizer($text, $builder);
+ $tokenizer = new Tokenizer($text, $builder);
$tokenizer->parse();
return $tokenizer->save();
}
@@ -29,7 +31,7 @@ class HTML5_Parser
* @return Parsed HTML as DOMDocument
*/
static public function parseFragment($text, $context = null, $builder = null) {
- $tokenizer = new HTML5_Tokenizer($text, $builder);
+ $tokenizer = new Tokenizer($text, $builder);
$tokenizer->parseFragment($context);
return $tokenizer->save();
}
diff --git a/src/HTML5/Tokenizer.php b/src/HTML5/Tokenizer.php
index 0af0716..e27b16a 100644
--- a/src/HTML5/Tokenizer.php
+++ b/src/HTML5/Tokenizer.php
@@ -1,4 +1,5 @@
<?php
+namespace HTML5;
/*
@@ -33,7 +34,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// all flags are in hyphenated form
-class HTML5_Tokenizer {
+class Tokenizer {
/**
* Points to an InputStream object.
*/
@@ -85,8 +86,8 @@ class HTML5_Tokenizer {
* @param $data Data to parse
*/
public function __construct($data, $builder = null) {
- $this->stream = new HTML5_InputStream($data);
- if (!$builder) $this->tree = new HTML5_TreeBuilder;
+ $this->stream = new InputStream($data);
+ if (!$builder) $this->tree = new TreeBuilder;
else $this->tree = $builder;
$this->content_model = self::PCDATA;
}
@@ -2208,13 +2209,13 @@ class HTML5_Tokenizer {
row with that number in the first column, and return a
character token for the Unicode character given in the
second column of that row. */
- $new_codepoint = HTML5_Data::getRealCodepoint($codepoint);
+ $new_codepoint = Data::getRealCodepoint($codepoint);
if ($new_codepoint) {
$this->emitToken(array(
'type' => self::PARSEERROR,
'data' => 'illegal-windows-1252-entity'
));
- return HTML5_Data::utf8chr($new_codepoint);
+ return Data::utf8chr($new_codepoint);
} else {
/* Otherwise, if the number is greater than 0x10FFFF, then
* this is a parse error. Return a U+FFFD REPLACEMENT
@@ -2253,7 +2254,7 @@ class HTML5_Tokenizer {
'data' => 'illegal-codepoint-for-numeric-entity'
));
}
- return HTML5_Data::utf8chr($codepoint);
+ return Data::utf8chr($codepoint);
}
}
@@ -2267,7 +2268,7 @@ class HTML5_Tokenizer {
// What we actually do here is consume as much as we can while it
// matches the start of one of the identifiers in the first column.
- $refs = HTML5_Data::getNamedCharacterReferences();
+ $refs = Data::getNamedCharacterReferences();
// Get the longest string which is the start of an identifier
// ($chars) as well as the longest identifier which matches ($id)
@@ -2342,7 +2343,7 @@ class HTML5_Tokenizer {
/* Otherwise, return a character token for the character
corresponding to the character reference name (as given
by the second column of the named character references table). */
- return HTML5_Data::utf8chr($codepoint) . substr($chars, strlen($id));
+ return Data::utf8chr($codepoint) . substr($chars, strlen($id));
}
}