diff options
author | Matt Butcher <[email protected]> | 2013-04-15 09:41:52 -0500 |
---|---|---|
committer | Matt Butcher <[email protected]> | 2013-04-15 09:41:52 -0500 |
commit | 56e13d972eadec82cda8c9c59b2f597c38e1aafb (patch) | |
tree | 819efa8761b84020f9f8c58fbb427f67dccd7f32 | |
parent | efb1bb1327f3fcbfe5c1fa515b73e0804b7cf9df (diff) |
UNFINISHED: DOCTYPE parser is in progress.
-rw-r--r-- | README.md | 5 | ||||
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 30 |
2 files changed, 34 insertions, 1 deletion
```diff
@@ -116,3 +116,8 @@ html5lib project.
 This software is released under the MIT license. The original html5lib library was also released under the MIT license.
+
+See LICENSE.txt
+
+Certain files contain copyright assertions by specific individuals
+involved with html5lib. Those have been retained where appropriate.
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index bf484bd..378af43 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -422,7 +422,35 @@ class Tokenizer {
       return $this->bogusComment('<!' . $chars);
     }
 
-    // Now we need to parse the DOCTYPE.
+    $this->scanner->whitespace();
+    $tok = $this->scanner->current();
+
+    // EOF: die.
+    if ($tok === FALSE) {
+      $this->events->doctype('html5','','', TRUE);
+      return $this->eof();
+    }
+
+    $doctypeName = '';
+
+    // NULL char: convert.
+    if ($tok === "\0") {
+      $this->parseError("Unexpected NULL character in DOCTYPE.");
+      $doctypeName .= UTF8::FFFD;
+      $tok = $this->scanner->next();
+    }
+
+    $stop = " \n\f>";
+    $doctypeName = $this->scanner->charsUntil($stop);
+    // Lowercase ASCII, replace \0 with FFFD
+    $doctypeName = strtolower(strtr($doctypeName, "\0", UTF8Utils::FFFD));
+
+    // If FALSE, emit a parse error.
+
+    // Get pub and sys IDs
+
+    // If >, end doctype
+
   }
 
   /**
```