From 391e4f31e3f0e7935a46431783896153156a65eb Mon Sep 17 00:00:00 2001 From: Technosophos Date: Mon, 15 Apr 2013 17:47:58 -0500 Subject: Unit tests for DOCTYPE are all passing. --- src/HTML5/Parser/Tokenizer.php | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 466cb5d..4f1404f 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -465,10 +465,18 @@ class Tokenizer { // Short DOCTYPE, like <!DOCTYPE html> if ($tok == '>') { + // DOCTYPE without a name. + if (strlen($doctypeName) == 0) { + $this->parseError("Expected a DOCTYPE name. Got nothing."); + $this->events->doctype($doctypeName, 0, NULL, TRUE); + $this->scanner->next(); + return TRUE; + } $this->events->doctype($doctypeName); $this->scanner->next(); return TRUE; } + $this->scanner->whitespace(); $pub = strtoupper($this->scanner->getAsciiAlpha()); $white = strlen($this->scanner->whitespace()); @@ -484,10 +492,18 @@ class Tokenizer { return FALSE; } + // Premature EOF. + if ($this->scanner->current() === FALSE) { + $this->parseError("Unexpected EOF in DOCTYPE"); + $this->events->doctype($doctypeName, $type, $id, TRUE); + return TRUE; + } + // Well-formed complete DOCTYPE. $this->scanner->whitespace(); if ($this->scanner->current() == '>') { $this->events->doctype($doctypeName, $type, $id, FALSE); + $this->scanner->next(); return TRUE; } @@ -496,6 +512,7 @@ class Tokenizer { $this->scanner->charsUntil(">"); $this->parseError("Malformed DOCTYPE."); $this->events->doctype($doctypeName, $type, $id, TRUE); + $this->scanner->next(); return TRUE; } @@ -503,8 +520,9 @@ class Tokenizer { // Consume to > and trash. $this->scanner->charsUntil('>'); - $this->parseError("Expected PUBLIC or SYSTEM. Got %s%s.", $pub); + $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub); $this->events->doctype($doctypeName, 0, NULL, TRUE); + $this->scanner->next(); return TRUE; } -- cgit v1.2.3