diff options
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 20 | ||||
-rw-r--r-- | test/HTML5/Parser/EventStack.php | 2 | ||||
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 16 |
3 files changed, 30 insertions, 8 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 466cb5d..4f1404f 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -465,10 +465,18 @@ class Tokenizer { // Short DOCTYPE, like <!DOCTYPE html> if ($tok == '>') { + // DOCTYPE without a name. + if (strlen($doctypeName) == 0) { + $this->parseError("Expected a DOCTYPE name. Got nothing."); + $this->events->doctype($doctypeName, 0, NULL, TRUE); + $this->scanner->next(); + return TRUE; + } $this->events->doctype($doctypeName); $this->scanner->next(); return TRUE; } + $this->scanner->whitespace(); $pub = strtoupper($this->scanner->getAsciiAlpha()); $white = strlen($this->scanner->whitespace()); @@ -484,10 +492,18 @@ class Tokenizer { return FALSE; } + // Premature EOF. + if ($this->scanner->current() === FALSE) { + $this->parseError("Unexpected EOF in DOCTYPE"); + $this->events->doctype($doctypeName, $type, $id, TRUE); + return TRUE; + } + // Well-formed complete DOCTYPE. $this->scanner->whitespace(); if ($this->scanner->current() == '>') { $this->events->doctype($doctypeName, $type, $id, FALSE); + $this->scanner->next(); return TRUE; } @@ -496,6 +512,7 @@ class Tokenizer { $this->scanner->charsUntil(">"); $this->parseError("Malformed DOCTYPE."); $this->events->doctype($doctypeName, $type, $id, TRUE); + $this->scanner->next(); return TRUE; } @@ -503,8 +520,9 @@ class Tokenizer { // Consume to > and trash. $this->scanner->charsUntil('>'); - $this->parseError("Expected PUBLIC or SYSTEM. Got %s%s.", $pub); + $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub); $this->events->doctype($doctypeName, 0, NULL, TRUE); + $this->scanner->next(); return TRUE; } diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php index 24bd144..e865507 100644 --- a/test/HTML5/Parser/EventStack.php +++ b/test/HTML5/Parser/EventStack.php @@ -35,7 +35,7 @@ class EventStack implements EventHandler { } public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE) { - $args = func_get_args(); + $args = array($name, $type, $id, $quirks); $this->store('doctype', $args); } diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index 349b713..fb33e37 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -227,7 +227,7 @@ class TokenizerTest extends \HTML5\Tests\TestCase { foreach ($good as $test => $expects) { $events = $this->parse($test); - $this->assertEquals(2, $events->depth(), "Counting events for '$test'"); + $this->assertEquals(2, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); $this->assertEventEquals('doctype', $expects, $events->get(0)); } @@ -239,16 +239,20 @@ class TokenizerTest extends \HTML5\Tests\TestCase { '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), '<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), '<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_PUBLIC, NULL, TRUE), - '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_PUBLIC, NULL, TRUE), - '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_SYSTEM, NULL, TRUE), - '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_SYSTEM, NULL, TRUE), + + // Can't tell whether these are ids or ID types, since the context is chopped. + '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), + '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), + '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), + '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), + '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE), '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE), ); foreach ($bad as $test => $expects) { $events = $this->parse($test); - $this->assertEquals(3, $events->depth(), "Counting events for '$test'"); + //fprintf(STDOUT, $test . PHP_EOL); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); $this->assertEventError($events->get(0)); $this->assertEventEquals('doctype', $expects, $events->get(1)); } |