summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 20
-rw-r--r-- test/HTML5/Parser/EventStack.php 2
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 16
3 files changed, 30 insertions, 8 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 466cb5d..4f1404f 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -465,10 +465,18 @@ class Tokenizer {
// Short DOCTYPE, like <!DOCTYPE html>
if ($tok == '>') {
+ // DOCTYPE without a name.
+ if (strlen($doctypeName) == 0) {
+ $this->parseError("Expected a DOCTYPE name. Got nothing.");
+ $this->events->doctype($doctypeName, 0, NULL, TRUE);
+ $this->scanner->next();
+ return TRUE;
+ }
$this->events->doctype($doctypeName);
$this->scanner->next();
return TRUE;
}
+ $this->scanner->whitespace();
$pub = strtoupper($this->scanner->getAsciiAlpha());
$white = strlen($this->scanner->whitespace());
@@ -484,10 +492,18 @@ class Tokenizer {
return FALSE;
}
+ // Premature EOF.
+ if ($this->scanner->current() === FALSE) {
+ $this->parseError("Unexpected EOF in DOCTYPE");
+ $this->events->doctype($doctypeName, $type, $id, TRUE);
+ return TRUE;
+ }
+
// Well-formed complete DOCTYPE.
$this->scanner->whitespace();
if ($this->scanner->current() == '>') {
$this->events->doctype($doctypeName, $type, $id, FALSE);
+ $this->scanner->next();
return TRUE;
}
@@ -496,6 +512,7 @@ class Tokenizer {
$this->scanner->charsUntil(">");
$this->parseError("Malformed DOCTYPE.");
$this->events->doctype($doctypeName, $type, $id, TRUE);
+ $this->scanner->next();
return TRUE;
}
@@ -503,8 +520,9 @@ class Tokenizer {
// Consume to > and trash.
$this->scanner->charsUntil('>');
- $this->parseError("Expected PUBLIC or SYSTEM. Got %s%s.", $pub);
+ $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub);
$this->events->doctype($doctypeName, 0, NULL, TRUE);
+ $this->scanner->next();
return TRUE;
}
diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php
index 24bd144..e865507 100644
--- a/test/HTML5/Parser/EventStack.php
+++ b/test/HTML5/Parser/EventStack.php
@@ -35,7 +35,7 @@ class EventStack implements EventHandler {
}
public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE) {
- $args = func_get_args();
+ $args = array($name, $type, $id, $quirks);
$this->store('doctype', $args);
}
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 349b713..fb33e37 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -227,7 +227,7 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
foreach ($good as $test => $expects) {
$events = $this->parse($test);
- $this->assertEquals(2, $events->depth(), "Counting events for '$test'");
+ $this->assertEquals(2, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
$this->assertEventEquals('doctype', $expects, $events->get(0));
}
@@ -239,16 +239,20 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
'<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
'<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
'<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_PUBLIC, NULL, TRUE),
- '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_PUBLIC, NULL, TRUE),
- '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_SYSTEM, NULL, TRUE),
- '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_SYSTEM, NULL, TRUE),
+
+ // Can't tell whether these are ids or ID types, since the context is chopped.
+ '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
+ '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
+ '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
+ '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
+
'<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE),
'<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE),
);
foreach ($bad as $test => $expects) {
$events = $this->parse($test);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test'");
+ //fprintf(STDOUT, $test . PHP_EOL);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
$this->assertEventError($events->get(0));
$this->assertEventEquals('doctype', $expects, $events->get(1));
}