summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Technosophos <[email protected]> 2013-04-12 17:37:44 -0500
committer Technosophos <[email protected]> 2013-04-12 17:37:44 -0500
commit 1fdc03495c7702422638941235fde4fa3f327232 (patch)
tree d5b4f19af3ee3ed3b4c909d2e8b02e72c4bf90a5
parent 9b23c378489e40d4eeea1cb5d304acd3909c2d1d (diff)
BogusCOmments. How cool.
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 32
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 39
2 files changed, 48 insertions, 23 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index a23e1f4..b84f9bf 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -235,8 +235,18 @@ class Tokenizer {
// Comment:
if ($tok == '-' && $this->scanner->peek() == '-') {
+ $this->scanner->next(); // Consume the other '-'
+ $this->scanner->next(); // Next char.
+ return $this->comment();
}
+ elseif($tok == 'D') {
+ }
+ elseif($tok == '[') {
+ }
+
// FINISH
+ $this->parseError("Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s", $tok);
+ $this->bogusComment('<!');
return TRUE;
}
@@ -472,10 +482,11 @@ class Tokenizer {
return TRUE;
}
- protected function commentStart() {
- }
- protected function commentStartDash() {
- }
+ /**
+ * Read a comment.
+ *
+ * Expects the first tok to be inside of the comment.
+ */
protected function comment() {
$tok = $this->scanner->current();
$comment = '';
@@ -504,17 +515,18 @@ class Tokenizer {
}
protected function isCommentEnd() {
+ // EOF
+ if($this->scanner->current() === FALSE) {
+ // Hit the end.
+ $this->parseError("Unexpected EOF in a comment.");
+ return TRUE;
+ }
+
// If it doesn't start with -, not the end.
if($this->scanner->current() != '-') {
return FALSE;
}
- // EOF
- if($this->scanner->Current() === FALSE) {
- // Hit the end.
- $this->events->parseError("Unexpected EOF in a comment.");
- return TRUE;
- }
// Advance one, and test for '->'
if ($this->scanner->next() == '-'
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index c057360..291bd0f 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -87,13 +87,18 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
}
public function testBogusComment() {
- $str = '</+this is a bogus comment. +>';
- $events = $this->parse($str . ' ');
- $e0 = $events->get(0);
- $this->assertEquals('error', $e0['name']);
- $e1 = $events->get(1);
- $this->assertEquals('comment', $e1['name']);
- $this->assertEquals($str, $e1['data'][0]);
+ $bogus = array(
+ '</+this is a bogus comment. +>',
+ '<!+this is a bogus comment. !>',
+ );
+ foreach ($bogus as $str) {
+ $events = $this->parse($str . ' ');
+ $e0 = $events->get(0);
+ $this->assertEquals('error', $e0['name']);
+ $e1 = $events->get(1);
+ $this->assertEquals('comment', $e1['name']);
+ $this->assertEquals($str, $e1['data'][0]);
+ }
}
public function testEndTag() {
@@ -162,19 +167,27 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
'<!-- --$i -->' => ' --$i ',
'<!----$i-->' => '--$i',
'<!-- 1 > 0 -->' => ' 1 > 0 ',
- '<!--
- Hello World.
- -->' => "\nHello World\n",
+ "<!--\nHello World.\na-->" => "\nHello World.\na",
'<!-- <!-- -->' => ' <!-- ',
);
+ foreach ($good as $test => $expected) {
+ $events = $this->parse($test);
+ $e1 = $events->get(0);
+ $this->assertEquals('comment', $e1['name'], 'Expected a comment for ' . $test);
+ $this->assertEquals($expected, $e1['data'][0]);
+ }
+
$fail = array(
'<!-->' => '',
'<!--Hello' => 'Hello',
+ "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
);
+ foreach ($fail as $test => $expected) {
+ $events = $this->parse($test);
+ $e0 = $events->get(0);
+ $this->assertEquals('error', $e0['name'], 'Expected an error for ' . $test . print_r($events, TRUE));
- foreach ($good as $test => $expected) {
- $events = $this->parse($good);
- $e1 = $events->get(0);
+ $e1 = $events->get(1);
$this->assertEquals('comment', $e1['name'], 'Expected a comment for ' . $test);
$this->assertEquals($expected, $e1['data'][0]);
}