summary refs log tree commit diff
diff options
context:
space:
mode:
author Matt Butcher <[email protected]> 2014-02-11 08:35:21 -0700
committer Matt Butcher <[email protected]> 2014-02-11 08:35:21 -0700
commit 77ad931cd824feb33eebae08cf3a5a47bce1e337 (patch)
tree a543c05b9b032b992df6a8c86167d8106f18f422
parent 246ed7785e93cc1ebf59a2f15af61472d20efca0 (diff)
Fix for #25: Handle missing tag close in attribute list.
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 23
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 8
2 files changed, 27 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 1fd1d25..9866246 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -326,11 +326,18 @@ class Tokenizer {
$attributes = array();
$selfClose = FALSE;
- do {
- $this->scanner->whitespace();
- $this->attribute($attributes);
+ // Handle attribute parse exceptions here so that we can
+ // react by trying to build a sensible parse tree.
+ try {
+ do {
+ $this->scanner->whitespace();
+ $this->attribute($attributes);
+ }
+ while (!$this->isTagEnd($selfClose));
+ }
+ catch (ParseError $e) {
+ $selfClose = FALSE;
}
- while (!$this->isTagEnd($selfClose));
$mode = $this->events->startTag($name, $attributes, $selfClose);
// Should we do this? What does this buy that selfClose doesn't?
@@ -390,6 +397,14 @@ class Tokenizer {
return FALSE;
}
+ if ($tok == '<') {
+ $this->parseError("Unexepcted '<' inside of attributes list.");
+ // Push the < back onto the stack.
+ $this->scanner->unconsume();
+ // Let the caller figure out how to handle this.
+ throw new ParseError("Start tag inside of attribute.");
+ }
+
$name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));
if (strlen($name) == 0) {
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index edc427c..0ac987f 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -387,6 +387,14 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
$this->assertEventError($events->get(1));
//$this->assertEventEquals('startTag', $expects, $events->get(1));
}
+
+ // Regression: Malformed elements should be detected.
+ // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE),
+ $events = $this->parse('<foo baz="1" <bar></foo>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), FALSE), $events->get(1));
+ $this->assertEventEquals('startTag', array('bar', array(), FALSE), $events->get(2));
+ $this->assertEventEquals('endTag', array('foo'), $events->get(3));
}
public function testRawText() {