From 77ad931cd824feb33eebae08cf3a5a47bce1e337 Mon Sep 17 00:00:00 2001 From: Matt Butcher Date: Tue, 11 Feb 2014 08:35:21 -0700 Subject: Fix for #25: Handle missing tag close in attribute list. --- src/HTML5/Parser/Tokenizer.php | 23 +++++++++++++++++++---- test/HTML5/Parser/TokenizerTest.php | 8 ++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 1fd1d25..9866246 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -326,11 +326,18 @@ class Tokenizer { $attributes = array(); $selfClose = FALSE; - do { - $this->scanner->whitespace(); - $this->attribute($attributes); + // Handle attribute parse exceptions here so that we can + // react by trying to build a sensible parse tree. + try { + do { + $this->scanner->whitespace(); + $this->attribute($attributes); + } + while (!$this->isTagEnd($selfClose)); + } + catch (ParseError $e) { + $selfClose = FALSE; } - while (!$this->isTagEnd($selfClose)); $mode = $this->events->startTag($name, $attributes, $selfClose); // Should we do this? What does this buy that selfClose doesn't? @@ -390,6 +397,14 @@ class Tokenizer { return FALSE; } + if ($tok == '<') { + $this->parseError("Unexepcted '<' inside of attributes list."); + // Push the < back onto the stack. + $this->scanner->unconsume(); + // Let the caller figure out how to handle this. 
+ throw new ParseError("Start tag inside of attribute."); + } + $name = strtolower($this->scanner->charsUntil("/>=\n\f\t ")); if (strlen($name) == 0) { diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index edc427c..0ac987f 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -387,6 +387,14 @@ class TokenizerTest extends \HTML5\Tests\TestCase { $this->assertEventError($events->get(1)); //$this->assertEventEquals('startTag', $expects, $events->get(1)); } + + // Regression: Malformed elements should be detected. + // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE), + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), FALSE), $events->get(1)); + $this->assertEventEquals('startTag', array('bar', array(), FALSE), $events->get(2)); + $this->assertEventEquals('endTag', array('foo'), $events->get(3)); } public function testRawText() { -- cgit v1.2.3