diff options
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 13 | ||||
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 35 |
2 files changed, 36 insertions, 12 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 8250a8f..d98f619 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -159,7 +159,7 @@ class Tokenizer { // These indicate not an entity. We return just // the &. if (strspn($tok, self::WHITE . "&<") == 1) { - $this->scanner->next(); + //$this->scanner->next(); return '&'; } @@ -401,10 +401,12 @@ class Tokenizer { $name = $this->scanner->current(); $this->scanner->next(); } - if (preg_match('/\'\"/', $name)) { - $this->parseError("Unexpected characters in attribute name"); + if (preg_match('/[\'\"]/', $name)) { + //if (strspn($name, '\'\"')) { + $this->parseError("Unexpected characters in attribute name: %s", $name); } - $this->scanner->whitespace(); + // Whitespace not allowed between name and =. + //$this->scanner->whitespace(); $val = $this->attributeValue(); //return array($name, $val); @@ -485,6 +487,9 @@ class Tokenizer { $val .= $this->decodeCharacterReference(TRUE); } else { + if(strspn($tok, "\"'<=`") > 0) { + $this->parseError("Unexpected chars in unquoted attribute value %s", $tok); + } $val .= $tok; $tok = $this->scanner->next(); } diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index d525e56..eb94035 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -322,6 +322,7 @@ class TokenizerTest extends \HTML5\Tests\TestCase { * @depends testCharacterReference */ public function testTagAttributes() { + // Opening tags. $good = array( '<foo bar="baz">' => array('foo', array('bar' => 'baz'), FALSE), '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), FALSE), @@ -330,31 +331,36 @@ class TokenizerTest extends \HTML5\Tests\TestCase { "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), FALSE), "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), FALSE), "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), FALSE), + "<foo a='blue&&red'>" => array('foo', array('a' => 'blue&&red'), FALSE), "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), FALSE), '<doe a deer>' => array('doe', array('a' => NULL, 'deer' => NULL), FALSE), + '<foo bar=baz>' => array('foo', array('bar' => 'baz'), FALSE), + + // The spec allows an unquoted value '/'. This will not be a closing + // tag. + '<foo bar=/>' => array('foo', array('bar' => '/'), FALSE), + '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), FALSE), ); $this->isAllGood('startTag', 2, $good); + // Self-closing tags. $withEnd = array( '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), TRUE), '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), TRUE), '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), TRUE), + "<foo a='1' b=\"2\" c=3 d/>" => array('foo', array('a' => '1', 'b' => '2', 'c' => '3', 'd' => NULL), TRUE), ); $this->isAllGood('startTag', 3, $withEnd); - /* + // Cause a parse error. $bad = array( // This will emit an entity lookup failure for &red. "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), FALSE), + "<foo a='blue&&&red'>" => array('foo', array('a' => 'blue&&&red'), FALSE), '<foo b"="baz">' => array('foo', array('b"' => 'baz'), FALSE), - '<foo ="bar">' => array('foo', array('="bar"' => NULL), FALSE), - '<foo bar=/>' => array('foo', array('bar' => NULL), TRUE), - '<foo////>' => array('foo', array(), TRUE), '<foo bar=>' => array('foo', array('bar' => NULL), FALSE), '<foo bar="oh' => array('foo', array('bar' => 'oh'), FALSE), - '<foo bar=baz>' => array('foo', array('bar' => 'baz'), FALSE), - // This one causes so many errors, it might need to be tested on its own. - '<foo bar = "baz" >' => array('foo', array('bar' => NULL, '=' => NULL, '"baz"' => NULL), FALSE), + '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), FALSE), ); foreach ($bad as $test => $expects) { @@ -363,7 +369,20 @@ class TokenizerTest extends \HTML5\Tests\TestCase { $this->assertEventError($events->get(0)); $this->assertEventEquals('startTag', $expects, $events->get(1)); } - */ + + // Cause multiple parse errors. + $reallyBad = array( + '<foo ="bar">' => array('foo', array('=' => NULL, '"bar"' => NULL), FALSE), + '<foo////>' => array('foo', array(), TRUE), + '<foo bar = "baz" >' => array('foo', array('bar' => NULL, '=' => NULL, '"baz"' => NULL), FALSE), + ); + foreach ($reallyBad as $test => $expects) { + $events = $this->parse($test); + //fprintf(STDOUT, $test . print_r($events, TRUE)); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + //$this->assertEventEquals('startTag', $expects, $events->get(1)); + } } public function testText() { |