From 2a38f56f3772f943be436c7b411c2ae5fac6cee6 Mon Sep 17 00:00:00 2001 From: Asmir Mustafic Date: Mon, 28 Aug 2017 14:44:01 +0200 Subject: Fixes https://github.com/Masterminds/html5-php/issues/124 Reference: https://www.w3.org/TR/html52/syntax.html#character-reference-state If the character reference was consumed as part of an attribute (return state is either attribute value (double-quoted) state, attribute value (single-quoted) state or attribute value (unquoted) state), and the last character matched is not a U+003B SEMICOLON character (;), and the next input character is either a U+003D EQUALS SIGN character (=) or an alphanumeric ASCII character, then, for historical reasons, switch to the character reference end state. If the last character matched is not a U+003B SEMICOLON character (;), this is a parse error. --- test/HTML5/Parser/DOMTreeBuilderTest.php | 74 ++++++++++++++++++++++++++++++++ test/HTML5/Parser/TokenizerTest.php | 27 +++++++----- 2 files changed, 91 insertions(+), 10 deletions(-) (limited to 'test/HTML5') diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index 3e1a58f..a441af8 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -58,6 +58,80 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI); } + public function testBareAmpersand() + { + $html = " + + + + + + + + "; + $doc = $this->parse($html); + + $this->assertEmpty($this->errors); + $this->assertXmlStringEqualsXmlString(' + + + + + + + + ', $doc->saveXML()); + } + + public function testBareAmpersandNotAllowedInAttributes() + { + $html = " + + + + + + "; + $doc = $this->parse($html); + + $this->assertCount(2, $this->errors); + $this->assertXmlStringEqualsXmlString(' + + + + + + ', $doc->saveXML()); + } + public function testBareAmpersandNotAllowedInBody() + { + $html = " + + + a&b + a&= + a&=c + a&=9 + a&+ + a& -- valid + + "; + $doc = $this->parse($html); + + $this->assertCount(5, $this->errors); + $this->assertXmlStringEqualsXmlString(' + + + a&b + a&= + a&=c + a&=9 + a&+ + a& -- valid + + ', $doc->saveXML()); + } + public function testStrangeCapitalization() { $html = " diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index f2f612c..5e52722 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -622,6 +622,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase ), false ), + "" => array( + 'foo', + array( + 'a' => 'blue&red' + ), + false + ), "" => array( 'foo', array( @@ -629,6 +636,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase ), false ), + "" => array( + 'foo', + array( + 'a' => 'blue&&&red' + ), + false + ), "" => array( 'foo', array( @@ -725,18 +739,11 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase // Cause a parse error. $bad = array( - // This will emit an entity lookup failure for &red. - "" => array( + // This will emit an entity lookup failure for &+dark. + "" => array( 'foo', array( - 'a' => 'blue&red' - ), - false - ), - "" => array( - 'foo', - array( - 'a' => 'blue&&&red' + 'a' => 'blue&+dark' ), false ), -- cgit v1.2.3