diff options
author | Asmir Mustafic <[email protected]> | 2017-09-01 17:07:16 +0200 |
---|---|---|
committer | GitHub <[email protected]> | 2017-09-01 17:07:16 +0200 |
commit | 39e2a7a30757948389a7ca744653a9adcca06bcb (patch) | |
tree | 5d6247ec963b0664b2728d5a319b8026f692a798 | |
parent | b8afbae8cdb626c786a1590b3a83d366933d807d (diff) | |
parent | 2a38f56f3772f943be436c7b411c2ae5fac6cee6 (diff) |
Merge pull request #134 from Masterminds/ampersand-in-urls
Raw & in attributes
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 10 | ||||
-rw-r--r-- | test/HTML5/Parser/DOMTreeBuilderTest.php | 74 | ||||
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 27 |
3 files changed, 98 insertions, 13 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 95dbf84..c42bc3d 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -1074,8 +1074,10 @@ class Tokenizer } $entity = CharacterReference::lookupDecimal($numeric); } - } // String entity. - else { + } elseif ($tok === '=' && $inAttribute) { + return '&'; + } else { // String entity. + // Attempt to consume a string up to a ';'. // [a-zA-Z0-9]+; $cname = $this->scanner->getAsciiAlphaNum(); @@ -1085,7 +1087,9 @@ class Tokenizer // and continue on as the & is not part of an entity. The & will // be converted to &amp; elsewhere. if ($entity == null) { - $this->parseError("No match in entity table for '%s'", $cname); + if (!$inAttribute || strlen($cname) === 0) { + $this->parseError("No match in entity table for '%s'", $cname); + } $this->scanner->unconsume($this->scanner->position() - $start); return '&'; } diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index 3e1a58f..a441af8 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -58,6 +58,80 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI); } + public function testBareAmpersand() + { + $html = "<!doctype html> + <html> + <body> + <img src='a&b' /> + <img src='a&=' /> + <img src='a&=c' /> + <img src='a&=9' /> + </body> + </html>"; + $doc = $this->parse($html); + + $this->assertEmpty($this->errors); + $this->assertXmlStringEqualsXmlString(' + <!DOCTYPE html> + <html xmlns="http://www.w3.org/1999/xhtml"><body> + <img src="a&amp;b"/> + <img src="a&amp;="/> + <img src="a&amp;=c"/> + <img src="a&amp;=9"/> + </body> + </html>', $doc->saveXML()); + } + + public function testBareAmpersandNotAllowedInAttributes() + { + $html = "<!doctype html> + <html> + <body> + <img src='a&' /> + <img src='a&+' /> + </body> + </html>"; + $doc = 
$this->parse($html); + + $this->assertCount(2, $this->errors); + $this->assertXmlStringEqualsXmlString(' + <!DOCTYPE html> + <html xmlns="http://www.w3.org/1999/xhtml"><body> + <img src="a&amp;"/> + <img src="a&amp;+"/> + </body> + </html>', $doc->saveXML()); + } + public function testBareAmpersandNotAllowedInBody() + { + $html = "<!doctype html> + <html> + <body> + a&b + a&= + a&=c + a&=9 + a&+ + a&amp; -- valid + </body> + </html>"; + $doc = $this->parse($html); + + $this->assertCount(5, $this->errors); + $this->assertXmlStringEqualsXmlString(' + <!DOCTYPE html> + <html xmlns="http://www.w3.org/1999/xhtml"><body> + a&amp;b + a&amp;= + a&amp;=c + a&amp;=9 + a&amp;+ + a&amp; -- valid + </body> + </html>', $doc->saveXML()); + } + public function testStrangeCapitalization() { $html = "<!doctype html> diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index f2f612c..5e52722 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -622,6 +622,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase ), false ), + "<foo a='blue&red'>" => array( + 'foo', + array( + 'a' => 'blue&red' + ), + false + ), "<foo a='blue&amp;red'>" => array( 'foo', array( @@ -629,6 +636,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase ), false ), + "<foo a='blue&&&red'>" => array( + 'foo', + array( + 'a' => 'blue&&&red' + ), + false + ), "<foo a='blue&&amp;red'>" => array( 'foo', array( @@ -725,18 +739,11 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase // Cause a parse error. $bad = array( - // This will emit an entity lookup failure for &red. - "<foo a='blue&red'>" => array( + // This will emit an entity lookup failure for &+dark. + "<foo a='blue&+dark'>" => array( 'foo', array( - 'a' => 'blue&red' - ), - false - ), - "<foo a='blue&&&red'>" => array( - 'foo', - array( - 'a' => 'blue&&&red' + 'a' => 'blue&+dark' ), false ), |