summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Asmir Mustafic <[email protected]> 2017-09-01 17:07:16 +0200
committer: GitHub <[email protected]> 2017-09-01 17:07:16 +0200
commit: 39e2a7a30757948389a7ca744653a9adcca06bcb (patch)
tree: 5d6247ec963b0664b2728d5a319b8026f692a798
parent: b8afbae8cdb626c786a1590b3a83d366933d807d (diff)
parent: 2a38f56f3772f943be436c7b411c2ae5fac6cee6 (diff)
Merge pull request #134 from Masterminds/ampersand-in-urls
Raw & in attributes
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 10
-rw-r--r-- test/HTML5/Parser/DOMTreeBuilderTest.php 74
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 27
3 files changed, 98 insertions, 13 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 95dbf84..c42bc3d 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -1074,8 +1074,10 @@ class Tokenizer
}
$entity = CharacterReference::lookupDecimal($numeric);
}
- } // String entity.
- else {
+ } elseif ($tok === '=' && $inAttribute) {
+ return '&';
+ } else { // String entity.
+
// Attempt to consume a string up to a ';'.
// [a-zA-Z0-9]+;
$cname = $this->scanner->getAsciiAlphaNum();
@@ -1085,7 +1087,9 @@ class Tokenizer
// and continue on as the & is not part of an entity. The & will
// be converted to &amp; elsewhere.
if ($entity == null) {
- $this->parseError("No match in entity table for '%s'", $cname);
+ if (!$inAttribute || strlen($cname) === 0) {
+ $this->parseError("No match in entity table for '%s'", $cname);
+ }
$this->scanner->unconsume($this->scanner->position() - $start);
return '&';
}
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 3e1a58f..a441af8 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -58,6 +58,80 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
}
+ public function testBareAmpersand()
+ {
+ $html = "<!doctype html>
+ <html>
+ <body>
+ <img src='a&b' />
+ <img src='a&=' />
+ <img src='a&=c' />
+ <img src='a&=9' />
+ </body>
+ </html>";
+ $doc = $this->parse($html);
+
+ $this->assertEmpty($this->errors);
+ $this->assertXmlStringEqualsXmlString('
+ <!DOCTYPE html>
+ <html xmlns="http://www.w3.org/1999/xhtml"><body>
+ <img src="a&amp;b"/>
+ <img src="a&amp;="/>
+ <img src="a&amp;=c"/>
+ <img src="a&amp;=9"/>
+ </body>
+ </html>', $doc->saveXML());
+ }
+
+ public function testBareAmpersandNotAllowedInAttributes()
+ {
+ $html = "<!doctype html>
+ <html>
+ <body>
+ <img src='a&' />
+ <img src='a&+' />
+ </body>
+ </html>";
+ $doc = $this->parse($html);
+
+ $this->assertCount(2, $this->errors);
+ $this->assertXmlStringEqualsXmlString('
+ <!DOCTYPE html>
+ <html xmlns="http://www.w3.org/1999/xhtml"><body>
+ <img src="a&amp;"/>
+ <img src="a&amp;+"/>
+ </body>
+ </html>', $doc->saveXML());
+ }
+ public function testBareAmpersandNotAllowedInBody()
+ {
+ $html = "<!doctype html>
+ <html>
+ <body>
+ a&b
+ a&=
+ a&=c
+ a&=9
+ a&+
+ a& -- valid
+ </body>
+ </html>";
+ $doc = $this->parse($html);
+
+ $this->assertCount(5, $this->errors);
+ $this->assertXmlStringEqualsXmlString('
+ <!DOCTYPE html>
+ <html xmlns="http://www.w3.org/1999/xhtml"><body>
+ a&amp;b
+ a&amp;=
+ a&amp;=c
+ a&amp;=9
+ a&amp;+
+ a&amp; -- valid
+ </body>
+ </html>', $doc->saveXML());
+ }
+
public function testStrangeCapitalization()
{
$html = "<!doctype html>
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index f2f612c..5e52722 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -622,6 +622,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
),
false
),
+ "<foo a='blue&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&red'
+ ),
+ false
+ ),
"<foo a='blue&amp;red'>" => array(
'foo',
array(
@@ -629,6 +636,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
),
false
),
+ "<foo a='blue&&amp;&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&&&red'
+ ),
+ false
+ ),
"<foo a='blue&&amp;red'>" => array(
'foo',
array(
@@ -725,18 +739,11 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
// Cause a parse error.
$bad = array(
- // This will emit an entity lookup failure for &red.
- "<foo a='blue&red'>" => array(
+ // This will emit an entity lookup failure for &+dark.
+ "<foo a='blue&+dark'>" => array(
'foo',
array(
- 'a' => 'blue&red'
- ),
- false
- ),
- "<foo a='blue&&amp;&red'>" => array(
- 'foo',
- array(
- 'a' => 'blue&&&red'
+ 'a' => 'blue&+dark'
),
false
),