summary | refs | log | tree | commit | diff
path: root/test/HTML5
diff options
context:
space:
mode:
author: Asmir Mustafic <[email protected]> 2017-08-28 14:44:01 +0200
committer: Asmir Mustafic <[email protected]> 2017-08-28 14:44:01 +0200
commit: 2a38f56f3772f943be436c7b411c2ae5fac6cee6 (patch)
tree: 2b1cec9a30b2301afeda759a16341972b5a492bd /test/HTML5
parent: e965886a79a560b4b00a4c471e2bdfafea23fdcb (diff)
Fixes https://github.com/Masterminds/html5-php/issues/124
Reference: https://www.w3.org/TR/html52/syntax.html#character-reference-state
The specification states: "If the character reference was consumed as part of an attribute (return state is either attribute value (double-quoted) state, attribute value (single-quoted) state or attribute value (unquoted) state), and the last character matched is not a U+003B SEMICOLON character (;), and the next input character is either a U+003D EQUALS SIGN character (=) or an alphanumeric ASCII character, then, for historical reasons, switch to the character reference end state. If the last character matched is not a U+003B SEMICOLON character (;), this is a parse error."
Diffstat (limited to 'test/HTML5')
-rw-r--r-- test/HTML5/Parser/DOMTreeBuilderTest.php 74
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 27
2 files changed, 91 insertions, 10 deletions
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 3e1a58f..a441af8 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -58,6 +58,80 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
}
+ public function testBareAmpersand()
+ {
+ $html = "<!doctype html>
+ <html>
+ <body>
+ <img src='a&b' />
+ <img src='a&=' />
+ <img src='a&=c' />
+ <img src='a&=9' />
+ </body>
+ </html>";
+ $doc = $this->parse($html);
+
+ $this->assertEmpty($this->errors);
+ $this->assertXmlStringEqualsXmlString('
+ <!DOCTYPE html>
+ <html xmlns="http://www.w3.org/1999/xhtml"><body>
+ <img src="a&amp;b"/>
+ <img src="a&amp;="/>
+ <img src="a&amp;=c"/>
+ <img src="a&amp;=9"/>
+ </body>
+ </html>', $doc->saveXML());
+ }
+
+ public function testBareAmpersandNotAllowedInAttributes()
+ {
+ $html = "<!doctype html>
+ <html>
+ <body>
+ <img src='a&' />
+ <img src='a&+' />
+ </body>
+ </html>";
+ $doc = $this->parse($html);
+
+ $this->assertCount(2, $this->errors);
+ $this->assertXmlStringEqualsXmlString('
+ <!DOCTYPE html>
+ <html xmlns="http://www.w3.org/1999/xhtml"><body>
+ <img src="a&amp;"/>
+ <img src="a&amp;+"/>
+ </body>
+ </html>', $doc->saveXML());
+ }
+ public function testBareAmpersandNotAllowedInBody()
+ {
+ $html = "<!doctype html>
+ <html>
+ <body>
+ a&b
+ a&=
+ a&=c
+ a&=9
+ a&+
+ a& -- valid
+ </body>
+ </html>";
+ $doc = $this->parse($html);
+
+ $this->assertCount(5, $this->errors);
+ $this->assertXmlStringEqualsXmlString('
+ <!DOCTYPE html>
+ <html xmlns="http://www.w3.org/1999/xhtml"><body>
+ a&amp;b
+ a&amp;=
+ a&amp;=c
+ a&amp;=9
+ a&amp;+
+ a&amp; -- valid
+ </body>
+ </html>', $doc->saveXML());
+ }
+
public function testStrangeCapitalization()
{
$html = "<!doctype html>
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index f2f612c..5e52722 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -622,6 +622,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
),
false
),
+ "<foo a='blue&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&red'
+ ),
+ false
+ ),
"<foo a='blue&amp;red'>" => array(
'foo',
array(
@@ -629,6 +636,13 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
),
false
),
+ "<foo a='blue&&amp;&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&&&red'
+ ),
+ false
+ ),
"<foo a='blue&&amp;red'>" => array(
'foo',
array(
@@ -725,18 +739,11 @@ class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
// Cause a parse error.
$bad = array(
- // This will emit an entity lookup failure for &red.
- "<foo a='blue&red'>" => array(
+ // This will emit an entity lookup failure for &+dark.
+ "<foo a='blue&+dark'>" => array(
'foo',
array(
- 'a' => 'blue&red'
- ),
- false
- ),
- "<foo a='blue&&amp;&red'>" => array(
- 'foo',
- array(
- 'a' => 'blue&&&red'
+ 'a' => 'blue&+dark'
),
false
),