diff options
Diffstat (limited to 'plugins/af_readability/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php')
-rw-r--r-- | plugins/af_readability/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php | 743 |
1 files changed, 743 insertions, 0 deletions
<?php
/**
 * @file
 * Test the Tree Builder.
 */

namespace Masterminds\HTML5\Tests\Parser;

use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
use Masterminds\HTML5\Parser\DOMTreeBuilder;

/**
 * These tests are functional, not necessarily unit tests.
 *
 * Each test feeds markup through Scanner -> Tokenizer -> DOMTreeBuilder and
 * inspects the resulting DOM and/or the parse errors collected by the builder.
 */
class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
{
    /**
     * Parse errors reported by the tree builder during the most recent
     * parse() or parseFragment() call.
     *
     * @var array
     */
    protected $errors = array();

    /**
     * Convenience function for parsing.
     *
     * Stores the builder's errors in $this->errors as a side effect.
     *
     * @param string $string  Markup to parse.
     * @param array  $options Options forwarded to the DOMTreeBuilder constructor.
     *
     * @return mixed Whatever DOMTreeBuilder::document() returns (the tests
     *               below treat it as a \DOMDocument).
     */
    protected function parse($string, array $options = array())
    {
        $treeBuilder = new DOMTreeBuilder(false, $options);
        $scanner = new Scanner($string);
        $parser = new Tokenizer($scanner, $treeBuilder);

        $parser->parse();
        $this->errors = $treeBuilder->getErrors();

        return $treeBuilder->document();
    }

    /**
     * Utility function for parsing a fragment of HTML5.
     *
     * Stores the builder's errors in $this->errors as a side effect.
     *
     * @param string $string Markup to parse.
     *
     * @return mixed Whatever DOMTreeBuilder::fragment() returns (the tests
     *               below treat it as a \DOMDocumentFragment).
     */
    protected function parseFragment($string)
    {
        $treeBuilder = new DOMTreeBuilder(true);
        $scanner = new Scanner($string);
        $parser = new Tokenizer($scanner, $treeBuilder);

        $parser->parse();
        $this->errors = $treeBuilder->getErrors();

        return $treeBuilder->fragment();
    }

    /**
     * A minimal document yields a UTF-8 DOMDocument whose root is an
     * XHTML-namespaced <html> element.
     */
    public function testDocument()
    {
        $html = '<!DOCTYPE html><html></html>';
        $doc = $this->parse($html);

        $this->assertEquals('UTF-8', $doc->encoding);
        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertEquals('html', $doc->documentElement->tagName);
        $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
    }

    /**
     * These bare ampersands inside attribute values must not produce errors.
     */
    public function testBareAmpersand()
    {
        $html = "<!doctype html>
        <html>
            <body>
                <img src='a&b' />
                <img src='a&=' />
                <img src='a&=c' />
                <img src='a&=9' />
            </body>
        </html>";
        $doc = $this->parse($html);

        $this->assertEmpty($this->errors);
        // The expected value must itself be well-formed XML, so every literal
        // ampersand is written as &amp; (which is also what saveXML() emits).
        $this->assertXmlStringEqualsXmlString('
        <!DOCTYPE html>
        <html xmlns="http://www.w3.org/1999/xhtml"><body>
                <img src="a&amp;b"/>
                <img src="a&amp;="/>
                <img src="a&amp;=c"/>
                <img src="a&amp;=9"/>
            </body>
        </html>', $doc->saveXML());
    }

    /**
     * These two attribute values each trigger one parse error, but the
     * ampersand is still kept in the resulting attribute.
     */
    public function testBareAmpersandNotAllowedInAttributes()
    {
        $html = "<!doctype html>
        <html>
            <body>
                <img src='a&' />
                <img src='a&+' />
            </body>
        </html>";
        $doc = $this->parse($html);

        $this->assertCount(2, $this->errors);
        // Ampersands are escaped so that the expected string is well-formed XML.
        $this->assertXmlStringEqualsXmlString('
        <!DOCTYPE html>
        <html xmlns="http://www.w3.org/1999/xhtml"><body>
                <img src="a&amp;"/>
                <img src="a&amp;+"/>
            </body>
        </html>', $doc->saveXML());
    }

    /**
     * In body text, five of the six ampersand lines are expected to raise a
     * parse error; the text itself must survive unchanged.
     */
    public function testBareAmpersandNotAllowedInBody()
    {
        $html = '<!doctype html>
        <html>
            <body>
                a&b
                a&=
                a&=c
                a&=9
                a&+
                a& -- valid
            </body>
        </html>';
        $doc = $this->parse($html);

        $this->assertCount(5, $this->errors);
        // Ampersands are escaped so that the expected string is well-formed XML.
        // The body text keeps the same indentation as the input above because
        // whitespace inside a mixed-content text node is significant.
        $this->assertXmlStringEqualsXmlString('
        <!DOCTYPE html>
        <html xmlns="http://www.w3.org/1999/xhtml"><body>
                a&amp;b
                a&amp;=
                a&amp;=c
                a&amp;=9
                a&amp;+
                a&amp; -- valid
            </body>
        </html>', $doc->saveXML());
    }

    /**
     * An unterminated numeric character reference at EOF is kept as text and
     * reported as an error.
     */
    public function testEntityAtEndOfFile()
    {
        $fragment = $this->parseFragment('&#');
        $this->assertInstanceOf('DOMDocumentFragment', $fragment);
        $this->assertSame('&#', $fragment->textContent);
        $this->assertEquals('Line 1, Col 2: Expected &#DEC; &#HEX;, got EOF', $this->errors[0]);
    }

    /**
     * Mixed-case tag names (<Title>...</TitlE>) are found under their
     * lower-case names.
     */
    public function testStrangeCapitalization()
    {
        $html = '<!doctype html>
        <html>
            <head>
                <Title>Hello, world!</TitlE>
            </head>
            <body>TheBody<script>foo</script></body>
        </html>';
        $doc = $this->parse($html);

        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertEquals('html', $doc->documentElement->tagName);

        $xpath = new \DOMXPath($doc);
        $xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml');

        $this->assertEquals('Hello, world!', $xpath->query('//x:title')->item(0)->nodeValue);
        $this->assertEquals('foo', $xpath->query('//x:script')->item(0)->nodeValue);
    }

    /**
     * With 'disable_html_ns' the root element carries no namespace.
     */
    public function testDocumentWithDisabledNamespaces()
    {
        $html = '<!DOCTYPE html><html></html>';
        $doc = $this->parse($html, array('disable_html_ns' => true));

        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertEquals('html', $doc->documentElement->tagName);
        $this->assertNull($doc->documentElement->namespaceURI);
    }

    /**
     * With 'target_document' the builder populates and returns the supplied
     * DOMDocument instance.
     */
    public function testDocumentWithATargetDocument()
    {
        $targetDom = new \DOMDocument();

        $html = '<!DOCTYPE html><html></html>';
        $doc = $this->parse($html, array('target_document' => $targetDom));

        $this->assertInstanceOf('\DOMDocument', $doc);
        $this->assertSame($doc, $targetDom);
        $this->assertEquals('html', $doc->documentElement->tagName);
    }

    /**
     * No 'html5-php-fake-id-attribute' attribute may remain in the final DOM
     * when parsing with 'xmlNamespaces' enabled.
     */
    public function testDocumentFakeAttrAbsence()
    {
        $html = '<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><body>foo</body></html>';
        $doc = $this->parse($html, array('xmlNamespaces' => true));

        $xp = new \DOMXPath($doc);
        $this->assertEquals(0, $xp->query('//@html5-php-fake-id-attribute')->length);
    }

    /**
     * Fragment parsing returns a DOMDocumentFragment holding the top-level
     * elements in source order.
     */
    public function testFragment()
    {
        $html = '<div>test</div><span>test2</span>';
        $doc = $this->parseFragment($html);

        $this->assertInstanceOf('\DOMDocumentFragment', $doc);
        $this->assertTrue($doc->hasChildNodes());
        $this->assertEquals('div', $doc->childNodes->item(0)->tagName);
        $this->assertEquals('test', $doc->childNodes->item(0)->textContent);
        $this->assertEquals('span', $doc->childNodes->item(1)->tagName);
        $this->assertEquals('test2', $doc->childNodes->item(1)->textContent);
    }

    /**
     * Basic element nesting: html > (head > title, body).
     */
    public function testElements()
    {
        $html = '<!DOCTYPE html><html><head><title></title></head><body></body></html>';
        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $this->assertEquals('html', $root->tagName);
        $this->assertEquals('html', $root->localName);
        $this->assertEquals('html', $root->nodeName);

        $this->assertEquals(2, $root->childNodes->length);
        $kids = $root->childNodes;

        $this->assertEquals('head', $kids->item(0)->tagName);
        $this->assertEquals('body', $kids->item(1)->tagName);

        $head = $kids->item(0);
        $this->assertEquals(1, $head->childNodes->length);
        $this->assertEquals('title', $head->childNodes->item(0)->tagName);
    }

    /**
     * The xlink: and xml: attribute prefixes resolve to their namespaces
     * without any declaration in the markup.
     */
    public function testImplicitNamespaces()
    {
        $dom = $this->parse('<!DOCTYPE html><html><body><a xlink:href="bar">foo</a></body></html>');
        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('xlink:href');
        $this->assertEquals('http://www.w3.org/1999/xlink', $attr->namespaceURI);

        $dom = $this->parse('<!DOCTYPE html><html><body><a xml:base="bar">foo</a></body></html>');
        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('xml:base');
        $this->assertEquals('http://www.w3.org/XML/1998/namespace', $attr->namespaceURI);
    }

    /**
     * Prefixes supplied through the 'implicitNamespaces' option apply to both
     * attributes and elements.
     */
    public function testCustomImplicitNamespaces()
    {
        $dom = $this->parse('<!DOCTYPE html><html><body><a t:href="bar">foo</a></body></html>', array(
            'implicitNamespaces' => array(
                't' => 'http://www.example.com',
            ),
        ));
        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('t:href');
        $this->assertEquals('http://www.example.com', $attr->namespaceURI);

        $dom = $this->parse('<!DOCTYPE html><html><body><t:a>foo</t:a></body></html>', array(
            'implicitNamespaces' => array(
                't' => 'http://www.example.com',
            ),
        ));
        $list = $dom->getElementsByTagNameNS('http://www.example.com', 'a');
        $this->assertEquals(1, $list->length);
    }

    /**
     * With 'xmlNamespaces' enabled, an xmlns:t declaration in the markup is
     * honoured for prefixed elements and attributes.
     *
     * Note that the fixture opens <t:body> but closes it with </body>.
     */
    public function testXmlNamespaces()
    {
        $dom = $this->parse(
            '<!DOCTYPE html><html>
            <t:body xmlns:t="http://www.example.com">
                <a t:href="bar">foo</a>
            </body>
            <div>foo</div>
            </html>', array(
                'xmlNamespaces' => true,
            ));
        $a = $dom->getElementsByTagName('a')->item(0);
        $attr = $a->getAttributeNode('t:href');
        $this->assertEquals('http://www.example.com', $attr->namespaceURI);

        $list = $dom->getElementsByTagNameNS('http://www.example.com', 'body');
        $this->assertEquals(1, $list->length);
    }

    /**
     * Default and prefixed namespace declarations are scoped to the element
     * that declares them (and its descendants); siblings fall back to XHTML.
     */
    public function testXmlNamespaceNesting()
    {
        $dom = $this->parse(
            '<!DOCTYPE html><html>
            <body xmlns:x="http://www.prefixed.com" id="body">
                <a id="bar1" xmlns="http://www.prefixed.com/bar1">
                    <b id="bar4" xmlns="http://www.prefixed.com/bar4"><x:prefixed id="prefixed"/></b>
                </a>
                <svg id="svg"></svg>
                <c id="bar2" xmlns="http://www.prefixed.com/bar2"></c>
                <div id="div"></div>
                <d id="bar3"></d>
                <xn:d xmlns:xn="http://www.prefixed.com/xn" xmlns="http://www.prefixed.com/bar5_x" id="bar5"><x id="bar5_x"/></xn:d>
            </body>
            </html>', array(
                'xmlNamespaces' => true,
            ));

        $this->assertEmpty($this->errors);

        $div = $dom->getElementById('div');
        $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI);

        $body = $dom->getElementById('body');
        $this->assertEquals('http://www.w3.org/1999/xhtml', $body->namespaceURI);

        $bar1 = $dom->getElementById('bar1');
        $this->assertEquals('http://www.prefixed.com/bar1', $bar1->namespaceURI);

        $bar2 = $dom->getElementById('bar2');
        $this->assertEquals('http://www.prefixed.com/bar2', $bar2->namespaceURI);

        $bar3 = $dom->getElementById('bar3');
        $this->assertEquals('http://www.w3.org/1999/xhtml', $bar3->namespaceURI);

        $bar4 = $dom->getElementById('bar4');
        $this->assertEquals('http://www.prefixed.com/bar4', $bar4->namespaceURI);

        $svg = $dom->getElementById('svg');
        $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);

        $prefixed = $dom->getElementById('prefixed');
        $this->assertEquals('http://www.prefixed.com', $prefixed->namespaceURI);

        $prefixed = $dom->getElementById('bar5');
        $this->assertEquals('http://www.prefixed.com/xn', $prefixed->namespaceURI);

        $prefixed = $dom->getElementById('bar5_x');
        $this->assertEquals('http://www.prefixed.com/bar5_x', $prefixed->namespaceURI);
    }

    /**
     * <hr> and <div> appearing inside a <p> end up as siblings following the
     * paragraph rather than as its children.
     */
    public function testMoveNonInlineElements()
    {
        $doc = $this->parse('<p>line1<br/><hr/>line2</p>');
        $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1<br/></p><hr/>line2</html>', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.');

        $doc = $this->parse('<p>line1<div>line2</div></p>');
        $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1</p><div>line2</div></html>', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.');
    }

    /**
     * Attributes are preserved, and the id attribute is usable through
     * getElementById().
     */
    public function testAttributes()
    {
        $html = "<!DOCTYPE html>
        <html>
            <head><title></title></head>
            <body id='a' class='b c'></body>
        </html>";
        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $body = $root->getElementsByTagName('body')->item(0);
        $this->assertEquals('body', $body->tagName);
        $this->assertTrue($body->hasAttributes());
        $this->assertEquals('a', $body->getAttribute('id'));
        $this->assertEquals('b c', $body->getAttribute('class'));

        $body2 = $doc->getElementById('a');
        $this->assertEquals('body', $body2->tagName);
        $this->assertEquals('a', $body2->getAttribute('id'));
    }

    /**
     * Lower-cased SVG attribute and element names are restored to their
     * camelCase forms (viewBox, textLength, animateColor).
     */
    public function testSVGAttributes()
    {
        $html = "<!DOCTYPE html>
        <html><body>
            <svg width='150' viewbox='2'>
                <rect textlength='2'/>
                <animatecolor>foo</animatecolor>
            </svg>
        </body></html>";
        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $svg = $root->getElementsByTagName('svg')->item(0);
        $this->assertTrue($svg->hasAttribute('viewBox'));

        $rect = $root->getElementsByTagName('rect')->item(0);
        $this->assertTrue($rect->hasAttribute('textLength'));

        $ac = $root->getElementsByTagName('animateColor');
        $this->assertEquals(1, $ac->length);
    }

    /**
     * The MathML attribute 'definitionurl' is restored to 'definitionURL'.
     */
    public function testMathMLAttribute()
    {
        $html = '<!doctype html>
        <html lang="en">
            <body>
                <math>
                    <mi>x</mi>
                    <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
                        <mo>±</mo>
                    </csymbol>
                    <mi>y</mi>
                </math>
            </body>
        </html>';

        $doc = $this->parse($html);
        $root = $doc->documentElement;

        $csymbol = $root->getElementsByTagName('csymbol')->item(0);
        $this->assertTrue($csymbol->hasAttribute('definitionURL'));
    }

    /**
     * A document without an explicit <html> tag still gets an html root
     * element wrapping the content.
     */
    public function testMissingHtmlTag()
    {
        $html = '<!DOCTYPE html><title>test</title>';
        $doc = $this->parse($html);

        $this->assertEquals('html', $doc->documentElement->tagName);
        $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
    }

    /**
     * Comments are kept as comment nodes, both inside and before <html>.
     */
    public function testComment()
    {
        $html = '<html><!--Hello World.--></html>';

        $doc = $this->parse($html);

        $comment = $doc->documentElement->childNodes->item(0);
        $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
        $this->assertEquals('Hello World.', $comment->data);

        $html = '<!--Hello World.--><html></html>';
        $doc = $this->parse($html);

        // The comment is expected at index 1 and <html> at index 2; index 0 is
        // presumably the doctype node created by the builder.
        $comment = $doc->childNodes->item(1);
        $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
        $this->assertEquals('Hello World.', $comment->data);

        $comment = $doc->childNodes->item(2);
        $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType);
        $this->assertEquals('html', $comment->tagName);
    }

    /**
     * A CDATA section inside <math> becomes a CDATA section node.
     */
    public function testCDATA()
    {
        $html = '<!DOCTYPE html><html><math><![CDATA[test]]></math></html>';
        $doc = $this->parse($html);

        $wrapper = $doc->getElementsByTagName('math')->item(0);
        $this->assertEquals(1, $wrapper->childNodes->length);
        $cdata = $wrapper->childNodes->item(0);
        $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
        $this->assertEquals('test', $cdata->data);
    }

    /**
     * Character data becomes a text node; text appearing before <head> is
     * dropped with an error.
     */
    public function testText()
    {
        $html = '<!DOCTYPE html><html><head></head><body><math>test</math></body></html>';
        $doc = $this->parse($html);

        $wrapper = $doc->getElementsByTagName('math')->item(0);
        $this->assertEquals(1, $wrapper->childNodes->length);
        $data = $wrapper->childNodes->item(0);
        $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
        $this->assertEquals('test', $data->data);

        // The DomTreeBuilder has special handling for text when in before head mode.
        $html = '<!DOCTYPE html><html>
        Foo<head></head><body></body></html>';
        $doc = $this->parse($html);
        $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]);
        $headElement = $doc->documentElement->firstChild;
        $this->assertEquals('head', $headElement->tagName);
    }

    /**
     * Parse errors are exposed as a non-empty list of strings.
     */
    public function testParseErrors()
    {
        $html = '<!DOCTYPE html><html><math><![CDATA[test';
        $doc = $this->parse($html);

        // We're JUST testing that we can access errors. Actual testing of
        // error messages happen in the Tokenizer's tests.
        $this->assertGreaterThan(0, count($this->errors));
        $this->assertTrue(is_string($this->errors[0]));
    }

    /**
     * Processing instructions are inserted into the DOM, except for a
     * leading <?xml ...?> declaration.
     */
    public function testProcessingInstruction()
    {
        // Test the simple case, which is where PIs are inserted into the DOM.
        $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
        $this->assertEquals(1, $doc->documentElement->childNodes->length);
        $pi = $doc->documentElement->firstChild;
        $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
        $this->assertEquals('foo', $pi->nodeName);
        $this->assertEquals('bar', $pi->data);

        // Leading xml PIs should be ignored.
        $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');

        $this->assertEquals(2, $doc->childNodes->length);
        $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
        $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
    }

    /**
     * An open <p> is closed when <figure> starts, so <figure> becomes the
     * paragraph's next sibling.
     */
    public function testAutocloseP()
    {
        $html = '<!DOCTYPE html><html><body><p><figure></body></html>';
        $doc = $this->parse($html);

        $p = $doc->getElementsByTagName('p')->item(0);
        $this->assertEquals(0, $p->childNodes->length);
        $this->assertEquals('figure', $p->nextSibling->tagName);
    }

    /**
     * Unclosed <li> elements are closed by the next <li>, giving three
     * children of <ul> rather than a nested chain.
     */
    public function testAutocloseLI()
    {
        $html = '<!doctype html>
        <html lang="en">
            <body>
                <ul><li>Foo<li>Bar<li>Baz</ul>
            </body>
        </html>';

        $doc = $this->parse($html);
        $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
        $this->assertEquals(3, $length);
    }

    /**
     * <math> is placed in the MathML namespace.
     */
    public function testMathML()
    {
        $html = '<!doctype html>
        <html lang="en">
            <body>
                <math xmlns="http://www.w3.org/1998/Math/MathML">
                    <mi>x</mi>
                    <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
                        <mo>±</mo>
                    </csymbol>
                    <mi>y</mi>
                </math>
            </body>
        </html>';

        $doc = $this->parse($html);
        $math = $doc->getElementsByTagName('math')->item(0);
        $this->assertEquals('math', $math->tagName);
        $this->assertEquals('math', $math->nodeName);
        $this->assertEquals('math', $math->localName);
        $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI);
    }

    /**
     * <svg> is placed in the SVG namespace and a lower-case <textpath> is
     * found as 'textPath'.
     */
    public function testSVG()
    {
        $html = '<!doctype html>
        <html lang="en">
            <body>
                <svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg">
                    <rect width="1" height="2" x="2" fill="#d2232c" />
                    <text font-family="Verdana" font-size="32">
                        <textpath xlink:href="#Foo">
                            Test Text.
                        </textPath>
                    </text>
                </svg>
            </body>
        </html>';

        $doc = $this->parse($html);
        $svg = $doc->getElementsByTagName('svg')->item(0);
        $this->assertEquals('svg', $svg->tagName);
        $this->assertEquals('svg', $svg->nodeName);
        $this->assertEquals('svg', $svg->localName);
        $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);

        $textPath = $doc->getElementsByTagName('textPath')->item(0);
        $this->assertEquals('textPath', $textPath->tagName);
    }

    /**
     * <noscript> parses without errors in both <head> and <body>, and markup
     * nested inside it in <body> is parsed into elements.
     */
    public function testNoScript()
    {
        $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
        $doc = $this->parse($html);
        $this->assertEmpty($this->errors);
        $noscript = $doc->getElementsByTagName('noscript')->item(0);
        $this->assertEquals('noscript', $noscript->tagName);

        $html = '<!DOCTYPE html><html><body><noscript><p>No JS</p></noscript></body></html>';
        $doc = $this->parse($html);
        $this->assertEmpty($this->errors);
        $p = $doc->getElementsByTagName('p')->item(0);
        $this->assertEquals('p', $p->tagName);
    }

    /**
     * Regression for issue #13.
     *
     * Content placed directly inside <html> (no <body>) parses without errors.
     */
    public function testRegressionHTMLNoBody()
    {
        $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
        $doc = $this->parse($html);
        $span = $doc->getElementById('test');

        $this->assertEmpty($this->errors);

        $this->assertEquals('span', $span->tagName);
        $this->assertEquals('Test', $span->textContent);
    }

    /**
     * A registered instruction processor receives the PI name and data.
     *
     * The <div> asserted at the end does not appear in the input markup; it is
     * presumably inserted by InstructionProcessorMock (defined elsewhere).
     */
    public function testInstructionProcessor()
    {
        $string = '<!DOCTYPE html><html><?foo bar ?></html>';

        $treeBuilder = new DOMTreeBuilder();
        $is = new InstructionProcessorMock();
        $treeBuilder->setInstructionProcessor($is);

        $scanner = new Scanner($string);
        $parser = new Tokenizer($scanner, $treeBuilder);

        $parser->parse();
        $dom = $treeBuilder->document();
        $div = $dom->getElementsByTagName('div')->item(0);

        $this->assertEquals(1, $is->count);
        $this->assertEquals('foo', $is->name);
        $this->assertEquals('bar ', $is->data);
        $this->assertEquals('div', $div->tagName);
        $this->assertEquals('foo', $div->textContent);
    }

    /**
     * <option> elements stay inside their own <optgroup>.
     */
    public function testSelectGroupedOptions()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
    <head>
        <title>testSelectGroupedOptions</title>
    </head>
    <body>
        <select>
            <optgroup id="first" label="first">
                <option value="foo">foo</option>
                <option value="bar">bar</option>
                <option value="baz">baz</option>
            </optgroup>
            <optgroup id="second" label="second">
                <option value="lorem">lorem</option>
                <option value="ipsum">ipsum</option>
            </optgroup>
        </select>
    </body>
</html>
EOM;
        $dom = $this->parse($html);

        $this->assertSame(3, $dom->getElementById('first')->getElementsByTagName('option')->length);
        $this->assertSame(2, $dom->getElementById('second')->getElementsByTagName('option')->length);
    }

    /**
     * Consecutive void <meta> tags become two empty siblings, not nested
     * elements.
     */
    public function testVoidTag()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
    <head>
        <title>testVoidTag</title>
        <meta>
        <meta>
    </head>
    <body></body>
</html>
EOM;

        $dom = $this->parse($html);
        $this->assertSame(2, $dom->getElementsByTagName('meta')->length);
        $this->assertSame(0, $dom->getElementsByTagName('meta')->item(0)->childNodes->length);
        $this->assertSame(0, $dom->getElementsByTagName('meta')->item(1)->childNodes->length);
    }

    /**
     * The self-closing slash on a non-void element (<div />) is ignored, so
     * the following <span> becomes the div's single child.
     */
    public function testIgnoreSelfClosingTag()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
    <head>
        <title>testIllegalSelfClosingTag</title>
    </head>
    <body>
        <div /><span>Hello, World!</span></div>
    </body>
</html>
EOM;

        $dom = $this->parse($html);
        $this->assertSame(1, $dom->getElementsByTagName('div')->item(0)->childNodes->length);
    }

    /**
     * <audio> stays inside its <p> and keeps three child nodes.
     *
     * The whitespace between <audio> and <source> is significant here: it
     * accounts for one of the three expected child nodes.
     */
    public function testIAudioInParagraph()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
    <head>
        <title>testIllegalSelfClosingTag</title>
    </head>
    <body>
        <p>
            <audio preload="none" controls="controls">
                <source src="https://example.com/test.mp3" type="audio/mpeg" />
                Your browser does not support the audio element.
            </audio>
        </p>
    </body>
</html>>
</html>
EOM;

        $dom = $this->parse($html);
        $audio = $dom->getElementsByTagName('audio')->item(0);

        $this->assertSame('p', $audio->parentNode->nodeName);
        $this->assertSame(3, $audio->childNodes->length);
    }

    /**
     * A closing </br> tag is treated as a line break element, but any
     * attributes written on it are discarded.
     */
    public function testClosingBr()
    {
        $html = <<<EOM
<!DOCTYPE html>
<html>
    <head>
        <title>testClosingBr</title>
    </head>
    <body>
        <p>
            This line ends with a normal line break <br class="attribute-should-be-retained">
            This line ends with a line break marked up as a closing tag </br class="attribute-should-be-discarded">
        </p>
    </body>
</html>>
</html>
EOM;

        $dom = $this->parse($html);

        $this->assertSame(2, $dom->getElementsByTagName('br')->length);
        $this->assertSame(1, $dom->getElementsByTagName('br')->item(0)->attributes->length);
        $this->assertSame(0, $dom->getElementsByTagName('br')->item(1)->attributes->length);
    }
}