From 0226e0ca0dc70f9a0310b3eef045ee1c1e0ca3ac Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 13 Dec 2022 20:00:46 +0300 Subject: split into a separate repo --- .../test/HTML5/Parser/CharacterReferenceTest.php | 44 + .../html5/test/HTML5/Parser/DOMTreeBuilderTest.php | 743 ++++++++++++++++ .../html5/test/HTML5/Parser/EventStack.php | 116 +++ .../html5/test/HTML5/Parser/EventStackError.php | 7 + .../test/HTML5/Parser/InstructionProcessorMock.php | 26 + .../html5/test/HTML5/Parser/ScannerTest.php | 184 ++++ .../html5/test/HTML5/Parser/TokenizerTest.php | 978 +++++++++++++++++++++ .../test/HTML5/Parser/TreeBuildingRulesTest.php | 118 +++ .../html5/test/HTML5/Parser/UTF8UtilsTest.php | 28 + 9 files changed, 2244 insertions(+) create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/CharacterReferenceTest.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/EventStack.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/EventStackError.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/InstructionProcessorMock.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/ScannerTest.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/TreeBuildingRulesTest.php create mode 100644 vendor/masterminds/html5/test/HTML5/Parser/UTF8UtilsTest.php (limited to 'vendor/masterminds/html5/test/HTML5/Parser') diff --git a/vendor/masterminds/html5/test/HTML5/Parser/CharacterReferenceTest.php b/vendor/masterminds/html5/test/HTML5/Parser/CharacterReferenceTest.php new file mode 100644 index 0000000..a5eee73 --- /dev/null +++ b/vendor/masterminds/html5/test/HTML5/Parser/CharacterReferenceTest.php @@ -0,0 +1,44 @@ +assertEquals('&', CharacterReference::lookupName('amp')); + $this->assertEquals('<', CharacterReference::lookupName('lt')); + $this->assertEquals('>', CharacterReference::lookupName('gt')); + $this->assertEquals('"', CharacterReference::lookupName('quot')); + $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement')); + + $this->assertNull(CharacterReference::lookupName('StinkyCheese')); + } + + public function testLookupHex() + { + $this->assertEquals('<', CharacterReference::lookupHex('3c')); + $this->assertEquals('<', CharacterReference::lookupHex('003c')); + $this->assertEquals('&', CharacterReference::lookupHex('26')); + $this->assertEquals('}', CharacterReference::lookupHex('7d')); + $this->assertEquals('Σ', CharacterReference::lookupHex('3A3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('03A3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('3a3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('03a3')); + } + + public function testLookupDecimal() + { + $this->assertEquals('&', CharacterReference::lookupDecimal(38)); + $this->assertEquals('&', CharacterReference::lookupDecimal('38')); + $this->assertEquals('<', CharacterReference::lookupDecimal(60)); + $this->assertEquals('Σ', CharacterReference::lookupDecimal(931)); + $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931')); + } +} diff --git a/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php b/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php new file mode 100644 index 0000000..659378c --- /dev/null +++ b/vendor/masterminds/html5/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -0,0 +1,743 @@ +parse(); + $this->errors = $treeBuilder->getErrors(); + + return $treeBuilder->document(); + } + + /** + * Utility function for parsing a fragment of HTML5. + */ + protected function parseFragment($string) + { + $treeBuilder = new DOMTreeBuilder(true); + $scanner = new Scanner($string); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + $this->errors = $treeBuilder->getErrors(); + + return $treeBuilder->fragment(); + } + + public function testDocument() + { + $html = ''; + $doc = $this->parse($html); + + $this->assertEquals('UTF-8', $doc->encoding); + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertEquals('html', $doc->documentElement->tagName); + $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI); + } + + public function testBareAmpersand() + { + $html = " + + + + + + + + "; + $doc = $this->parse($html); + + $this->assertEmpty($this->errors); + $this->assertXmlStringEqualsXmlString(' + + + + + + + + ', $doc->saveXML()); + } + + public function testBareAmpersandNotAllowedInAttributes() + { + $html = " + + + + + + "; + $doc = $this->parse($html); + + $this->assertCount(2, $this->errors); + $this->assertXmlStringEqualsXmlString(' + + + + + + ', $doc->saveXML()); + } + + public function testBareAmpersandNotAllowedInBody() + { + $html = ' + + + a&b + a&= + a&=c + a&=9 + a&+ + a& -- valid + + '; + $doc = $this->parse($html); + + $this->assertCount(5, $this->errors); + $this->assertXmlStringEqualsXmlString(' + + + a&b + a&= + a&=c + a&=9 + a&+ + a& -- valid + + ', $doc->saveXML()); + } + + public function testEntityAtEndOfFile() + { + $fragment = $this->parseFragment('&#'); + $this->assertInstanceOf('DOMDocumentFragment', $fragment); + $this->assertSame('&#', $fragment->textContent); + $this->assertEquals('Line 1, Col 2: Expected &#DEC; &#HEX;, got EOF', $this->errors[0]); + } + + public function testStrangeCapitalization() + { + $html = ' + + + Hello, world! + + TheBody + '; + $doc = $this->parse($html); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertEquals('html', $doc->documentElement->tagName); + + $xpath = new \DOMXPath($doc); + $xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml'); + + $this->assertEquals('Hello, world!', $xpath->query('//x:title')->item(0)->nodeValue); + $this->assertEquals('foo', $xpath->query('//x:script')->item(0)->nodeValue); + } + + public function testDocumentWithDisabledNamespaces() + { + $html = ''; + $doc = $this->parse($html, array('disable_html_ns' => true)); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertEquals('html', $doc->documentElement->tagName); + $this->assertNull($doc->documentElement->namespaceURI); + } + + public function testDocumentWithATargetDocument() + { + $targetDom = new \DOMDocument(); + + $html = ''; + $doc = $this->parse($html, array('target_document' => $targetDom)); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertSame($doc, $targetDom); + $this->assertEquals('html', $doc->documentElement->tagName); + } + + public function testDocumentFakeAttrAbsence() + { + $html = 'foo'; + $doc = $this->parse($html, array('xmlNamespaces' => true)); + + $xp = new \DOMXPath($doc); + $this->assertEquals(0, $xp->query('//@html5-php-fake-id-attribute')->length); + } + + public function testFragment() + { + $html = '
test
test2'; + $doc = $this->parseFragment($html); + + $this->assertInstanceOf('\DOMDocumentFragment', $doc); + $this->assertTrue($doc->hasChildNodes()); + $this->assertEquals('div', $doc->childNodes->item(0)->tagName); + $this->assertEquals('test', $doc->childNodes->item(0)->textContent); + $this->assertEquals('span', $doc->childNodes->item(1)->tagName); + $this->assertEquals('test2', $doc->childNodes->item(1)->textContent); + } + + public function testElements() + { + $html = ''; + $doc = $this->parse($html); + $root = $doc->documentElement; + + $this->assertEquals('html', $root->tagName); + $this->assertEquals('html', $root->localName); + $this->assertEquals('html', $root->nodeName); + + $this->assertEquals(2, $root->childNodes->length); + $kids = $root->childNodes; + + $this->assertEquals('head', $kids->item(0)->tagName); + $this->assertEquals('body', $kids->item(1)->tagName); + + $head = $kids->item(0); + $this->assertEquals(1, $head->childNodes->length); + $this->assertEquals('title', $head->childNodes->item(0)->tagName); + } + + public function testImplicitNamespaces() + { + $dom = $this->parse('foo'); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('xlink:href'); + $this->assertEquals('http://www.w3.org/1999/xlink', $attr->namespaceURI); + + $dom = $this->parse('foo'); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('xml:base'); + $this->assertEquals('http://www.w3.org/XML/1998/namespace', $attr->namespaceURI); + } + + public function testCustomImplicitNamespaces() + { + $dom = $this->parse('foo', array( + 'implicitNamespaces' => array( + 't' => 'http://www.example.com', + ), + )); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('t:href'); + $this->assertEquals('http://www.example.com', $attr->namespaceURI); + + $dom = $this->parse('foo', array( + 'implicitNamespaces' => array( + 't' => 'http://www.example.com', + ), + )); + $list = $dom->getElementsByTagNameNS('http://www.example.com', 'a'); + $this->assertEquals(1, $list->length); + } + + public function testXmlNamespaces() + { + $dom = $this->parse( + ' + + foo + +
foo
+ ', array( + 'xmlNamespaces' => true, + )); + $a = $dom->getElementsByTagName('a')->item(0); + $attr = $a->getAttributeNode('t:href'); + $this->assertEquals('http://www.example.com', $attr->namespaceURI); + + $list = $dom->getElementsByTagNameNS('http://www.example.com', 'body'); + $this->assertEquals(1, $list->length); + } + + public function testXmlNamespaceNesting() + { + $dom = $this->parse( + ' + + + + + + +
+ + + + ', array( + 'xmlNamespaces' => true, + )); + + $this->assertEmpty($this->errors); + + $div = $dom->getElementById('div'); + $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI); + + $body = $dom->getElementById('body'); + $this->assertEquals('http://www.w3.org/1999/xhtml', $body->namespaceURI); + + $bar1 = $dom->getElementById('bar1'); + $this->assertEquals('http://www.prefixed.com/bar1', $bar1->namespaceURI); + + $bar2 = $dom->getElementById('bar2'); + $this->assertEquals('http://www.prefixed.com/bar2', $bar2->namespaceURI); + + $bar3 = $dom->getElementById('bar3'); + $this->assertEquals('http://www.w3.org/1999/xhtml', $bar3->namespaceURI); + + $bar4 = $dom->getElementById('bar4'); + $this->assertEquals('http://www.prefixed.com/bar4', $bar4->namespaceURI); + + $svg = $dom->getElementById('svg'); + $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI); + + $prefixed = $dom->getElementById('prefixed'); + $this->assertEquals('http://www.prefixed.com', $prefixed->namespaceURI); + + $prefixed = $dom->getElementById('bar5'); + $this->assertEquals('http://www.prefixed.com/xn', $prefixed->namespaceURI); + + $prefixed = $dom->getElementById('bar5_x'); + $this->assertEquals('http://www.prefixed.com/bar5_x', $prefixed->namespaceURI); + } + + public function testMoveNonInlineElements() + { + $doc = $this->parse('

line1


line2

'); + $this->assertEquals('

line1


line2', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.'); + + $doc = $this->parse('

line1

line2

'); + $this->assertEquals('

line1

line2
', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.'); + } + + public function testAttributes() + { + $html = " + + + + "; + $doc = $this->parse($html); + $root = $doc->documentElement; + + $body = $root->GetElementsByTagName('body')->item(0); + $this->assertEquals('body', $body->tagName); + $this->assertTrue($body->hasAttributes()); + $this->assertEquals('a', $body->getAttribute('id')); + $this->assertEquals('b c', $body->getAttribute('class')); + + $body2 = $doc->getElementById('a'); + $this->assertEquals('body', $body2->tagName); + $this->assertEquals('a', $body2->getAttribute('id')); + } + + public function testSVGAttributes() + { + $html = " + + + + foo + + "; + $doc = $this->parse($html); + $root = $doc->documentElement; + + $svg = $root->getElementsByTagName('svg')->item(0); + $this->assertTrue($svg->hasAttribute('viewBox')); + + $rect = $root->getElementsByTagName('rect')->item(0); + $this->assertTrue($rect->hasAttribute('textLength')); + + $ac = $root->getElementsByTagName('animateColor'); + $this->assertEquals(1, $ac->length); + } + + public function testMathMLAttribute() + { + $html = ' + + + + x + + ± + + y + + + '; + + $doc = $this->parse($html); + $root = $doc->documentElement; + + $csymbol = $root->getElementsByTagName('csymbol')->item(0); + $this->assertTrue($csymbol->hasAttribute('definitionURL')); + } + + public function testMissingHtmlTag() + { + $html = 'test'; + $doc = $this->parse($html); + + $this->assertEquals('html', $doc->documentElement->tagName); + $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName); + } + + public function testComment() + { + $html = ''; + + $doc = $this->parse($html); + + $comment = $doc->documentElement->childNodes->item(0); + $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); + $this->assertEquals('Hello World.', $comment->data); + + $html = ''; + $doc = $this->parse($html); + + $comment = $doc->childNodes->item(1); + $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); + $this->assertEquals('Hello World.', $comment->data); + + $comment = $doc->childNodes->item(2); + $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType); + $this->assertEquals('html', $comment->tagName); + } + + public function testCDATA() + { + $html = 'test'; + $doc = $this->parse($html); + + $wrapper = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals(1, $wrapper->childNodes->length); + $cdata = $wrapper->childNodes->item(0); + $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType); + $this->assertEquals('test', $cdata->data); + } + + public function testText() + { + $html = 'test'; + $doc = $this->parse($html); + + $wrapper = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals(1, $wrapper->childNodes->length); + $data = $wrapper->childNodes->item(0); + $this->assertEquals(XML_TEXT_NODE, $data->nodeType); + $this->assertEquals('test', $data->data); + + // The DomTreeBuilder has special handling for text when in before head mode. + $html = ' + Foo'; + $doc = $this->parse($html); + $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]); + $headElement = $doc->documentElement->firstChild; + $this->assertEquals('head', $headElement->tagName); + } + + public function testParseErrors() + { + $html = 'test'; + $doc = $this->parse($html); + + // We're JUST testing that we can access errors. Actual testing of + // error messages happen in the Tokenizer's tests. + $this->assertGreaterThan(0, count($this->errors)); + $this->assertTrue(is_string($this->errors[0])); + } + + public function testProcessingInstruction() + { + // Test the simple case, which is where PIs are inserted into the DOM. + $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>'); + $this->assertEquals(1, $doc->documentElement->childNodes->length); + $pi = $doc->documentElement->firstChild; + $this->assertInstanceOf('\DOMProcessingInstruction', $pi); + $this->assertEquals('foo', $pi->nodeName); + $this->assertEquals('bar', $pi->data); + + // Leading xml PIs should be ignored. + $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>'); + + $this->assertEquals(2, $doc->childNodes->length); + $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0)); + $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1)); + } + + public function testAutocloseP() + { + $html = '<!DOCTYPE html><html><body><p><figure></body></html>'; + $doc = $this->parse($html); + + $p = $doc->getElementsByTagName('p')->item(0); + $this->assertEquals(0, $p->childNodes->length); + $this->assertEquals('figure', $p->nextSibling->tagName); + } + + public function testAutocloseLI() + { + $html = '<!doctype html> + <html lang="en"> + <body> + <ul><li>Foo<li>Bar<li>Baz</ul> + </body> + </html>'; + + $doc = $this->parse($html); + $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length; + $this->assertEquals(3, $length); + } + + public function testMathML() + { + $html = '<!doctype html> + <html lang="en"> + <body> + <math xmlns="http://www.w3.org/1998/Math/MathML"> + <mi>x</mi> + <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus"> + <mo>&PlusMinus;</mo> + </csymbol> + <mi>y</mi> + </math> + </body> + </html>'; + + $doc = $this->parse($html); + $math = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals('math', $math->tagName); + $this->assertEquals('math', $math->nodeName); + $this->assertEquals('math', $math->localName); + $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI); + } + + public function testSVG() + { + $html = '<!doctype html> + <html lang="en"> + <body> + <svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg"> + <rect width="1" height="2" x="2" fill="#d2232c" /> + <text font-family="Verdana" font-size="32"> + <textpath xlink:href="#Foo"> + Test Text. + </textPath> + </text> + </svg> + </body> + </html>'; + + $doc = $this->parse($html); + $svg = $doc->getElementsByTagName('svg')->item(0); + $this->assertEquals('svg', $svg->tagName); + $this->assertEquals('svg', $svg->nodeName); + $this->assertEquals('svg', $svg->localName); + $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI); + + $textPath = $doc->getElementsByTagName('textPath')->item(0); + $this->assertEquals('textPath', $textPath->tagName); + } + + public function testNoScript() + { + $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>'; + $doc = $this->parse($html); + $this->assertEmpty($this->errors); + $noscript = $doc->getElementsByTagName('noscript')->item(0); + $this->assertEquals('noscript', $noscript->tagName); + + $html = '<!DOCTYPE html><html><body><noscript><p>No JS</p></noscript></body></html>'; + $doc = $this->parse($html); + $this->assertEmpty($this->errors); + $p = $doc->getElementsByTagName('p')->item(0); + $this->assertEquals('p', $p->tagName); + } + + /** + * Regression for issue #13. + */ + public function testRegressionHTMLNoBody() + { + $html = '<!DOCTYPE html><html><span id="test">Test</span></html>'; + $doc = $this->parse($html); + $span = $doc->getElementById('test'); + + $this->assertEmpty($this->errors); + + $this->assertEquals('span', $span->tagName); + $this->assertEquals('Test', $span->textContent); + } + + public function testInstructionProcessor() + { + $string = '<!DOCTYPE html><html><?foo bar ?></html>'; + + $treeBuilder = new DOMTreeBuilder(); + $is = new InstructionProcessorMock(); + $treeBuilder->setInstructionProcessor($is); + + $scanner = new Scanner($string); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + $dom = $treeBuilder->document(); + $div = $dom->getElementsByTagName('div')->item(0); + + $this->assertEquals(1, $is->count); + $this->assertEquals('foo', $is->name); + $this->assertEquals('bar ', $is->data); + $this->assertEquals('div', $div->tagName); + $this->assertEquals('foo', $div->textContent); + } + + public function testSelectGroupedOptions() + { + $html = <<<EOM +<!DOCTYPE html> +<html> + <head> + <title>testSelectGroupedOptions</title> + </head> + <body> + <select> + <optgroup id="first" label="first"> + <option value="foo">foo</option> + <option value="bar">bar</option> + <option value="baz">baz</option> + </optgroup> + <optgroup id="second" label="second"> + <option value="lorem">lorem</option> + <option value="ipsum">ipsum</option> + </optgroup> + </select> + </body> +</html> +EOM; + $dom = $this->parse($html); + + $this->assertSame(3, $dom->getElementById('first')->getElementsByTagName('option')->length); + $this->assertSame(2, $dom->getElementById('second')->getElementsByTagName('option')->length); + } + + public function testVoidTag() + { + $html = <<<EOM +<!DOCTYPE html> +<html> + <head> + <title>testVoidTag</title> + <meta> + <meta> + </head> + <body></body> +</html> +EOM; + + $dom = $this->parse($html); + $this->assertSame(2, $dom->getElementsByTagName('meta')->length); + $this->assertSame(0, $dom->getElementsByTagName('meta')->item(0)->childNodes->length); + $this->assertSame(0, $dom->getElementsByTagName('meta')->item(1)->childNodes->length); + } + + public function testIgnoreSelfClosingTag() + { + $html = <<<EOM +<!DOCTYPE html> +<html> + <head> + <title>testIllegalSelfClosingTag</title> + </head> + <body> + <div /><span>Hello, World!</span></div> + </body> +</html> +EOM; + + $dom = $this->parse($html); + $this->assertSame(1, $dom->getElementsByTagName('div')->item(0)->childNodes->length); + } + + public function testIAudioInParagraph() + { + $html = <<<EOM +<!DOCTYPE html> +<html> + <head> + <title>testIllegalSelfClosingTag</title> + </head> + <body> + <p> + <audio preload="none" controls="controls"> + <source src="https://example.com/test.mp3" type="audio/mpeg" /> + Your browser does not support the audio element. + </audio> + </p> + </body> +</html>> +</html> +EOM; + + $dom = $this->parse($html); + $audio = $dom->getElementsByTagName('audio')->item(0); + + $this->assertSame('p', $audio->parentNode->nodeName); + $this->assertSame(3, $audio->childNodes->length); + } + + public function testClosingBr() + { + $html = <<<EOM +<!DOCTYPE html> +<html> + <head> + <title>testClosingBr</title> + </head> + <body> + <p> + This line ends with a normal line break <br class="attribute-should-be-retained"> + This line ends with a line break marked up as a closing tag </br class="attribute-should-be-discarded"> + </p> + </body> +</html>> +</html> +EOM; + + $dom = $this->parse($html); + + $this->assertSame(2, $dom->getElementsByTagName('br')->length); + $this->assertSame(1, $dom->getElementsByTagName('br')->item(0)->attributes->length); + $this->assertSame(0, $dom->getElementsByTagName('br')->item(1)->attributes->length); + } +} diff --git a/vendor/masterminds/html5/test/HTML5/Parser/EventStack.php b/vendor/masterminds/html5/test/HTML5/Parser/EventStack.php new file mode 100644 index 0000000..3d1de77 --- /dev/null +++ b/vendor/masterminds/html5/test/HTML5/Parser/EventStack.php @@ -0,0 +1,116 @@ +<?php + +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Elements; +use Masterminds\HTML5\Parser\EventHandler; + +/** + * This testing class gathers events from a parser and builds a stack of events. + * It is useful for checking the output of a tokenizer. + * + * IMPORTANT: + * + * The startTag event also kicks the parser into TEXT_RAW when it encounters + * script or pre tags. This is to match the behavior required by the HTML5 spec, + * which says that the tree builder must tell the tokenizer when to switch states. + */ +class EventStack implements EventHandler +{ + protected $stack; + + public function __construct() + { + $this->stack = array(); + } + + /** + * Get the event stack. + */ + public function events() + { + return $this->stack; + } + + public function depth() + { + return count($this->stack); + } + + public function get($index) + { + return $this->stack[$index]; + } + + protected function store($event, $data = null) + { + $this->stack[] = array( + 'name' => $event, + 'data' => $data, + ); + } + + public function doctype($name, $type = 0, $id = null, $quirks = false) + { + $args = array( + $name, + $type, + $id, + $quirks, + ); + $this->store('doctype', $args); + } + + public function startTag($name, $attributes = array(), $selfClosing = false) + { + $args = func_get_args(); + $this->store('startTag', $args); + if ('pre' == $name || 'script' == $name) { + return Elements::TEXT_RAW; + } + } + + public function endTag($name) + { + $this->store('endTag', array( + $name, + )); + } + + public function comment($cdata) + { + $this->store('comment', array( + $cdata, + )); + } + + public function cdata($data) + { + $this->store('cdata', func_get_args()); + } + + public function text($cdata) + { + // fprintf(STDOUT, "Received TEXT event with: " . $cdata); + $this->store('text', array( + $cdata, + )); + } + + public function eof() + { + $this->store('eof'); + } + + public function parseError($msg, $line, $col) + { + // throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col)); + // $this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col)); + $this->store('error', func_get_args()); + } + + public function processingInstruction($name, $data = null) + { + $this->store('pi', func_get_args()); + } +} diff --git a/vendor/masterminds/html5/test/HTML5/Parser/EventStackError.php b/vendor/masterminds/html5/test/HTML5/Parser/EventStackError.php new file mode 100644 index 0000000..05c0a49 --- /dev/null +++ b/vendor/masterminds/html5/test/HTML5/Parser/EventStackError.php @@ -0,0 +1,7 @@ +<?php + +namespace Masterminds\HTML5\Tests\Parser; + +class EventStackError extends \Exception +{ +} diff --git a/vendor/masterminds/html5/test/HTML5/Parser/InstructionProcessorMock.php b/vendor/masterminds/html5/test/HTML5/Parser/InstructionProcessorMock.php new file mode 100644 index 0000000..4637a80 --- /dev/null +++ b/vendor/masterminds/html5/test/HTML5/Parser/InstructionProcessorMock.php @@ -0,0 +1,26 @@ +<?php + +namespace Masterminds\HTML5\Tests\Parser; + +class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor +{ + public $name = null; + + public $data = null; + + public $count = 0; + + public function process(\DOMElement $element, $name, $data) + { + $this->name = $name; + $this->data = $data; + ++$this->count; + + $div = $element->ownerDocument->createElement('div'); + $div->nodeValue = 'foo'; + + $element->appendChild($div); + + return $div; + } +} diff --git a/vendor/masterminds/html5/test/HTML5/Parser/ScannerTest.php b/vendor/masterminds/html5/test/HTML5/Parser/ScannerTest.php new file mode 100644 index 0000000..9f75c4d --- /dev/null +++ b/vendor/masterminds/html5/test/HTML5/Parser/ScannerTest.php @@ -0,0 +1,184 @@ +<?php +/** + * @file + * Test the Scanner. This requires the InputStream tests are all good. + */ + +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Parser\StringInputStream; +use Masterminds\HTML5\Parser\Scanner; + +class ScannerTest extends \Masterminds\HTML5\Tests\TestCase +{ + /** + * A canary test to make sure the basics are setup and working. + */ + public function testConstructDeprecated() + { + $is = new StringInputStream('abc'); + $s = new Scanner($is); + + $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s); + } + + public function testConstruct() + { + $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', new Scanner('abc')); + } + + public function testNextDeprecated() + { + $s = new Scanner(new StringInputStream('abc')); + + $this->assertEquals('b', $s->next()); + $this->assertEquals('c', $s->next()); + } + + public function testNext() + { + $s = new Scanner('abc'); + + $this->assertEquals('b', $s->next()); + $this->assertEquals('c', $s->next()); + } + + public function testPosition() + { + $s = new Scanner('abc'); + + $this->assertEquals(0, $s->position()); + + $s->next(); + $this->assertEquals(1, $s->position()); + } + + public function testPeek() + { + $s = new Scanner('abc'); + + $this->assertEquals('b', $s->peek()); + + $s->next(); + $this->assertEquals('c', $s->peek()); + } + + public function testCurrent() + { + $s = new Scanner('abc'); + + // Before scanning the string begins the current is empty. + $this->assertEquals('a', $s->current()); + + $c = $s->next(); + $this->assertEquals('b', $s->current()); + + // Test movement through the string. + $c = $s->next(); + $this->assertEquals('c', $s->current()); + } + + public function testUnconsume() + { + $s = new Scanner('abcdefghijklmnopqrst'); + + // Get initial position. + $s->next(); + $start = $s->position(); + + // Move forward a bunch of positions. + $amount = 7; + for ($i = 0; $i < $amount; ++$i) { + $s->next(); + } + + // Roll back the amount we moved forward. + $s->unconsume($amount); + + $this->assertEquals($start, $s->position()); + } + + public function testGetHex() + { + $s = new Scanner('ab13ck45DE*'); + + $this->assertEquals('ab13c', $s->getHex()); + + $s->next(); + $this->assertEquals('45DE', $s->getHex()); + } + + public function testGetAsciiAlpha() + { + $s = new Scanner('abcdef1%mnop*'); + + $this->assertEquals('abcdef', $s->getAsciiAlpha()); + + // Move past the 1% to scan the next group of text. + $s->next(); + $s->next(); + $this->assertEquals('mnop', $s->getAsciiAlpha()); + } + + public function testGetAsciiAlphaNum() + { + $s = new Scanner('abcdef1ghpo#mn94op'); + + $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum()); + + // Move past the # to scan the next group of text. + $s->next(); + $this->assertEquals('mn94op', $s->getAsciiAlphaNum()); + } + + public function testGetNumeric() + { + $s = new Scanner('1784a 45 9867 #'); + + $this->assertEquals('1784', $s->getNumeric()); + + // Move past the 'a ' to scan the next group of text. + $s->next(); + $s->next(); + $this->assertEquals('45', $s->getNumeric()); + } + + public function testCurrentLine() + { + $s = new Scanner("1784a\n45\n9867 #\nThis is a test."); + + $this->assertEquals(1, $s->currentLine()); + + // Move to the next line. + $s->getAsciiAlphaNum(); + $s->next(); + $this->assertEquals(2, $s->currentLine()); + } + + public function testColumnOffset() + { + $s = new Scanner("1784a a\n45 9867 #\nThis is a test."); + + // Move the pointer to the space. + $s->getAsciiAlphaNum(); + $this->assertEquals(5, $s->columnOffset()); + + // We move the pointer ahead. There must be a better way to do this. + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(3, $s->columnOffset()); + } + + public function testRemainingChars() + { + $string = "\n45\n9867 #\nThis is a test."; + $s = new Scanner("1784a\n45\n9867 #\nThis is a test."); + + $s->getAsciiAlphaNum(); + $this->assertEquals($string, $s->remainingChars()); + } +} diff --git a/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php b/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php new file mode 100644 index 0000000..5284d30 --- /dev/null +++ b/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php @@ -0,0 +1,978 @@ +<?php + +namespace Masterminds\HTML5\Tests\Parser; + +use Masterminds\HTML5\Parser\UTF8Utils; +use Masterminds\HTML5\Parser\Scanner; +use Masterminds\HTML5\Parser\Tokenizer; + +class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase +{ + // ================================================================ + // Additional assertions. + // ================================================================ + + /** + * Tests that an event matches both the event type and the expected value. + * + * @param string $type + * Expected event type + * @param string $expects + * The value expected in $event['data'][0] + */ + public function assertEventEquals($type, $expects, $event) + { + $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, true)); + if (is_array($expects)) { + $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ': ' . print_r($event, true)); + } else { + $d = (is_array($event['data']) ? $event['data'][0] : null); + $this->assertEquals($expects, $d, "Event $type should equal $expects: " . print_r($event, true)); + } + } + + /** + * Assert that a given event is 'error'. + */ + public function assertEventError($event) + { + $this->assertEquals('error', $event['name'], 'Expected error for event: ' . print_r($event, true)); + } + + /** + * Asserts that all of the tests are good. + * + * This loops through a map of tests/expectations and runs a few assertions on each test. + * + * Checks: + * - depth (if depth is > 0) + * - event name + * - matches on event 0. + */ + protected function isAllGood($name, $depth, $tests, $debug = false) + { + foreach ($tests as $try => $expects) { + if ($debug) { + fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, true)); + } + $e = $this->parse($try); + if ($depth > 0) { + $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, true)); + } + $this->assertEventEquals($name, $expects, $e->get(0)); + } + } + + // ================================================================ + // Utility functions. + // ================================================================ + public function testParse() + { + list($tok, $events) = $this->createTokenizer(''); + + $tok->parse(); + $e1 = $events->get(0); + + $this->assertEquals(1, $events->Depth()); + $this->assertEquals('eof', $e1['name']); + } + + public function testWhitespace() + { + $spaces = ' '; + list($tok, $events) = $this->createTokenizer($spaces); + + $tok->parse(); + + $this->assertEquals(2, $events->depth()); + + $e1 = $events->get(0); + + $this->assertEquals('text', $e1['name']); + $this->assertEquals($spaces, $e1['data'][0]); + } + + public function testCharacterReference() + { + $good = array( + '&amp;' => '&', + '&#x0003c;' => '<', + '&#38;' => '&', + '&' => '&', + ); + $this->isAllGood('text', 2, $good); + + // Test with broken charref + $str = '&foo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + $str = '&#xfoo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + $str = '&#foo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + // FIXME: Once the text processor is done, need to verify that the + // tokens are transformed correctly into text. + } + + public function testBogusComment() + { + $bogus = array( + '</+this is a bogus comment. +>', + '<!+this is a bogus comment. !>', + '<!D OCTYPE foo bar>', + '<!DOCTYEP foo bar>', + '<![CADATA[ TEST ', + '', + ' Hello [[>', + '<!CDATA[[ test ', + '', + '<![CDATA[hellooooo hello', + '<? Hello World ?>', + '<? Hello World', + ); + foreach ($bogus as $str) { + $events = $this->parse($str); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('comment', $str, $events->get(1)); + } + } + + public function testEndTag() + { + $succeed = array( + '</a>' => 'a', + '</test>' => 'test', + '</test + >' => 'test', + '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend', + // See 8.2.4.10, which requires this and does not say error. + '</a<b>' => 'a<b', + ); + $this->isAllGood('endTag', 2, $succeed); + + // Recoverable failures + $fail = array( + '</a class="monkey">' => 'a', + '</a <b>' => 'a', + '</a <b <c>' => 'a', + '</a is the loneliest letter>' => 'a', + '</a' => 'a', + ); + foreach ($fail as $test => $result) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + // Should have triggered an error. + $this->assertEventError($events->get(0)); + // Should have tried to parse anyway. + $this->assertEventEquals('endTag', $result, $events->get(1)); + } + + // BogoComments + $comments = array( + '</>' => '</>', + '</ >' => '</ >', + '</ a>' => '</ a>', + ); + foreach ($comments as $test => $result) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + + // Should have triggered an error. + $this->assertEventError($events->get(0)); + + // Should have tried to parse anyway. + $this->assertEventEquals('comment', $result, $events->get(1)); + } + } + + public function testComment() + { + $good = array( + '<!--easy-->' => 'easy', + '<!-- 1 > 0 -->' => ' 1 > 0 ', + '<!-- --$i -->' => ' --$i ', + '<!----$i-->' => '--$i', + "<!--\nHello World.\na-->" => "\nHello World.\na", + '<!-- <!-- -->' => ' <!-- ', + ); + foreach ($good as $test => $expected) { + $events = $this->parse($test); + $this->assertEventEquals('comment', $expected, $events->get(0)); + } + + $fail = array( + '<!-->' => '', + '<!--Hello' => 'Hello', + "<!--\0Hello" => UTF8Utils::FFFD . 'Hello', + '<!--' => '', + ); + foreach ($fail as $test => $expected) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('comment', $expected, $events->get(1)); + } + } + + public function testCDATASection() + { + $good = array( + '<![CDATA[ This is a test. ' => ' This is a test. ', + 'CDATA' => 'CDATA', + ' ]] > ' => ' ]] > ', + ' ' => ' ', + ); + $this->isAllGood('cdata', 2, $good); + } + + public function testDoctype() + { + $good = array( + '' => array( + 'html', + 0, + null, + false, + ), + '' => array( + 'html', + 0, + null, + false, + ), + '' => array( + 'html', + 0, + null, + false, + ), + "" => array( + 'html', + 0, + null, + false, + ), + "" => array( + 'html', + 0, + null, + false, + ), + '' => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false, + ), + "" => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false, + ), + '' => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false, + ), + "" => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + false, + ), + '' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + false, + ), + "" => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + false, + ), + '' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo/bar', + false, + ), + "" => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + false, + ), + ); + $this->isAllGood('doctype', 2, $good); + + $bad = array( + '' => array( + null, + EventStack::DOCTYPE_NONE, + null, + true, + ), + '' => array( + null, + EventStack::DOCTYPE_NONE, + null, + true, + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + + // Can't tell whether these are ids or ID types, since the context is chopped. + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + ' array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + '' => array( + 'foo', + EventStack::DOCTYPE_NONE, + null, + true, + ), + + ' array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + true, + ), + '' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + true, + ), + ); + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + // fprintf(STDOUT, $test . PHP_EOL); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('doctype', $expects, $events->get(1)); + } + } + + public function testProcessorInstruction() + { + $good = array( + '' => 'hph', + '' => array( + 'hph', + 'echo "Hello World"; ', + ), + "" => array( + 'hph', + "echo 'Hello World';\n", + ), + ); + $this->isAllGood('pi', 2, $good); + } + + /** + * This tests just simple tags. + */ + public function testSimpleTags() + { + $open = array( + '' => 'foo', + '' => 'foo', + '' => 'foo', + '' => 'foo', + "" => 'foo', + '' => 'foo:bar', + ); + $this->isAllGood('startTag', 2, $open); + + $selfClose = array( + '' => 'foo', + '' => 'foo', + '' => 'foo', + "" => 'foo', + '' => 'foo:bar', + ); + foreach ($selfClose as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(2, $events->depth(), "Counting events for '$test'" . print_r($events, true)); + $this->assertEventEquals('startTag', $expects, $events->get(0)); + $event = $events->get(0); + $this->assertTrue($event['data'][2]); + } + + $bad = array( + ' 'foo', + ' 'foo', + ' 'foo', + ' 'foo', + ); + + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + } + + public function testTagsWithAttributeAndMissingName() + { + $cases = array( + '' => 'id', + '' => 'color', + "" => 'class', + '' => 'bgcolor', + '' => 'class', + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', $expected, $events->get(3)); + $this->assertEventEquals('eof', null, $events->get(4)); + } + } + + public function testTagNotClosedAfterTagName() + { + $cases = array( + '' => array( + 'noscript', + 'img', + ), + '' => array( + 'center', + 'a', + ), + '' => array( + 'br', + 'br', + ), + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected[0], $events->get(1)); + $this->assertEventEquals('startTag', $expected[1], $events->get(2)); + $this->assertEventEquals('eof', null, $events->get(3)); + } + + $events = $this->parse('02'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'span', $events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('text', '>02', $events->get(3)); + $this->assertEventEquals('endTag', 'span', $events->get(4)); + $this->assertEventEquals('eof', null, $events->get(5)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'p', $events->get(1)); + $this->assertEventEquals('endTag', 'p', $events->get(2)); + $this->assertEventEquals('eof', null, $events->get(3)); + + $events = $this->parse(''); + $this->assertEventEquals('startTag', 'strong', $events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventEquals('startTag', 'wordpress', $events->get(2)); + $this->assertEventEquals('endTag', 'strong', $events->get(3)); + $this->assertEventEquals('eof', null, $events->get(4)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', 'src', $events->get(3)); + $this->assertEventEquals('startTag', 'a', $events->get(4)); + $this->assertEventEquals('eof', null, $events->get(5)); + + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'br', $events->get(1)); + $this->assertEventEquals('eof', null, $events->get(2)); + } + + public function testIllegalTagNames() + { + $cases = array( + '' => 'li', + '' => 'p', + '' => 'b', + '' => 'static', + '' => 'h', + '' => 'st', + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected, $events->get(1)); + } + } + + public function testTagAttributes() + { + // Opening tags. + $good = array( + '' => array( + 'foo', + array( + 'bar' => 'baz', + ), + false, + ), + '' => array( + 'foo', + array( + 'bar' => ' baz ', + ), + false, + ), + "" => array( + 'foo', + array( + 'bar' => "\nbaz\n", + ), + false, + ), + "" => array( + 'foo', + array( + 'bar' => 'baz', + ), + false, + ), + '' => array( + 'foo', + array( + 'bar' => 'A full sentence.', + ), + false, + ), + "" => array( + 'foo', + array( + 'a' => '1', + 'b' => '2', + ), + false, + ), + "" => array( + 'foo', + array( + 'ns:bar' => 'baz', + ), + false, + ), + "" => array( + 'foo', + array( + 'a' => 'blue&red', + ), + false, + ), + "" => array( + 'foo', + array( + 'a' => 'blue&red', + ), + false, + ), + "" => array( + 'foo', + array( + 'a' => 'blue&&&red', + ), + false, + ), + "" => array( + 'foo', + array( + 'a' => 'blue&&red', + ), + false, + ), + "" => array( + 'foo', + array( + 'bar' => 'baz', + ), + false, + ), + '' => array( + 'doe', + array( + 'a' => null, + 'deer' => null, + ), + false, + ), + '' => array( + 'foo', + array( + 'bar' => 'baz', + ), + false, + ), + + // Updated for 8.1.2.3 + '' => array( + 'foo', + array( + 'bar' => 'baz', + ), + false, + ), + + // The spec allows an unquoted value '/'. This will not be a closing + // tag. + '' => array( + 'foo', + array( + 'bar' => '/', + ), + false, + ), + '' => array( + 'foo', + array( + 'bar' => 'baz/', + ), + false, + ), + ); + $this->isAllGood('startTag', 2, $good); + + // Self-closing tags. + $withEnd = array( + '' => array( + 'foo', + array( + 'bar' => 'baz', + ), + true, + ), + '' => array( + 'foo', + array( + 'bar' => 'baz', + ), + true, + ), + '' => array( + 'foo', + array( + 'bar' => 'BAZ', + ), + true, + ), + "" => array( + 'foo', + array( + 'a' => '1', + 'b' => '2', + 'c' => '3', + 'd' => null, + ), + true, + ), + ); + $this->isAllGood('startTag', 2, $withEnd); + + // Cause a parse error. + $bad = array( + // This will emit an entity lookup failure for &+dark. + "" => array( + 'foo', + array( + 'a' => 'blue&+dark', + ), + false, + ), + '' => array( + 'foo', + array( + 'bar' => null, + ), + false, + ), + '' => array( + 'foo', + array( + 'bar' => 'oh"', + ), + false, + ), + + // these attributes are ignored because of current implementation + // of method "DOMElement::setAttribute" + // see issue #23: https://github.com/Masterminds/html5-php/issues/23 + '' => array( + 'foo', + array(), + false, + ), + '' => array( + 'foo', + array(), + false, + ), + '' => array( + 'foo', + array(), + false, + ), + '' => array( + 'foo', + array(), + false, + ), + ) + ; + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + + // Cause multiple parse errors. + $reallyBad = array( + '' => array( + 'foo', + array( + '=' => null, + '"bar"' => null, + ), + false, + ), + '' => array( + 'foo', + array(), + true, + ), + // character "&" in unquoted attribute shouldn't cause an infinite loop + '' => array( + 'foo', + array( + 'bar' => 'index.php?str=1&id=29', + ), + false, + ), + ); + foreach ($reallyBad as $test => $expects) { + $events = $this->parse($test); + // fprintf(STDOUT, $test . print_r($events, true)); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + // $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + + // Regression: Malformed elements should be detected. + // '' => array('foo', array('baz' => '1'), false), + $events = $this->parse(''); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', array( + 'foo', + array( + 'baz' => '1', + ), + false, + ), $events->get(1)); + $this->assertEventEquals('startTag', array( + 'bar', + array(), + false, + ), $events->get(2)); + $this->assertEventEquals('endTag', array( + 'foo', + ), $events->get(3)); + } + + public function testRawText() + { + $good = array( + ' ' => 'abcd efg hijk lmnop', + '' => '', + '' => '<<<<<<<<', + '' => 'hello\nhello" => "\nhello&' => '&', + '' => '', + '' => '', + ); + foreach ($good as $test => $expects) { + $events = $this->parse($test); + $this->assertEventEquals('startTag', 'script', $events->get(0)); + $this->assertEventEquals('text', $expects, $events->get(1)); + $this->assertEventEquals('endTag', 'script', $events->get(2)); + } + + $bad = array( + '