diff options
Diffstat (limited to 'test/HTML5/Parser')
-rw-r--r-- | test/HTML5/Parser/CharacterReferenceTest.php | 56 | ||||
-rw-r--r-- | test/HTML5/Parser/DOMTreeBuilderTest.php | 569 | ||||
-rw-r--r-- | test/HTML5/Parser/EventStack.php | 156 | ||||
-rw-r--r-- | test/HTML5/Parser/EventStackError.php | 3 | ||||
-rw-r--r-- | test/HTML5/Parser/FileInputStreamTest.php | 322 | ||||
-rw-r--r-- | test/HTML5/Parser/InstructionProcessorMock.php | 12 | ||||
-rw-r--r-- | test/HTML5/Parser/ScannerTest.php | 231 | ||||
-rw-r--r-- | test/HTML5/Parser/StringInputStreamTest.php | 654 | ||||
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 1475 | ||||
-rw-r--r-- | test/HTML5/Parser/TreeBuildingRulesTest.php | 97 |
10 files changed, 2018 insertions, 1557 deletions
diff --git a/test/HTML5/Parser/CharacterReferenceTest.php b/test/HTML5/Parser/CharacterReferenceTest.php index 6dedb00..762bcc2 100644 --- a/test/HTML5/Parser/CharacterReferenceTest.php +++ b/test/HTML5/Parser/CharacterReferenceTest.php @@ -6,35 +6,39 @@ namespace Masterminds\HTML5\Tests\Parser; use Masterminds\HTML5\Parser\CharacterReference; -class CharacterReferenceTest extends \Masterminds\HTML5\Tests\TestCase { - public function testLookupName() { - $this->assertEquals('&', CharacterReference::lookupName('amp')); - $this->assertEquals('<', CharacterReference::lookupName('lt')); - $this->assertEquals('>', CharacterReference::lookupName('gt')); - $this->assertEquals('"', CharacterReference::lookupName('quot')); - $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement')); - $this->assertNull(CharacterReference::lookupName('StinkyCheese')); - } +class CharacterReferenceTest extends \Masterminds\HTML5\Tests\TestCase +{ - public function testLookupHex() { - $this->assertEquals('<', CharacterReference::lookupHex('3c')); - $this->assertEquals('<', CharacterReference::lookupHex('003c')); - $this->assertEquals('&', CharacterReference::lookupHex('26')); - $this->assertEquals('}', CharacterReference::lookupHex('7d')); - $this->assertEquals('Σ', CharacterReference::lookupHex('3A3')); - $this->assertEquals('Σ', CharacterReference::lookupHex('03A3')); - $this->assertEquals('Σ', CharacterReference::lookupHex('3a3')); - $this->assertEquals('Σ', CharacterReference::lookupHex('03a3')); - } + public function testLookupName() + { + $this->assertEquals('&', CharacterReference::lookupName('amp')); + $this->assertEquals('<', CharacterReference::lookupName('lt')); + $this->assertEquals('>', CharacterReference::lookupName('gt')); + $this->assertEquals('"', CharacterReference::lookupName('quot')); + $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement')); - public function testLookupDecimal() { - $this->assertEquals('&', CharacterReference::lookupDecimal(38)); - $this->assertEquals('&', CharacterReference::lookupDecimal('38')); - $this->assertEquals('<', CharacterReference::lookupDecimal(60)); - $this->assertEquals('Σ', CharacterReference::lookupDecimal(931)); - $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931')); - } + $this->assertNull(CharacterReference::lookupName('StinkyCheese')); + } + public function testLookupHex() + { + $this->assertEquals('<', CharacterReference::lookupHex('3c')); + $this->assertEquals('<', CharacterReference::lookupHex('003c')); + $this->assertEquals('&', CharacterReference::lookupHex('26')); + $this->assertEquals('}', CharacterReference::lookupHex('7d')); + $this->assertEquals('Σ', CharacterReference::lookupHex('3A3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('03A3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('3a3')); + $this->assertEquals('Σ', CharacterReference::lookupHex('03a3')); + } + public function testLookupDecimal() + { + $this->assertEquals('&', CharacterReference::lookupDecimal(38)); + $this->assertEquals('&', CharacterReference::lookupDecimal('38')); + $this->assertEquals('<', CharacterReference::lookupDecimal(60)); + $this->assertEquals('Σ', CharacterReference::lookupDecimal(931)); + $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931')); + } } diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index 52dad30..d8b686c 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -5,7 +5,6 @@ */ namespace Masterminds\HTML5\Tests\Parser; -use Masterminds\HTML5\Elements; use Masterminds\HTML5\Parser\StringInputStream; use Masterminds\HTML5\Parser\Scanner; use Masterminds\HTML5\Parser\Tokenizer; @@ -14,119 +13,128 @@ use Masterminds\HTML5\Parser\DOMTreeBuilder; /** * These tests are functional, not necessarily unit tests. */ -class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase { - - /** - * Convenience function for parsing. - */ - protected function parse($string) { - $treeBuilder = new DOMTreeBuilder(); - $input = new StringInputStream($string); - $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $treeBuilder); - - $parser->parse(); - - return $treeBuilder->document(); - } - - /** - * Utility function for parsing a fragment of HTML5. - */ - protected function parseFragment($string) { - $treeBuilder = new DOMTreeBuilder(TRUE); - $input = new StringInputStream($string); - $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $treeBuilder); - - $parser->parse(); - - return $treeBuilder->fragment(); - } - - public function testDocument() { - $html = "<!DOCTYPE html><html></html>"; - $doc = $this->parse($html); - - $this->assertInstanceOf('\DOMDocument', $doc); - $this->assertEquals('html', $doc->documentElement->tagName); - } - - public function testFragment() { - $html = "<div>test</div><span>test2</span>"; - $doc = $this->parseFragment($html); - - $this->assertInstanceOf('\DOMDocumentFragment', $doc); - $this->assertTrue($doc->hasChildNodes()); - $this->assertEquals('div', $doc->childNodes->item(0)->tagName); - $this->assertEquals('test', $doc->childNodes->item(0)->textContent); - $this->assertEquals('span', $doc->childNodes->item(1)->tagName); - $this->assertEquals('test2', $doc->childNodes->item(1)->textContent); - } - - public function testElements() { - $html = "<!DOCTYPE html><html><head><title></title></head><body></body></html>"; - $doc = $this->parse($html); - $root = $doc->documentElement; - - $this->assertEquals('html', $root->tagName); - $this->assertEquals('html', $root->localName); - $this->assertEquals('html', $root->nodeName); - - $this->assertEquals(2, $root->childNodes->length); - $kids = $root->childNodes; - - $this->assertEquals('head', $kids->item(0)->tagName); - $this->assertEquals('body', $kids->item(1)->tagName); - - $head = $kids->item(0); - $this->assertEquals(1, $head->childNodes->length); - $this->assertEquals('title', $head->childNodes->item(0)->tagName); - } - - public function testAttributes() { - $html = "<!DOCTYPE html> +class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase +{ + + /** + * Convenience function for parsing. + */ + protected function parse($string) + { + $treeBuilder = new DOMTreeBuilder(); + $input = new StringInputStream($string); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + + return $treeBuilder->document(); + } + + /** + * Utility function for parsing a fragment of HTML5. + */ + protected function parseFragment($string) + { + $treeBuilder = new DOMTreeBuilder(TRUE); + $input = new StringInputStream($string); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + + return $treeBuilder->fragment(); + } + + public function testDocument() + { + $html = "<!DOCTYPE html><html></html>"; + $doc = $this->parse($html); + + $this->assertInstanceOf('\DOMDocument', $doc); + $this->assertEquals('html', $doc->documentElement->tagName); + } + + public function testFragment() + { + $html = "<div>test</div><span>test2</span>"; + $doc = $this->parseFragment($html); + + $this->assertInstanceOf('\DOMDocumentFragment', $doc); + $this->assertTrue($doc->hasChildNodes()); + $this->assertEquals('div', $doc->childNodes->item(0)->tagName); + $this->assertEquals('test', $doc->childNodes->item(0)->textContent); + $this->assertEquals('span', $doc->childNodes->item(1)->tagName); + $this->assertEquals('test2', $doc->childNodes->item(1)->textContent); + } + + public function testElements() + { + $html = "<!DOCTYPE html><html><head><title></title></head><body></body></html>"; + $doc = $this->parse($html); + $root = $doc->documentElement; + + $this->assertEquals('html', $root->tagName); + $this->assertEquals('html', $root->localName); + $this->assertEquals('html', $root->nodeName); + + $this->assertEquals(2, $root->childNodes->length); + $kids = $root->childNodes; + + $this->assertEquals('head', $kids->item(0)->tagName); + $this->assertEquals('body', $kids->item(1)->tagName); + + $head = $kids->item(0); + $this->assertEquals(1, $head->childNodes->length); + $this->assertEquals('title', $head->childNodes->item(0)->tagName); + } + + public function testAttributes() + { + $html = "<!DOCTYPE html> <html> <head><title></title></head> <body id='a' class='b c'></body> </html>"; - $doc = $this->parse($html); - $root = $doc->documentElement; - - $body = $root->GetElementsByTagName('body')->item(0); - $this->assertEquals('body', $body->tagName); - $this->assertTrue($body->hasAttributes()); - $this->assertEquals('a', $body->getAttribute('id')); - $this->assertEquals('b c', $body->getAttribute('class')); - - $body2 = $doc->getElementById('a'); - $this->assertEquals('body', $body2->tagName); - $this->assertEquals('a', $body2->getAttribute('id')); - } - - public function testSVGAttributes() { - $html = "<!DOCTYPE html> + $doc = $this->parse($html); + $root = $doc->documentElement; + + $body = $root->GetElementsByTagName('body')->item(0); + $this->assertEquals('body', $body->tagName); + $this->assertTrue($body->hasAttributes()); + $this->assertEquals('a', $body->getAttribute('id')); + $this->assertEquals('b c', $body->getAttribute('class')); + + $body2 = $doc->getElementById('a'); + $this->assertEquals('body', $body2->tagName); + $this->assertEquals('a', $body2->getAttribute('id')); + } + + public function testSVGAttributes() + { + $html = "<!DOCTYPE html> <html><body> <svg width='150' viewbox='2'> <rect textlength='2'/> <animatecolor>foo</animatecolor> </svg> </body></html>"; - $doc = $this->parse($html); - $root = $doc->documentElement; + $doc = $this->parse($html); + $root = $doc->documentElement; - $svg = $root->getElementsByTagName('svg')->item(0); - $this->assertTrue($svg->hasAttribute('viewBox')); + $svg = $root->getElementsByTagName('svg')->item(0); + $this->assertTrue($svg->hasAttribute('viewBox')); - $rect = $root->getElementsByTagName('rect')->item(0); - $this->assertTrue($rect->hasAttribute('textLength')); + $rect = $root->getElementsByTagName('rect')->item(0); + $this->assertTrue($rect->hasAttribute('textLength')); - $ac = $root->getElementsByTagName('animateColor'); - $this->assertEquals(1, $ac->length); - } + $ac = $root->getElementsByTagName('animateColor'); + $this->assertEquals(1, $ac->length); + } - public function testMathMLAttribute() { - $html = '<!doctype html> + public function testMathMLAttribute() + { + $html = '<!doctype html> <html lang="en"> <body> <math> @@ -139,124 +147,132 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase { </body> </html>'; - $doc = $this->parse($html); - $root = $doc->documentElement; - - $csymbol = $root->getElementsByTagName('csymbol')->item(0); - $this->assertTrue($csymbol->hasAttribute('definitionURL')); - } - - public function testMissingHtmlTag() { - $html = "<!DOCTYPE html><title>test</title>"; - $doc = $this->parse($html); - - $this->assertEquals('html', $doc->documentElement->tagName); - $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName); - } - - public function testComment() { - $html = '<html><!--Hello World.--></html>'; - - $doc = $this->parse($html); - - $comment = $doc->documentElement->childNodes->item(0); - $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); - $this->assertEquals("Hello World.", $comment->data); - - - $html = '<!--Hello World.--><html></html>'; - $doc = $this->parse($html); - - $comment = $doc->childNodes->item(1); - $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); - $this->assertEquals("Hello World.", $comment->data); - - $comment = $doc->childNodes->item(2); - $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType); - $this->assertEquals("html", $comment->tagName); - } - - public function testCDATA() { - $html = "<!DOCTYPE html><html><math><![CDATA[test]]></math></html>"; - $doc = $this->parse($html); - - $wrapper = $doc->getElementsByTagName('math')->item(0); - $this->assertEquals(1, $wrapper->childNodes->length); - $cdata = $wrapper->childNodes->item(0); - $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType); - $this->assertEquals('test', $cdata->data); - } - - public function testText() { - $html = "<!DOCTYPE html><html><head></head><body><math>test</math></body></html>"; - $doc = $this->parse($html); - - $wrapper = $doc->getElementsByTagName('math')->item(0); - $this->assertEquals(1, $wrapper->childNodes->length); - $data = $wrapper->childNodes->item(0); - $this->assertEquals(XML_TEXT_NODE, $data->nodeType); - $this->assertEquals('test', $data->data); - - // The DomTreeBuilder has special handling for text when in before head mode. - $html = "<!DOCTYPE html><html> + $doc = $this->parse($html); + $root = $doc->documentElement; + + $csymbol = $root->getElementsByTagName('csymbol')->item(0); + $this->assertTrue($csymbol->hasAttribute('definitionURL')); + } + + public function testMissingHtmlTag() + { + $html = "<!DOCTYPE html><title>test</title>"; + $doc = $this->parse($html); + + $this->assertEquals('html', $doc->documentElement->tagName); + $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName); + } + + public function testComment() + { + $html = '<html><!--Hello World.--></html>'; + + $doc = $this->parse($html); + + $comment = $doc->documentElement->childNodes->item(0); + $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); + $this->assertEquals("Hello World.", $comment->data); + + $html = '<!--Hello World.--><html></html>'; + $doc = $this->parse($html); + + $comment = $doc->childNodes->item(1); + $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType); + $this->assertEquals("Hello World.", $comment->data); + + $comment = $doc->childNodes->item(2); + $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType); + $this->assertEquals("html", $comment->tagName); + } + + public function testCDATA() + { + $html = "<!DOCTYPE html><html><math><![CDATA[test]]></math></html>"; + $doc = $this->parse($html); + + $wrapper = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals(1, $wrapper->childNodes->length); + $cdata = $wrapper->childNodes->item(0); + $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType); + $this->assertEquals('test', $cdata->data); + } + + public function testText() + { + $html = "<!DOCTYPE html><html><head></head><body><math>test</math></body></html>"; + $doc = $this->parse($html); + + $wrapper = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals(1, $wrapper->childNodes->length); + $data = $wrapper->childNodes->item(0); + $this->assertEquals(XML_TEXT_NODE, $data->nodeType); + $this->assertEquals('test', $data->data); + + // The DomTreeBuilder has special handling for text when in before head mode. + $html = "<!DOCTYPE html><html> Foo<head></head><body></body></html>"; - $doc = $this->parse($html); - $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]); - $headElement = $doc->documentElement->firstChild; - $this->assertEquals('head', $headElement->tagName); - } - - public function testParseErrors() { - $html = "<!DOCTYPE html><html><math><![CDATA[test"; - $doc = $this->parse($html); - - // We're JUST testing that we can access errors. Actual testing of - // error messages happen in the Tokenizer's tests. - $this->assertGreaterThan(0, count($doc->errors)); - $this->assertTrue(is_string($doc->errors[0])); - } - - public function testProcessingInstruction() { - // Test the simple case, which is where PIs are inserted into the DOM. - $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>'); - $this->assertEquals(1, $doc->documentElement->childNodes->length); - $pi = $doc->documentElement->firstChild; - $this->assertInstanceOf('\DOMProcessingInstruction', $pi); - $this->assertEquals('foo', $pi->nodeName); - $this->assertEquals('bar', $pi->data); - - // Leading xml PIs should be ignored. - $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>'); - - $this->assertEquals(2, $doc->childNodes->length); - $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0)); - $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1)); - } - - public function testAutocloseP() { - $html = "<!DOCTYPE html><html><body><p><figure></body></html>"; - $doc = $this->parse($html); - - $p = $doc->getElementsByTagName('p')->item(0); - $this->assertEquals(0, $p->childNodes->length); - $this->assertEquals('figure', $p->nextSibling->tagName); - } - - public function testAutocloseLI() { - $html = '<!doctype html> + $doc = $this->parse($html); + $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]); + $headElement = $doc->documentElement->firstChild; + $this->assertEquals('head', $headElement->tagName); + } + + public function testParseErrors() + { + $html = "<!DOCTYPE html><html><math><![CDATA[test"; + $doc = $this->parse($html); + + // We're JUST testing that we can access errors. Actual testing of + // error messages happen in the Tokenizer's tests. + $this->assertGreaterThan(0, count($doc->errors)); + $this->assertTrue(is_string($doc->errors[0])); + } + + public function testProcessingInstruction() + { + // Test the simple case, which is where PIs are inserted into the DOM. + $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>'); + $this->assertEquals(1, $doc->documentElement->childNodes->length); + $pi = $doc->documentElement->firstChild; + $this->assertInstanceOf('\DOMProcessingInstruction', $pi); + $this->assertEquals('foo', $pi->nodeName); + $this->assertEquals('bar', $pi->data); + + // Leading xml PIs should be ignored. + $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>'); + + $this->assertEquals(2, $doc->childNodes->length); + $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0)); + $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1)); + } + + public function testAutocloseP() + { + $html = "<!DOCTYPE html><html><body><p><figure></body></html>"; + $doc = $this->parse($html); + + $p = $doc->getElementsByTagName('p')->item(0); + $this->assertEquals(0, $p->childNodes->length); + $this->assertEquals('figure', $p->nextSibling->tagName); + } + + public function testAutocloseLI() + { + $html = '<!doctype html> <html lang="en"> <body> <ul><li>Foo<li>Bar<li>Baz</ul> </body> </html>'; - $doc = $this->parse($html); - $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length; - $this->assertEquals(3, $length); - } + $doc = $this->parse($html); + $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length; + $this->assertEquals(3, $length); + } - public function testMathML() { - $html = '<!doctype html> + public function testMathML() + { + $html = '<!doctype html> <html lang="en"> <body> <math xmlns="http://www.w3.org/1998/Math/MathML"> @@ -269,16 +285,17 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase { </body> </html>'; - $doc = $this->parse($html); - $math = $doc->getElementsByTagName('math')->item(0); - $this->assertEquals('math', $math->tagName); - $this->assertEquals('math', $math->nodeName); - $this->assertEquals('math', $math->localName); - $this->assertEmpty($math->namespaceURI); - } - - public function testSVG() { - $html = '<!doctype html> + $doc = $this->parse($html); + $math = $doc->getElementsByTagName('math')->item(0); + $this->assertEquals('math', $math->tagName); + $this->assertEquals('math', $math->nodeName); + $this->assertEquals('math', $math->localName); + $this->assertEmpty($math->namespaceURI); + } + + public function testSVG() + { + $html = '<!doctype html> <html lang="en"> <body> <svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg"> @@ -292,59 +309,61 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase { </body> </html>'; - $doc = $this->parse($html); - $svg = $doc->getElementsByTagName('svg')->item(0); - $this->assertEquals('svg', $svg->tagName); - $this->assertEquals('svg', $svg->nodeName); - $this->assertEquals('svg', $svg->localName); - $this->assertEmpty($svg->namespaceURI); - - $textPath = $doc->getElementsByTagName('textPath')->item(0); - $this->assertEquals('textPath', $textPath->tagName); - } - - public function testNoScript() { - $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>'; - $doc = $this->parse($html); - $this->assertEmpty($doc->errors); - $noscript = $doc->getElementsByTagName('noscript')->item(0); - $this->assertEquals('noscript', $noscript->tagName); - } - - /** - * Regression for issue #13 - */ - public function testRegressionHTMLNoBody() { - $html = '<!DOCTYPE html><html><span id="test">Test</span></html>'; - $doc = $this->parse($html); - $span = $doc->getElementById('test'); - - $this->assertEmpty($doc->errors); - - $this->assertEquals('span', $span->tagName); - $this->assertEquals('Test', $span->textContent); - } - - public function testInstructionProcessor() { - $string = '<!DOCTYPE html><html><?foo bar ?></html>'; - - $treeBuilder = new DOMTreeBuilder(); - $is = new InstructionProcessorMock(); - $treeBuilder->setInstructionProcessor($is); - - $input = new StringInputStream($string); - $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $treeBuilder); - - $parser->parse(); - $dom = $treeBuilder->document(); - $div = $dom->getElementsByTagName('div')->item(0); - - $this->assertEquals(1, $is->count); - $this->assertEquals('foo', $is->name); - $this->assertEquals('bar ', $is->data); - $this->assertEquals('div', $div->tagName); - $this->assertEquals('foo', $div->textContent); - } + $doc = $this->parse($html); + $svg = $doc->getElementsByTagName('svg')->item(0); + $this->assertEquals('svg', $svg->tagName); + $this->assertEquals('svg', $svg->nodeName); + $this->assertEquals('svg', $svg->localName); + $this->assertEmpty($svg->namespaceURI); + + $textPath = $doc->getElementsByTagName('textPath')->item(0); + $this->assertEquals('textPath', $textPath->tagName); + } + + public function testNoScript() + { + $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>'; + $doc = $this->parse($html); + $this->assertEmpty($doc->errors); + $noscript = $doc->getElementsByTagName('noscript')->item(0); + $this->assertEquals('noscript', $noscript->tagName); + } + + /** + * Regression for issue #13 + */ + public function testRegressionHTMLNoBody() + { + $html = '<!DOCTYPE html><html><span id="test">Test</span></html>'; + $doc = $this->parse($html); + $span = $doc->getElementById('test'); + + $this->assertEmpty($doc->errors); + + $this->assertEquals('span', $span->tagName); + $this->assertEquals('Test', $span->textContent); + } + + public function testInstructionProcessor() + { + $string = '<!DOCTYPE html><html><?foo bar ?></html>'; + + $treeBuilder = new DOMTreeBuilder(); + $is = new InstructionProcessorMock(); + $treeBuilder->setInstructionProcessor($is); + + $input = new StringInputStream($string); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + $dom = $treeBuilder->document(); + $div = $dom->getElementsByTagName('div')->item(0); + + $this->assertEquals(1, $is->count); + $this->assertEquals('foo', $is->name); + $this->assertEquals('bar ', $is->data); + $this->assertEquals('div', $div->tagName); + $this->assertEquals('foo', $div->textContent); + } } - diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php index 050cb5a..da0d3ec 100644 --- a/test/HTML5/Parser/EventStack.php +++ b/test/HTML5/Parser/EventStack.php @@ -14,77 +14,103 @@ use Masterminds\HTML5\Parser\EventHandler; * script or pre tags. This is to match the behavior required by the HTML5 spec, * which says that the tree builder must tell the tokenizer when to switch states. */ -class EventStack implements EventHandler { - protected $stack; - - public function __construct() { - $this->stack = array(); - } - - /** - * Get the event stack. - */ - public function events() { - return $this->stack; - } - - public function depth() { - return count($this->stack); - } - - public function get($index) { - return $this->stack[$index]; - } - - protected function store($event, $data = NULL) { - $this->stack[] = array( - 'name' => $event, - 'data' => $data, - ); - } - - public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE) { - $args = array($name, $type, $id, $quirks); - $this->store('doctype', $args); - } - - public function startTag($name, $attributes = array(), $selfClosing = FALSE) { - $args = func_get_args(); - $this->store('startTag', $args); - if ($name == 'pre' || $name == 'script') { - return Elements::TEXT_RAW; +class EventStack implements EventHandler +{ + + protected $stack; + + public function __construct() + { + $this->stack = array(); + } + + /** + * Get the event stack. + */ + public function events() + { + return $this->stack; + } + + public function depth() + { + return count($this->stack); + } + + public function get($index) + { + return $this->stack[$index]; + } + + protected function store($event, $data = NULL) + { + $this->stack[] = array( + 'name' => $event, + 'data' => $data + ); } - } - public function endTag($name) { - $this->store('endTag', array($name)); - } + public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE) + { + $args = array( + $name, + $type, + $id, + $quirks + ); + $this->store('doctype', $args); + } - public function comment($cdata) { - $this->store('comment', array($cdata)); - } + public function startTag($name, $attributes = array(), $selfClosing = FALSE) + { + $args = func_get_args(); + $this->store('startTag', $args); + if ($name == 'pre' || $name == 'script') { + return Elements::TEXT_RAW; + } + } - public function cdata($data) { - $this->store('cdata', func_get_args()); - } + public function endTag($name) + { + $this->store('endTag', array( + $name + )); + } + + public function comment($cdata) + { + $this->store('comment', array( + $cdata + )); + } - public function text($cdata) { - //fprintf(STDOUT, "Received TEXT event with: " . $cdata); - $this->store('text', array($cdata)); - } + public function cdata($data) + { + $this->store('cdata', func_get_args()); + } - public function eof() { - $this->store('eof'); - } + public function text($cdata) + { + // fprintf(STDOUT, "Received TEXT event with: " . $cdata); + $this->store('text', array( + $cdata + )); + } - public function parseError($msg, $line, $col) { - //throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col)); - //$this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col)); - $this->store('error', func_get_args()); - } + public function eof() + { + $this->store('eof'); + } - public function processingInstruction($name, $data = NULL) { - $this->store('pi', func_get_args()); - } + public function parseError($msg, $line, $col) + { + // throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col)); + // $this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col)); + $this->store('error', func_get_args()); + } -}
\ No newline at end of file + public function processingInstruction($name, $data = NULL) + { + $this->store('pi', func_get_args()); + } +} diff --git a/test/HTML5/Parser/EventStackError.php b/test/HTML5/Parser/EventStackError.php index e2d6ba7..e58fdff 100644 --- a/test/HTML5/Parser/EventStackError.php +++ b/test/HTML5/Parser/EventStackError.php @@ -1,5 +1,6 @@ <?php namespace Masterminds\HTML5\Tests\Parser; -class EventStackParseError extends \Exception { +class EventStackError extends \Exception +{ } diff --git a/test/HTML5/Parser/FileInputStreamTest.php b/test/HTML5/Parser/FileInputStreamTest.php index 4efcbe1..71dd828 100644 --- a/test/HTML5/Parser/FileInputStreamTest.php +++ b/test/HTML5/Parser/FileInputStreamTest.php @@ -3,133 +3,176 @@ namespace Masterminds\HTML5\Tests\Parser; use Masterminds\HTML5\Parser\FileInputStream; -class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase { - - function testConstruct() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $this->assertInstanceOf('\Masterminds\HTML5\Parser\FileInputStream', $s); - } - - public function testNext() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $s->next(); - $this->assertEquals('!', $s->current()); - $s->next(); - $this->assertEquals('d', $s->current()); - } - - public function testKey() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $this->assertEquals(0, $s->key()); - - $s->next(); - $this->assertEquals(1, $s->key()); - } - - public function testPeek() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $this->assertEquals('!', $s->peek()); - - $s->next(); - $this->assertEquals('d', $s->peek()); - } - - public function testCurrent() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $this->assertEquals('<', $s->current()); - - $s->next(); - $this->assertEquals('!', $s->current()); - - $s->next(); - $this->assertEquals('d', $s->current()); - } - - public function testColumnOffset() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - $this->assertEquals(0, $s->columnOffset()); - $s->next(); - $this->assertEquals(1, $s->columnOffset()); - $s->next(); - $this->assertEquals(2, $s->columnOffset()); - $s->next(); - $this->assertEquals(3, $s->columnOffset()); - - // Make sure we get to the second line - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); - $this->assertEquals(0, $s->columnOffset()); - - $s->next(); - $canary = $s->current(); // h - $this->assertEquals('h', $canary); - $this->assertEquals(1, $s->columnOffset()); - } - - public function testCurrentLine() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $this->assertEquals(1, $s->currentLine()); - - // Make sure we get to the second line - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $this->assertEquals(2, $s->currentLine()); - - // Make sure we get to the third line - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); $s->next(); $s->next(); $s->next(); - $s->next(); - $this->assertEquals(3, $s->currentLine()); - } - - public function testRemainingChars() { - $text = file_get_contents(__DIR__ . '/FileInputStreamTest.html'); - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - $this->assertEquals($text, $s->remainingChars()); - - $text = substr(file_get_contents(__DIR__ . '/FileInputStreamTest.html'), 1); - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - $s->next(); // Pop one. - $this->assertEquals($text, $s->remainingChars()); - } - - public function testCharsUnitl() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $this->assertEquals('', $s->charsUntil('<')); - // Pointer at '<', moves to ' ' - $this->assertEquals('<!doctype', $s->charsUntil(' ', 20)); - - // Pointer at ' ', moves to '>' - $this->assertEquals(' html', $s->charsUntil('>')); - - // Pointer at '>', moves to '\n'. - $this->assertEquals('>', $s->charsUntil("\n")); - - // Pointer at '\n', move forward then to the next'\n'. - $s->next(); - $this->assertEquals('<html lang="en">', $s->charsUntil("\n")); - - // Ony get one of the spaces. - $this->assertEquals("\n ", $s->charsUntil('<', 2)); - - // Get the other space. - $this->assertEquals(" ", $s->charsUntil('<')); - - // This should scan to the end of the file. - $text = "<head> +class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase +{ + + public function testConstruct() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertInstanceOf('\Masterminds\HTML5\Parser\FileInputStream', $s); + } + + public function testNext() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $s->next(); + $this->assertEquals('!', $s->current()); + $s->next(); + $this->assertEquals('d', $s->current()); + } + + public function testKey() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals(0, $s->key()); + + $s->next(); + $this->assertEquals(1, $s->key()); + } + + public function testPeek() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('!', $s->peek()); + + $s->next(); + $this->assertEquals('d', $s->peek()); + } + + public function testCurrent() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('<', $s->current()); + + $s->next(); + $this->assertEquals('!', $s->current()); + + $s->next(); + $this->assertEquals('d', $s->current()); + } + + public function testColumnOffset() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $this->assertEquals(1, $s->columnOffset()); + $s->next(); + $this->assertEquals(2, $s->columnOffset()); + $s->next(); + $this->assertEquals(3, $s->columnOffset()); + + // Make sure we get to the second line + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(0, $s->columnOffset()); + + $s->next(); + $canary = $s->current(); // h + $this->assertEquals('h', $canary); + $this->assertEquals(1, $s->columnOffset()); + } + + public function testCurrentLine() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals(1, $s->currentLine()); + + // Make sure we get to the second line + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(2, $s->currentLine()); + + // Make sure we get to the third line + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(3, $s->currentLine()); + } + + public function testRemainingChars() + { + $text = file_get_contents(__DIR__ . '/FileInputStreamTest.html'); + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + $this->assertEquals($text, $s->remainingChars()); + + $text = substr(file_get_contents(__DIR__ . '/FileInputStreamTest.html'), 1); + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + $s->next(); // Pop one. + $this->assertEquals($text, $s->remainingChars()); + } + + public function testCharsUnitl() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('', $s->charsUntil('<')); + // Pointer at '<', moves to ' ' + $this->assertEquals('<!doctype', $s->charsUntil(' ', 20)); + + // Pointer at ' ', moves to '>' + $this->assertEquals(' html', $s->charsUntil('>')); + + // Pointer at '>', moves to '\n'. + $this->assertEquals('>', $s->charsUntil("\n")); + + // Pointer at '\n', move forward then to the next'\n'. + $s->next(); + $this->assertEquals('<html lang="en">', $s->charsUntil("\n")); + + // Ony get one of the spaces. + $this->assertEquals("\n ", $s->charsUntil('<', 2)); + + // Get the other space. + $this->assertEquals(" ", $s->charsUntil('<')); + + // This should scan to the end of the file. + $text = "<head> <meta charset=\"utf-8\"> <title>Test</title> </head> @@ -137,15 +180,16 @@ class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase { <p>This is a test.</p> </body> </html>"; - $this->assertEquals($text, $s->charsUntil("\t")); - } - - public function testCharsWhile() { - $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); - - $this->assertEquals('<!', $s->charsWhile('!<')); - $this->assertEquals('', $s->charsWhile('>')); - $this->assertEquals('doctype', $s->charsWhile('odcyept')); - $this->assertEquals(' htm', $s->charsWhile('html ', 4)); - } -}
\ No newline at end of file + $this->assertEquals($text, $s->charsUntil("\t")); + } + + public function testCharsWhile() + { + $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html'); + + $this->assertEquals('<!', $s->charsWhile('!<')); + $this->assertEquals('', $s->charsWhile('>')); + $this->assertEquals('doctype', $s->charsWhile('odcyept')); + $this->assertEquals(' htm', $s->charsWhile('html ', 4)); + } +} diff --git a/test/HTML5/Parser/InstructionProcessorMock.php b/test/HTML5/Parser/InstructionProcessorMock.php index b668d26..ec69364 100644 --- a/test/HTML5/Parser/InstructionProcessorMock.php +++ b/test/HTML5/Parser/InstructionProcessorMock.php @@ -1,16 +1,20 @@ <?php namespace Masterminds\HTML5\Tests\Parser; -class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor { +class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor +{ public $name = NULL; + public $data = NULL; + public $count = 0; - public function process(\DOMElement $element, $name, $data) { + public function process(\DOMElement $element, $name, $data) + { $this->name = $name; $this->data = $data; - $this->count++; + $this->count ++; $div = $element->ownerDocument->createElement("div"); $div->nodeValue = 'foo'; @@ -19,4 +23,4 @@ class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcesso return $div; } -}
\ No newline at end of file +} diff --git a/test/HTML5/Parser/ScannerTest.php b/test/HTML5/Parser/ScannerTest.php index b0d638e..8fa5110 100644 --- a/test/HTML5/Parser/ScannerTest.php +++ b/test/HTML5/Parser/ScannerTest.php @@ -8,145 +8,164 @@ namespace Masterminds\HTML5\Tests\Parser; use Masterminds\HTML5\Parser\StringInputStream; use Masterminds\HTML5\Parser\Scanner; -class ScannerTest extends \Masterminds\HTML5\Tests\TestCase { - - /** - * A canary test to make sure the basics are setup and working. - */ - public function testConstruct() { - $is = new StringInputStream("abc"); - $s = new Scanner($is); +class ScannerTest extends \Masterminds\HTML5\Tests\TestCase +{ + + /** + * A canary test to make sure the basics are setup and working. + */ + public function testConstruct() + { + $is = new StringInputStream("abc"); + $s = new Scanner($is); + + $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s); + } - $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s); - } + public function testNext() + { + $s = new Scanner(new StringInputStream("abc")); - public function testNext() { - $s = new Scanner(new StringInputStream("abc")); + $this->assertEquals('b', $s->next()); + $this->assertEquals('c', $s->next()); + } - $this->assertEquals('b', $s->next()); - $this->assertEquals('c', $s->next()); - } + public function testPosition() + { + $s = new Scanner(new StringInputStream("abc")); - public function testPosition() { - $s = new Scanner(new StringInputStream("abc")); + $this->assertEquals(0, $s->position()); - $this->assertEquals(0, $s->position()); + $s->next(); + $this->assertEquals(1, $s->position()); + } - $s->next(); - $this->assertEquals(1, $s->position()); - } + public function testPeek() + { + $s = new Scanner(new StringInputStream("abc")); - public function testPeek() { - $s = new Scanner(new StringInputStream("abc")); + $this->assertEquals('b', $s->peek()); + $s->next(); + $this->assertEquals('c', $s->peek()); + } - $this->assertEquals('b', $s->peek()); + public function testCurrent() + { + $s = new Scanner(new StringInputStream("abc")); - $s->next(); - $this->assertEquals('c', $s->peek()); - } + // Before scanning the string begins the current is empty. + $this->assertEquals('a', $s->current()); - public function testCurrent() { - $s = new Scanner(new StringInputStream("abc")); + $c = $s->next(); + $this->assertEquals('b', $s->current()); - // Before scanning the string begins the current is empty. - $this->assertEquals('a', $s->current()); + // Test movement through the string. + $c = $s->next(); + $this->assertEquals('c', $s->current()); + } - $c = $s->next(); - $this->assertEquals('b', $s->current()); + public function testUnconsume() + { + $s = new Scanner(new StringInputStream("abcdefghijklmnopqrst")); - // Test movement through the string. - $c = $s->next(); - $this->assertEquals('c', $s->current()); - } + // Get initial position. + $s->next(); + $start = $s->position(); - public function testUnconsume() { - $s = new Scanner(new StringInputStream("abcdefghijklmnopqrst")); + // Move forward a bunch of positions. + $amount = 7; + for ($i = 0; $i < $amount; $i ++) { + $s->next(); + } - // Get initial position. - $s->next(); - $start = $s->position(); + // Roll back the amount we moved forward. + $s->unconsume($amount); - // Move forward a bunch of positions. - $amount = 7; - for($i = 0; $i < $amount; $i++) { - $s->next(); + $this->assertEquals($start, $s->position()); } - // Roll back the amount we moved forward. - $s->unconsume($amount); - - $this->assertEquals($start, $s->position()); - } - - public function testGetHex() { - $s = new Scanner(new StringInputStream("ab13ck45DE*")); + public function testGetHex() + { + $s = new Scanner(new StringInputStream("ab13ck45DE*")); - $this->assertEquals('ab13c', $s->getHex()); + $this->assertEquals('ab13c', $s->getHex()); - $s->next(); - $this->assertEquals('45DE', $s->getHex()); - } - - public function testGetAsciiAlpha() { - $s = new Scanner(new StringInputStream("abcdef1%mnop*")); - - $this->assertEquals('abcdef', $s->getAsciiAlpha()); + $s->next(); + $this->assertEquals('45DE', $s->getHex()); + } - // Move past the 1% to scan the next group of text. - $s->next(); - $s->next(); - $this->assertEquals('mnop', $s->getAsciiAlpha()); - } + public function testGetAsciiAlpha() + { + $s = new Scanner(new StringInputStream("abcdef1%mnop*")); - public function testGetAsciiAlphaNum() { - $s = new Scanner(new StringInputStream("abcdef1ghpo#mn94op")); + $this->assertEquals('abcdef', $s->getAsciiAlpha()); - $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum()); + // Move past the 1% to scan the next group of text. + $s->next(); + $s->next(); + $this->assertEquals('mnop', $s->getAsciiAlpha()); + } - // Move past the # to scan the next group of text. - $s->next(); - $this->assertEquals('mn94op', $s->getAsciiAlphaNum()); - } + public function testGetAsciiAlphaNum() + { + $s = new Scanner(new StringInputStream("abcdef1ghpo#mn94op")); - public function testGetNumeric() { - $s = new Scanner(new StringInputStream("1784a 45 9867 #")); + $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum()); - $this->assertEquals('1784', $s->getNumeric()); + // Move past the # to scan the next group of text. + $s->next(); + $this->assertEquals('mn94op', $s->getAsciiAlphaNum()); + } - // Move past the 'a ' to scan the next group of text. - $s->next(); - $s->next(); - $this->assertEquals('45', $s->getNumeric()); - } + public function testGetNumeric() + { + $s = new Scanner(new StringInputStream("1784a 45 9867 #")); - public function testCurrentLine() { - $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test.")); + $this->assertEquals('1784', $s->getNumeric()); - $this->assertEquals(1, $s->currentLine()); + // Move past the 'a ' to scan the next group of text. + $s->next(); + $s->next(); + $this->assertEquals('45', $s->getNumeric()); + } - // Move to the next line. - $s->getAsciiAlphaNum(); $s->next(); - $this->assertEquals(2, $s->currentLine()); - } + public function testCurrentLine() + { + $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test.")); - public function testColumnOffset() { - $s = new Scanner(new StringInputStream("1784a a\n45 9867 #\nThis is a test.")); + $this->assertEquals(1, $s->currentLine()); - // Move the pointer to the space. - $s->getAsciiAlphaNum(); - $this->assertEquals(5, $s->columnOffset()); + // Move to the next line. + $s->getAsciiAlphaNum(); + $s->next(); + $this->assertEquals(2, $s->currentLine()); + } - // We move the pointer ahead. There must be a better way to do this. - $s->next(); $s->next(); $s->next(); $s->next(); $s->next(); $s->next(); - $this->assertEquals(3, $s->columnOffset()); - } + public function testColumnOffset() + { + $s = new Scanner(new StringInputStream("1784a a\n45 9867 #\nThis is a test.")); + + // Move the pointer to the space. + $s->getAsciiAlphaNum(); + $this->assertEquals(5, $s->columnOffset()); + + // We move the pointer ahead. There must be a better way to do this. + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $s->next(); + $this->assertEquals(3, $s->columnOffset()); + } - public function testRemainingChars() { - $string = "\n45\n9867 #\nThis is a test."; - $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test.")); + public function testRemainingChars() + { + $string = "\n45\n9867 #\nThis is a test."; + $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test.")); - $s->getAsciiAlphaNum(); - $this->assertEquals($string, $s->remainingChars()); - } -}
\ No newline at end of file + $s->getAsciiAlphaNum(); + $this->assertEquals($string, $s->remainingChars()); + } +} diff --git a/test/HTML5/Parser/StringInputStreamTest.php b/test/HTML5/Parser/StringInputStreamTest.php index 5148e45..3ee768a 100644 --- a/test/HTML5/Parser/StringInputStreamTest.php +++ b/test/HTML5/Parser/StringInputStreamTest.php @@ -3,337 +3,325 @@ namespace Masterminds\HTML5\Tests\Parser; use Masterminds\HTML5\Parser\StringInputStream; -class StringInputStreamTest extends \Masterminds\HTML5\Tests\TestCase { - - /** - * A canary test to make sure the basics are setup and working. - */ - public function testConstruct() { - $s = new StringInputStream("abc"); - - $this->assertInstanceOf('\Masterminds\HTML5\Parser\StringInputStream', $s); - } - - public function testNext() { - $s = new StringInputStream("abc"); - - $s->next(); - $this->assertEquals('b', $s->current()); - $s->next(); - $this->assertEquals('c', $s->current()); - } - - public function testKey() { - $s = new StringInputStream("abc"); - - $this->assertEquals(0, $s->key()); - - $s->next(); - $this->assertEquals(1, $s->key()); - } - - public function testPeek() { - $s = new StringInputStream("abc"); - - $this->assertEquals('b', $s->peek()); - - $s->next(); - $this->assertEquals('c', $s->peek()); - } - - public function testCurrent() { - $s = new StringInputStream("abc"); - - // Before scanning the string begins the current is empty. - $this->assertEquals('a', $s->current()); - - $s->next(); - $this->assertEquals('b', $s->current()); - - // Test movement through the string. - $s->next(); - $this->assertEquals('c', $s->current()); - } - - public function testColumnOffset() { - $s = new StringInputStream("abc\ndef\n"); - $this->assertEquals(0, $s->columnOffset()); - $s->next(); - $this->assertEquals(1, $s->columnOffset()); - $s->next(); - $this->assertEquals(2, $s->columnOffset()); - $s->next(); - $this->assertEquals(3, $s->columnOffset()); - $s->next(); // LF - $this->assertEquals(0, $s->columnOffset()); - $s->next(); - $canary = $s->current(); // e - $this->assertEquals('e', $canary); - $this->assertEquals(1, $s->columnOffset()); - - $s = new StringInputStream("abc"); - $this->assertEquals(0, $s->columnOffset()); - $s->next(); - $this->assertEquals(1, $s->columnOffset()); - $s->next(); - $this->assertEquals(2, $s->columnOffset()); - } - - public function testCurrentLine() { - $txt = "1\n2\n\n\n\n3"; - $stream = new StringInputStream($txt); - $this->assertEquals(1, $stream->currentLine()); - - // Advance over 1 and LF on to line 2 value 2. - $stream->next(); $stream->next(); - $canary = $stream->current(); - $this->assertEquals(2, $stream->currentLine()); - $this->assertEquals('2', $canary); - - - // Advance over 4x LF - $stream->next(); $stream->next(); - $stream->next(); $stream->next(); - $stream->next(); - $this->assertEquals(6, $stream->currentLine()); - $this->assertEquals('3', $stream->current()); - - // Make sure it doesn't do 7. - $this->assertEquals(6, $stream->currentLine()); - } - - public function testRemainingChars() { - $text = "abcd"; - $s = new StringInputStream($text); - $this->assertEquals($text, $s->remainingChars()); - - $text = "abcd"; - $s = new StringInputStream($text); - $s->next(); // Pop one. - $this->assertEquals('bcd', $s->remainingChars()); - - } - - public function testCharsUnitl() { - $text = "abcdefffffffghi"; - $s = new StringInputStream($text); - $this->assertEquals('', $s->charsUntil('a')); - // Pointer at 'a', moves 2 to 'c' - $this->assertEquals('ab', $s->charsUntil('w', 2)); - - // Pointer at 'c', moves to first 'f' - $this->assertEquals('cde', $s->charsUntil('fzxv')); - - // Only get five 'f's - $this->assertEquals('fffff', $s->charsUntil('g', 5)); - - // Get just the last two 'f's - $this->assertEquals('ff', $s->charsUntil('g')); - - // This should scan to the end. - $this->assertEquals('ghi', $s->charsUntil('w', 9)); - - } - - public function testCharsWhile() { - $text = "abcdefffffffghi"; - $s = new StringInputStream($text); - - $this->assertEquals('ab', $s->charsWhile('ba')); - - $this->assertEquals('', $s->charsWhile('a')); - $this->assertEquals('cde', $s->charsWhile('cdeba')); - $this->assertEquals('ff', $s->charsWhile('f', 2)); - $this->assertEquals('fffff', $s->charsWhile('f')); - $this->assertEquals('g', $s->charsWhile('fg')); - $this->assertEquals('hi', $s->charsWhile('fghi', 99)); - - } - - public function testBOM() { - - // Ignore in-text BOM. - $stream = new StringInputStream("a\xEF\xBB\xBF"); - $this->assertEquals("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain'); - - // Strip leading BOM - $leading = new StringInputStream("\xEF\xBB\xBFa"); - $this->assertEquals('a', $leading->current(), 'BOM should be stripped'); - } - - public function testCarriageReturn() { - - // Replace NULL with Unicode replacement. - $stream = new StringInputStream("\0\0\0"); - $this->assertEquals("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD'); - $this->assertEquals(3, count($stream->errors), 'Null character should set parse error: ' . print_r($stream->errors, TRUE)); - - // Remove CR when next to LF. - $stream = new StringInputStream("\r\n"); - $this->assertEquals("\n", $stream->remainingChars(), 'CRLF should be replaced by LF'); - - // Convert CR to LF when on its own. - $stream = new StringInputStream("\r"); - $this->assertEquals("\n", $stream->remainingChars(), 'CR should be replaced by LF'); - } - - - public function invalidParseErrorTestHandler($input, $numErrors, $name) { - $stream = new StringInputStream($input, 'UTF-8'); - $this->assertEquals($input, $stream->remainingChars(), $name . ' (stream content)'); - $this->assertEquals($numErrors, count($stream->errors), $name . ' (number of errors)'); - } - - public function testInvalidReplace() { - $invalidTest = array( - - // Min/max overlong - "\xC0\x80a" => 'Overlong representation of U+0000', - "\xE0\x80\x80a" => 'Overlong representation of U+0000', - "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000', - "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000', - "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000', - "\xC1\xBFa" => 'Overlong representation of U+007F', - "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF', - "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF', - - - "a\xDF" => 'Incomplete two byte sequence (missing final byte)', - "a\xEF\xBF" => 'Incomplete three byte sequence (missing final byte)', - "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte)', - - // Min/max continuation bytes - "a\x80" => 'Lone 80 continuation byte', - "a\xBF" => 'Lone BF continuation byte', - - // Invalid bytes (these can never occur) - "a\xFE" => 'Invalid FE byte', - "a\xFF" => 'Invalid FF byte', - ); - foreach ($invalidTest as $test => $note) { - $stream = new StringInputStream($test); - $this->assertEquals('a', $stream->remainingChars(), $note); - } - - // MPB: - // It appears that iconv just leaves these alone. Not sure what to - // do. - /* - $converted = array( - "a\xF5\x90\x80\x80" => 'U+110000, off unicode planes.', - ); - foreach ($converted as $test => $note) { - $stream = new StringInputStream($test); - $this->assertEquals(2, mb_strlen($stream->remainingChars()), $note); - } - */ - } - - public function testInvalidParseError() { - // C0 controls (except U+0000 and U+000D due to different handling) - $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)'); - $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)'); - $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)'); - $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)'); - $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)'); - $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)'); - $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)'); - $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)'); - $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)'); - $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)'); - $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)'); - $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)'); - $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)'); - $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)'); - $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)'); - $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)'); - $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)'); - $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)'); - $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)'); - $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)'); - $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)'); - $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)'); - $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)'); - $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)'); - $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)'); - $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)'); - $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)'); - $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)'); - $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)'); - $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)'); - - // DEL (U+007F) - $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F'); - - // C1 Controls - $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)'); - $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)'); - $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)'); - - // Charcters surrounding surrogates - $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)'); - $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+DE00 (one codepoint above highest surrogate codepoint)'); - - // Permanent noncharacters - $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)'); - $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)'); - - // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the - // business of writing tests against iconv, I've just commented these out. Should revisit - // at a later point. - /* - $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)'); - $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)'); - $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)'); - $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)'); - $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)'); - $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)'); - $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)'); - - // Paired UTF-16 surrogates - $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)'); - $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)'); - $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)'); - $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)'); - $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)'); - $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)'); - $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)'); - $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)'); - */ - } - +class StringInputStreamTest extends \Masterminds\HTML5\Tests\TestCase +{ + + /** + * A canary test to make sure the basics are setup and working. + */ + public function testConstruct() + { + $s = new StringInputStream("abc"); + + $this->assertInstanceOf('\Masterminds\HTML5\Parser\StringInputStream', $s); + } + + public function testNext() + { + $s = new StringInputStream("abc"); + + $s->next(); + $this->assertEquals('b', $s->current()); + $s->next(); + $this->assertEquals('c', $s->current()); + } + + public function testKey() + { + $s = new StringInputStream("abc"); + + $this->assertEquals(0, $s->key()); + + $s->next(); + $this->assertEquals(1, $s->key()); + } + + public function testPeek() + { + $s = new StringInputStream("abc"); + + $this->assertEquals('b', $s->peek()); + + $s->next(); + $this->assertEquals('c', $s->peek()); + } + + public function testCurrent() + { + $s = new StringInputStream("abc"); + + // Before scanning the string begins the current is empty. + $this->assertEquals('a', $s->current()); + + $s->next(); + $this->assertEquals('b', $s->current()); + + // Test movement through the string. + $s->next(); + $this->assertEquals('c', $s->current()); + } + + public function testColumnOffset() + { + $s = new StringInputStream("abc\ndef\n"); + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $this->assertEquals(1, $s->columnOffset()); + $s->next(); + $this->assertEquals(2, $s->columnOffset()); + $s->next(); + $this->assertEquals(3, $s->columnOffset()); + $s->next(); // LF + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $canary = $s->current(); // e + $this->assertEquals('e', $canary); + $this->assertEquals(1, $s->columnOffset()); + + $s = new StringInputStream("abc"); + $this->assertEquals(0, $s->columnOffset()); + $s->next(); + $this->assertEquals(1, $s->columnOffset()); + $s->next(); + $this->assertEquals(2, $s->columnOffset()); + } + + public function testCurrentLine() + { + $txt = "1\n2\n\n\n\n3"; + $stream = new StringInputStream($txt); + $this->assertEquals(1, $stream->currentLine()); + + // Advance over 1 and LF on to line 2 value 2. + $stream->next(); + $stream->next(); + $canary = $stream->current(); + $this->assertEquals(2, $stream->currentLine()); + $this->assertEquals('2', $canary); + + // Advance over 4x LF + $stream->next(); + $stream->next(); + $stream->next(); + $stream->next(); + $stream->next(); + $this->assertEquals(6, $stream->currentLine()); + $this->assertEquals('3', $stream->current()); + + // Make sure it doesn't do 7. + $this->assertEquals(6, $stream->currentLine()); + } + + public function testRemainingChars() + { + $text = "abcd"; + $s = new StringInputStream($text); + $this->assertEquals($text, $s->remainingChars()); + + $text = "abcd"; + $s = new StringInputStream($text); + $s->next(); // Pop one. + $this->assertEquals('bcd', $s->remainingChars()); + } + + public function testCharsUnitl() + { + $text = "abcdefffffffghi"; + $s = new StringInputStream($text); + $this->assertEquals('', $s->charsUntil('a')); + // Pointer at 'a', moves 2 to 'c' + $this->assertEquals('ab', $s->charsUntil('w', 2)); + + // Pointer at 'c', moves to first 'f' + $this->assertEquals('cde', $s->charsUntil('fzxv')); + + // Only get five 'f's + $this->assertEquals('fffff', $s->charsUntil('g', 5)); + + // Get just the last two 'f's + $this->assertEquals('ff', $s->charsUntil('g')); + + // This should scan to the end. + $this->assertEquals('ghi', $s->charsUntil('w', 9)); + } + + public function testCharsWhile() + { + $text = "abcdefffffffghi"; + $s = new StringInputStream($text); + + $this->assertEquals('ab', $s->charsWhile('ba')); + + $this->assertEquals('', $s->charsWhile('a')); + $this->assertEquals('cde', $s->charsWhile('cdeba')); + $this->assertEquals('ff', $s->charsWhile('f', 2)); + $this->assertEquals('fffff', $s->charsWhile('f')); + $this->assertEquals('g', $s->charsWhile('fg')); + $this->assertEquals('hi', $s->charsWhile('fghi', 99)); + } + + public function testBOM() + { + // Ignore in-text BOM. + $stream = new StringInputStream("a\xEF\xBB\xBF"); + $this->assertEquals("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain'); + + // Strip leading BOM + $leading = new StringInputStream("\xEF\xBB\xBFa"); + $this->assertEquals('a', $leading->current(), 'BOM should be stripped'); + } + + public function testCarriageReturn() + { + // Replace NULL with Unicode replacement. + $stream = new StringInputStream("\0\0\0"); + $this->assertEquals("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD'); + $this->assertEquals(3, count($stream->errors), 'Null character should set parse error: ' . print_r($stream->errors, TRUE)); + + // Remove CR when next to LF. + $stream = new StringInputStream("\r\n"); + $this->assertEquals("\n", $stream->remainingChars(), 'CRLF should be replaced by LF'); + + // Convert CR to LF when on its own. + $stream = new StringInputStream("\r"); + $this->assertEquals("\n", $stream->remainingChars(), 'CR should be replaced by LF'); + } + + public function invalidParseErrorTestHandler($input, $numErrors, $name) + { + $stream = new StringInputStream($input, 'UTF-8'); + $this->assertEquals($input, $stream->remainingChars(), $name . ' (stream content)'); + $this->assertEquals($numErrors, count($stream->errors), $name . ' (number of errors)'); + } + + public function testInvalidReplace() + { + $invalidTest = array( + + // Min/max overlong + "\xC0\x80a" => 'Overlong representation of U+0000', + "\xE0\x80\x80a" => 'Overlong representation of U+0000', + "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000', + "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000', + "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000', + "\xC1\xBFa" => 'Overlong representation of U+007F', + "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF', + "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF', + + "a\xDF" => 'Incomplete two byte sequence (missing final byte)', + "a\xEF\xBF" => 'Incomplete three byte sequence (missing final byte)', + "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte)', + + // Min/max continuation bytes + "a\x80" => 'Lone 80 continuation byte', + "a\xBF" => 'Lone BF continuation byte', + + // Invalid bytes (these can never occur) + "a\xFE" => 'Invalid FE byte', + "a\xFF" => 'Invalid FF byte' + ); + foreach ($invalidTest as $test => $note) { + $stream = new StringInputStream($test); + $this->assertEquals('a', $stream->remainingChars(), $note); + } + + // MPB: + // It appears that iconv just leaves these alone. Not sure what to + // do. + /* + * $converted = array( "a\xF5\x90\x80\x80" => 'U+110000, off unicode planes.', ); foreach ($converted as $test => $note) { $stream = new StringInputStream($test); $this->assertEquals(2, mb_strlen($stream->remainingChars()), $note); } + */ + } + + public function testInvalidParseError() + { + // C0 controls (except U+0000 and U+000D due to different handling) + $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)'); + $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)'); + $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)'); + $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)'); + $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)'); + $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)'); + $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)'); + $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)'); + $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)'); + $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)'); + $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)'); + $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)'); + $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)'); + $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)'); + $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)'); + $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)'); + $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)'); + $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)'); + $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)'); + $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)'); + $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)'); + $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)'); + $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)'); + $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)'); + $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)'); + $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)'); + $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)'); + $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)'); + $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)'); + $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)'); + + // DEL (U+007F) + $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F'); + + // C1 Controls + $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)'); + $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)'); + $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)'); + + // Charcters surrounding surrogates + $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+DE00 (one codepoint above highest surrogate codepoint)'); + + // Permanent noncharacters + $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)'); + + // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the + // business of writing tests against iconv, I've just commented these out. Should revisit + // at a later point. + /* + * $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)'); // Paired UTF-16 surrogates $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)'); + */ + } } diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index 27c3535..32f0fcb 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -6,593 +6,948 @@ use Masterminds\HTML5\Parser\StringInputStream; use Masterminds\HTML5\Parser\Scanner; use Masterminds\HTML5\Parser\Tokenizer; -class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase { - // ================================================================ - // Additional assertions. - // ================================================================ - /** - * Tests that an event matches both the event type and the expected value. - * - * @param string $type - * Expected event type. - * @param string $expects - * The value expected in $event['data'][0]. - */ - public function assertEventEquals($type, $expects, $event) { - $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, TRUE)); - if (is_array($expects)) { - $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, TRUE) . ": " . print_r($event, TRUE)); +class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase +{ + // ================================================================ + // Additional assertions. + // ================================================================ + /** + * Tests that an event matches both the event type and the expected value. + * + * @param string $type + * Expected event type. + * @param string $expects + * The value expected in $event['data'][0]. + */ + public function assertEventEquals($type, $expects, $event) + { + $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, TRUE)); + if (is_array($expects)) { + $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, TRUE) . ": " . print_r($event, TRUE)); + } else { + $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . print_r($event, TRUE)); + } } - else { - $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . print_r($event, TRUE)); + + /** + * Assert that a given event is 'error'. + */ + public function assertEventError($event) + { + $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, TRUE)); + } + + /** + * Asserts that all of the tests are good. + * + * This loops through a map of tests/expectations and runs a few assertions on each test. + * + * Checks: + * - depth (if depth is > 0) + * - event name + * - matches on event 0. + */ + protected function isAllGood($name, $depth, $tests, $debug = FALSE) + { + foreach ($tests as $try => $expects) { + if ($debug) { + fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, TRUE)); + } + $e = $this->parse($try); + if ($depth > 0) { + $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, TRUE)); + } + $this->assertEventEquals($name, $expects, $e->get(0)); + } + } + + // ================================================================ + // Utility functions. + // ================================================================ + public function testParse() + { + list ($tok, $events) = $this->createTokenizer(''); + + $tok->parse(); + $e1 = $events->get(0); + + $this->assertEquals(1, $events->Depth()); + $this->assertEquals('eof', $e1['name']); + } + + public function testWhitespace() + { + $spaces = ' '; + list ($tok, $events) = $this->createTokenizer($spaces); + + $tok->parse(); + + $this->assertEquals(2, $events->depth()); + + $e1 = $events->get(0); + + $this->assertEquals('text', $e1['name']); + $this->assertEquals($spaces, $e1['data'][0]); } - } - - /** - * Assert that a given event is 'error'. - */ - public function assertEventError($event) { - $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, TRUE)); - } - - /** - * Asserts that all of the tests are good. - * - * This loops through a map of tests/expectations and runs a few assertions on each test. - * - * Checks: - * - depth (if depth is > 0) - * - event name - * - matches on event 0. - */ - protected function isAllGood($name, $depth, $tests, $debug = FALSE) { - foreach ($tests as $try => $expects) { - if ($debug) { - fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, TRUE)); - } - $e = $this->parse($try); - if ($depth > 0) { - $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, TRUE)); - } - $this->assertEventEquals($name, $expects, $e->get(0)); + + public function testCharacterReference() + { + $good = array( + '&' => '&', + '<' => '<', + '&' => '&', + '&' => '&' + ); + $this->isAllGood('text', 2, $good); + + // Test with broken charref + $str = '&foo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + $str = 'oo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + $str = '&#foo'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('error', $e1['name']); + + // FIXME: Once the text processor is done, need to verify that the + // tokens are transformed correctly into text. } - } - - - // ================================================================ - // Utility functions. - // ================================================================ - - public function testParse() { - list($tok, $events) = $this->createTokenizer(''); - - $tok->parse(); - $e1 = $events->get(0); - - $this->assertEquals(1, $events->Depth()); - $this->assertEquals('eof', $e1['name']); - } - - public function testWhitespace() { - $spaces = ' '; - list($tok, $events) = $this->createTokenizer($spaces); - - $tok->parse(); - - $this->assertEquals(2, $events->depth()); - - $e1 = $events->get(0); - - $this->assertEquals('text', $e1['name']); - $this->assertEquals($spaces, $e1['data'][0]); - } - - public function testCharacterReference() { - $good = array( - '&' => '&', - '<' => '<', - '&' => '&', - '&' => '&', - ); - $this->isAllGood('text', 2, $good); - - // Test with broken charref - $str = '&foo'; - $events = $this->parse($str); - $e1 = $events->get(0); - $this->assertEquals('error', $e1['name']); - - $str = 'oo'; - $events = $this->parse($str); - $e1 = $events->get(0); - $this->assertEquals('error', $e1['name']); - - $str = '&#foo'; - $events = $this->parse($str); - $e1 = $events->get(0); - $this->assertEquals('error', $e1['name']); - - // FIXME: Once the text processor is done, need to verify that the - // tokens are transformed correctly into text. - } - - public function testBogusComment() { - $bogus = array( - '</+this is a bogus comment. +>', - '<!+this is a bogus comment. !>', - '<!D OCTYPE foo bar>', - '<!DOCTYEP foo bar>', - '<![CADATA[ TEST ]]>', - '<![CDATA Hello ]]>', - '<![CDATA[ Hello [[>', - '<!CDATA[[ test ]]>', - '<![CDATA[', - '<![CDATA[hellooooo hello', - '<? Hello World ?>', - '<? Hello World', - ); - foreach ($bogus as $str) { - $events = $this->parse($str); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('comment', $str, $events->get(1)); + + public function testBogusComment() + { + $bogus = array( + '</+this is a bogus comment. +>', + '<!+this is a bogus comment. !>', + '<!D OCTYPE foo bar>', + '<!DOCTYEP foo bar>', + '<![CADATA[ TEST ]]>', + '<![CDATA Hello ]]>', + '<![CDATA[ Hello [[>', + '<!CDATA[[ test ]]>', + '<![CDATA[', + '<![CDATA[hellooooo hello', + '<? Hello World ?>', + '<? Hello World' + ); + foreach ($bogus as $str) { + $events = $this->parse($str); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('comment', $str, $events->get(1)); + } } - } - public function testEndTag() { - $succeed = array( - '</a>' => 'a', - '</test>' => 'test', - '</test + public function testEndTag() + { + $succeed = array( + '</a>' => 'a', + '</test>' => 'test', + '</test >' => 'test', - '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => - 'thisisthetagthatdoesntenditjustgoesonandonmyfriend', - // See 8.2.4.10, which requires this and does not say error. - '</a<b>' => 'a<b', - ); - $this->isAllGood('endTag', 2, $succeed); - - // Recoverable failures - $fail = array( - '</a class="monkey">' => 'a', - '</a <b>' => 'a', - '</a <b <c>' => 'a', - '</a is the loneliest letter>' => 'a', - '</a' => 'a', - ); - foreach ($fail as $test => $result) { - $events = $this->parse($test); - $this->assertEquals(3, $events->depth()); - // Should have triggered an error. - $this->assertEventError($events->get(0)); - // Should have tried to parse anyway. - $this->assertEventEquals('endTag', $result, $events->get(1)); + '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend', + // See 8.2.4.10, which requires this and does not say error. + '</a<b>' => 'a<b' + ); + $this->isAllGood('endTag', 2, $succeed); + + // Recoverable failures + $fail = array( + '</a class="monkey">' => 'a', + '</a <b>' => 'a', + '</a <b <c>' => 'a', + '</a is the loneliest letter>' => 'a', + '</a' => 'a' + ); + foreach ($fail as $test => $result) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + // Should have triggered an error. + $this->assertEventError($events->get(0)); + // Should have tried to parse anyway. + $this->assertEventEquals('endTag', $result, $events->get(1)); + } + + // BogoComments + $comments = array( + '</>' => '</>', + '</ >' => '</ >', + '</ a>' => '</ a>' + ); + foreach ($comments as $test => $result) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + + // Should have triggered an error. + $this->assertEventError($events->get(0)); + + // Should have tried to parse anyway. + $this->assertEventEquals('comment', $result, $events->get(1)); + } } - // BogoComments - $comments = array( - '</>' => '</>', - '</ >' => '</ >', - '</ a>' => '</ a>', - ); - foreach ($comments as $test => $result) { - $events = $this->parse($test); - $this->assertEquals(3, $events->depth()); - - // Should have triggered an error. - $this->assertEventError($events->get(0)); - - // Should have tried to parse anyway. - $this->assertEventEquals('comment', $result, $events->get(1)); + public function testComment() + { + $good = array( + '<!--easy-->' => 'easy', + '<!-- 1 > 0 -->' => ' 1 > 0 ', + '<!-- --$i -->' => ' --$i ', + '<!----$i-->' => '--$i', + '<!-- 1 > 0 -->' => ' 1 > 0 ', + "<!--\nHello World.\na-->" => "\nHello World.\na", + '<!-- <!-- -->' => ' <!-- ' + ); + foreach ($good as $test => $expected) { + $events = $this->parse($test); + $this->assertEventEquals('comment', $expected, $events->get(0)); + } + + $fail = array( + '<!-->' => '', + '<!--Hello' => 'Hello', + "<!--\0Hello" => UTF8Utils::FFFD . 'Hello', + '<!--' => '' + ); + foreach ($fail as $test => $expected) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth()); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('comment', $expected, $events->get(1)); + } } - } - - public function testComment() { - $good = array( - '<!--easy-->' => 'easy', - '<!-- 1 > 0 -->' => ' 1 > 0 ', - '<!-- --$i -->' => ' --$i ', - '<!----$i-->' => '--$i', - '<!-- 1 > 0 -->' => ' 1 > 0 ', - "<!--\nHello World.\na-->" => "\nHello World.\na", - '<!-- <!-- -->' => ' <!-- ', - ); - foreach ($good as $test => $expected) { - $events = $this->parse($test); - $this->assertEventEquals('comment', $expected, $events->get(0)); + + public function testCDATASection() + { + $good = array( + '<![CDATA[ This is a test. ]]>' => ' This is a test. ', + '<![CDATA[CDATA]]>' => 'CDATA', + '<![CDATA[ ]] > ]]>' => ' ]] > ', + '<![CDATA[ ]]>' => ' ' + ); + $this->isAllGood('cdata', 2, $good); } - $fail = array( - '<!-->' => '', - '<!--Hello' => 'Hello', - "<!--\0Hello" => UTF8Utils::FFFD . 'Hello', - '<!--' => '', - ); - foreach ($fail as $test => $expected) { - $events = $this->parse($test); - $this->assertEquals(3, $events->depth()); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('comment', $expected, $events->get(1)); + public function testDoctype() + { + $good = array( + '<!DOCTYPE html>' => array( + 'html', + 0, + NULL, + FALSE + ), + '<!doctype html>' => array( + 'html', + 0, + NULL, + FALSE + ), + '<!DocType html>' => array( + 'html', + 0, + NULL, + FALSE + ), + "<!DOCTYPE\nhtml>" => array( + 'html', + 0, + NULL, + FALSE + ), + "<!DOCTYPE\fhtml>" => array( + 'html', + 0, + NULL, + FALSE + ), + '<!DOCTYPE html PUBLIC "foo bar">' => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + FALSE + ), + "<!DOCTYPE html PUBLIC 'foo bar'>" => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + FALSE + ), + '<!DOCTYPE html PUBLIC "foo bar" >' => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + FALSE + ), + "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array( + 'html', + EventStack::DOCTYPE_PUBLIC, + 'foo bar', + FALSE + ), + '<!DOCTYPE html SYSTEM "foo bar">' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + FALSE + ), + "<!DOCTYPE html SYSTEM 'foo bar'>" => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + FALSE + ), + '<!DOCTYPE html SYSTEM "foo/bar" >' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo/bar', + FALSE + ), + "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + FALSE + ) + ); + $this->isAllGood('doctype', 2, $good); + + $bad = array( + '<!DOCTYPE>' => array( + NULL, + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE >' => array( + NULL, + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo PUB' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo PUB>' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo PUB "Looks good">' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo SYSTME "Looks good"' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + + // Can't tell whether these are ids or ID types, since the context is chopped. + '<!DOCTYPE foo PUBLIC' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo PUBLIC>' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo SYSTEM' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + '<!DOCTYPE foo SYSTEM>' => array( + 'foo', + EventStack::DOCTYPE_NONE, + NULL, + TRUE + ), + + '<!DOCTYPE html SYSTEM "foo bar"' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + TRUE + ), + '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array( + 'html', + EventStack::DOCTYPE_SYSTEM, + 'foo bar', + TRUE + ) + ); + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + // fprintf(STDOUT, $test . PHP_EOL); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('doctype', $expects, $events->get(1)); + } } - } - - public function testCDATASection() { - $good = array( - '<![CDATA[ This is a test. ]]>' => ' This is a test. ', - '<![CDATA[CDATA]]>' => 'CDATA', - '<![CDATA[ ]] > ]]>' => ' ]] > ', - '<![CDATA[ ]]>' => ' ', - ); - $this->isAllGood('cdata', 2, $good); - } - - public function testDoctype() { - $good = array( - '<!DOCTYPE html>' => array('html', 0, NULL, FALSE), - '<!doctype html>' => array('html', 0, NULL, FALSE), - '<!DocType html>' => array('html', 0, NULL, FALSE), - "<!DOCTYPE\nhtml>" => array('html', 0, NULL, FALSE), - "<!DOCTYPE\fhtml>" => array('html', 0, NULL, FALSE), - '<!DOCTYPE html PUBLIC "foo bar">' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE), - "<!DOCTYPE html PUBLIC 'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE), - '<!DOCTYPE html PUBLIC "foo bar" >' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE), - "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE), - '<!DOCTYPE html SYSTEM "foo bar">' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE), - "<!DOCTYPE html SYSTEM 'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE), - '<!DOCTYPE html SYSTEM "foo/bar" >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', FALSE), - "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE), - ); - $this->isAllGood('doctype', 2, $good); - - $bad = array( - '<!DOCTYPE>' => array(NULL, EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE >' => array(NULL, EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo PUB' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - - // Can't tell whether these are ids or ID types, since the context is chopped. - '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE), - - '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE), - '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE), - ); - foreach ($bad as $test => $expects) { - $events = $this->parse($test); - //fprintf(STDOUT, $test . PHP_EOL); - $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('doctype', $expects, $events->get(1)); + public function testProcessorInstruction() + { + $good = array( + '<?hph ?>' => 'hph', + '<?hph echo "Hello World"; ?>' => array( + 'hph', + 'echo "Hello World"; ' + ), + "<?hph \necho 'Hello World';\n?>" => array( + 'hph', + "echo 'Hello World';\n" + ) + ); + $this->isAllGood('pi', 2, $good); } - } - - public function testProcessorInstruction() { - $good = array( - '<?hph ?>' => 'hph', - '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '), - "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"), - ); - $this->isAllGood('pi', 2, $good); - } - - /** - * This tests just simple tags. - */ - public function testSimpleTags() { - $open = array( - '<foo>' => 'foo', - '<FOO>' => 'foo', - '<fOO>' => 'foo', - '<foo >' => 'foo', - "<foo\n\n\n\n>" => 'foo', - '<foo:bar>' => 'foo:bar', - ); - $this->isAllGood('startTag', 2, $open); - - $selfClose= array( - '<foo/>' => 'foo', - '<FOO/>' => 'foo', - '<foo />' => 'foo', - "<foo\n\n\n\n/>" => 'foo', - '<foo:bar/>' => 'foo:bar', - ); - foreach ($selfClose as $test => $expects) { - $events = $this->parse($test); - $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, TRUE)); - $this->assertEventEquals('startTag', $expects, $events->get(0)); - $this->assertEventEquals('endTag', $expects, $events->get(1)); + + /** + * This tests just simple tags. + */ + public function testSimpleTags() + { + $open = array( + '<foo>' => 'foo', + '<FOO>' => 'foo', + '<fOO>' => 'foo', + '<foo >' => 'foo', + "<foo\n\n\n\n>" => 'foo', + '<foo:bar>' => 'foo:bar' + ); + $this->isAllGood('startTag', 2, $open); + + $selfClose = array( + '<foo/>' => 'foo', + '<FOO/>' => 'foo', + '<foo />' => 'foo', + "<foo\n\n\n\n/>" => 'foo', + '<foo:bar/>' => 'foo:bar' + ); + foreach ($selfClose as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, TRUE)); + $this->assertEventEquals('startTag', $expects, $events->get(0)); + $this->assertEventEquals('endTag', $expects, $events->get(1)); + } + + $bad = array( + '<foo' => 'foo', + '<foo ' => 'foo', + '<foo/' => 'foo', + '<foo /' => 'foo' + ); + + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expects, $events->get(1)); + } } - $bad = array( - '<foo' => 'foo', - '<foo ' => 'foo', - '<foo/' => 'foo', - '<foo /' => 'foo', - ); - - foreach ($bad as $test => $expects) { - $events = $this->parse($test); - $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', $expects, $events->get(1)); + public function testTagsWithAttributeAndMissingName() + { + $cases = array( + '<id="top_featured">' => 'id', + '<color="white">' => 'color', + "<class='neaktivni_stranka'>" => 'class', + '<bgcolor="white">' => 'bgcolor', + '<class="nom">' => 'class' + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', $expected, $events->get(3)); + $this->assertEventEquals('eof', NULL, $events->get(4)); + } } - } - - public function testTagsWithAttributeAndMissingName() { - $cases = array( - '<id="top_featured">' => 'id', - '<color="white">' => 'color', - "<class='neaktivni_stranka'>" => 'class', - '<bgcolor="white">' => 'bgcolor', - '<class="nom">' => 'class', - ); - - foreach($cases as $html => $expected) { - $events = $this->parse($html); - $this->assertEventError($events->get(0)); - $this->assertEventError($events->get(1)); - $this->assertEventError($events->get(2)); - $this->assertEventEquals('startTag', $expected, $events->get(3)); - $this->assertEventEquals('eof', NULL, $events->get(4)); + + public function testTagNotClosedAfterTagName() + { + $cases = array( + "<noscript<img>" => array( + 'noscript', + 'img' + ), + '<center<a>' => array( + 'center', + 'a' + ), + '<br<br>' => array( + 'br', + 'br' + ) + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected[0], $events->get(1)); + $this->assertEventEquals('startTag', $expected[1], $events->get(2)); + $this->assertEventEquals('eof', NULL, $events->get(3)); + } + + $events = $this->parse('<span<>02</span>'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'span', $events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('text', '>02', $events->get(3)); + $this->assertEventEquals('endTag', 'span', $events->get(4)); + $this->assertEventEquals('eof', NULL, $events->get(5)); + + $events = $this->parse('<p</p>'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'p', $events->get(1)); + $this->assertEventEquals('endTag', 'p', $events->get(2)); + $this->assertEventEquals('eof', NULL, $events->get(3)); + + $events = $this->parse('<strong><WordPress</strong>'); + $this->assertEventEquals('startTag', 'strong', $events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventEquals('startTag', 'wordpress', $events->get(2)); + $this->assertEventEquals('endTag', 'strong', $events->get(3)); + $this->assertEventEquals('eof', NULL, $events->get(4)); + + $events = $this->parse('<src=<a>'); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', 'src', $events->get(3)); + $this->assertEventEquals('startTag', 'a', $events->get(4)); + $this->assertEventEquals('eof', NULL, $events->get(5)); + + $events = $this->parse('<br...<a>'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'br', $events->get(1)); + $this->assertEventEquals('eof', NULL, $events->get(2)); } - } - - public function testTagNotClosedAfterTagName() { - $cases = array( - "<noscript<img>" => array('noscript', 'img'), - '<center<a>' => array('center', 'a'), - '<br<br>' => array('br', 'br'), - ); - - foreach($cases as $html => $expected) { - $events = $this->parse($html); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', $expected[0], $events->get(1)); - $this->assertEventEquals('startTag', $expected[1], $events->get(2)); - $this->assertEventEquals('eof', NULL, $events->get(3)); + + public function testIllegalTagNames() + { + $cases = array( + '<li">' => 'li', + '<p">' => 'p', + '<b >' => 'b', + '<static*all>' => 'static', + '<h*0720/>' => 'h', + '<st*ATTRIBUTE />' => 'st', + '<a-href="http://url.com/">' => 'a' + ); + + foreach ($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected, $events->get(1)); + } } - $events = $this->parse('<span<>02</span>'); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', 'span', $events->get(1)); - $this->assertEventError($events->get(2)); - $this->assertEventEquals('text', '>02', $events->get(3)); - $this->assertEventEquals('endTag', 'span', $events->get(4)); - $this->assertEventEquals('eof', NULL, $events->get(5)); - - $events = $this->parse('<p</p>'); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', 'p', $events->get(1)); - $this->assertEventEquals('endTag', 'p', $events->get(2)); - $this->assertEventEquals('eof', NULL, $events->get(3)); - - $events = $this->parse('<strong><WordPress</strong>'); - $this->assertEventEquals('startTag', 'strong', $events->get(0)); - $this->assertEventError($events->get(1)); - $this->assertEventEquals('startTag', 'wordpress', $events->get(2)); - $this->assertEventEquals('endTag', 'strong', $events->get(3)); - $this->assertEventEquals('eof', NULL, $events->get(4)); - - $events = $this->parse('<src=<a>'); - $this->assertEventError($events->get(0)); - $this->assertEventError($events->get(1)); - $this->assertEventError($events->get(2)); - $this->assertEventEquals('startTag', 'src', $events->get(3)); - $this->assertEventEquals('startTag', 'a', $events->get(4)); - $this->assertEventEquals('eof', NULL, $events->get(5)); - - $events = $this->parse('<br...<a>'); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', 'br', $events->get(1)); - $this->assertEventEquals('eof', NULL, $events->get(2)); - } - - public function testIllegalTagNames() { - $cases = array( - '<li">' => 'li', - '<p">' => 'p', - '<b >' => 'b', - '<static*all>' => 'static', - '<h*0720/>' => 'h', - '<st*ATTRIBUTE />' => 'st', - '<a-href="http://url.com/">' => 'a', - ); - - foreach($cases as $html => $expected) { - $events = $this->parse($html); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', $expected, $events->get(1)); + /** + * @depends testCharacterReference + */ + public function testTagAttributes() + { + // Opening tags. + $good = array( + '<foo bar="baz">' => array( + 'foo', + array( + 'bar' => 'baz' + ), + FALSE + ), + '<foo bar=" baz ">' => array( + 'foo', + array( + 'bar' => ' baz ' + ), + FALSE + ), + "<foo bar=\"\nbaz\n\">" => array( + 'foo', + array( + 'bar' => "\nbaz\n" + ), + FALSE + ), + "<foo bar='baz'>" => array( + 'foo', + array( + 'bar' => 'baz' + ), + FALSE + ), + '<foo bar="A full sentence.">' => array( + 'foo', + array( + 'bar' => 'A full sentence.' + ), + FALSE + ), + "<foo a='1' b=\"2\">" => array( + 'foo', + array( + 'a' => '1', + 'b' => '2' + ), + FALSE + ), + "<foo ns:bar='baz'>" => array( + 'foo', + array( + 'ns:bar' => 'baz' + ), + FALSE + ), + "<foo a='blue&red'>" => array( + 'foo', + array( + 'a' => 'blue&red' + ), + FALSE + ), + "<foo a='blue&&red'>" => array( + 'foo', + array( + 'a' => 'blue&&red' + ), + FALSE + ), + "<foo\nbar='baz'\n>" => array( + 'foo', + array( + 'bar' => 'baz' + ), + FALSE + ), + '<doe a deer>' => array( + 'doe', + array( + 'a' => NULL, + 'deer' => NULL + ), + FALSE + ), + '<foo bar=baz>' => array( + 'foo', + array( + 'bar' => 'baz' + ), + FALSE + ), + + // Updated for 8.1.2.3 + '<foo bar = "baz" >' => array( + 'foo', + array( + 'bar' => 'baz' + ), + FALSE + ), + + // The spec allows an unquoted value '/'. This will not be a closing + // tag. + '<foo bar=/>' => array( + 'foo', + array( + 'bar' => '/' + ), + FALSE + ), + '<foo bar=baz/>' => array( + 'foo', + array( + 'bar' => 'baz/' + ), + FALSE + ) + ); + $this->isAllGood('startTag', 2, $good); + + // Self-closing tags. + $withEnd = array( + '<foo bar="baz"/>' => array( + 'foo', + array( + 'bar' => 'baz' + ), + TRUE + ), + '<foo BAR="baz"/>' => array( + 'foo', + array( + 'bar' => 'baz' + ), + TRUE + ), + '<foo BAR="BAZ"/>' => array( + 'foo', + array( + 'bar' => 'BAZ' + ), + TRUE + ), + "<foo a='1' b=\"2\" c=3 d/>" => array( + 'foo', + array( + 'a' => '1', + 'b' => '2', + 'c' => '3', + 'd' => NULL + ), + TRUE + ) + ); + $this->isAllGood('startTag', 3, $withEnd); + + // Cause a parse error. + $bad = array( + // This will emit an entity lookup failure for &red. + "<foo a='blue&red'>" => array( + 'foo', + array( + 'a' => 'blue&red' + ), + FALSE + ), + "<foo a='blue&&&red'>" => array( + 'foo', + array( + 'a' => 'blue&&&red' + ), + FALSE + ), + '<foo bar=>' => array( + 'foo', + array( + 'bar' => NULL + ), + FALSE + ), + '<foo bar="oh' => array( + 'foo', + array( + 'bar' => 'oh' + ), + FALSE + ), + '<foo bar=oh">' => array( + 'foo', + array( + 'bar' => 'oh"' + ), + FALSE + ), + + // these attributes are ignored because of current implementation + // of method "DOMElement::setAttribute" + // see issue #23: https://github.com/Masterminds/html5-php/issues/23 + '<foo b"="baz">' => array( + 'foo', + array(), + FALSE + ), + '<foo 2abc="baz">' => array( + 'foo', + array(), + FALSE + ), + '<foo ?="baz">' => array( + 'foo', + array(), + FALSE + ), + '<foo foo?bar="baz">' => array( + 'foo', + array(), + FALSE + ) + ) + ; + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + + // Cause multiple parse errors. + $reallyBad = array( + '<foo ="bar">' => array( + 'foo', + array( + '=' => NULL, + '"bar"' => NULL + ), + FALSE + ), + '<foo////>' => array( + 'foo', + array(), + TRUE + ), + // character "&" in unquoted attribute shouldn't cause an infinite loop + '<foo bar=index.php?str=1&id=29>' => array( + 'foo', + array( + 'bar' => 'index.php?str=1&id=29' + ), + FALSE + ) + ); + foreach ($reallyBad as $test => $expects) { + $events = $this->parse($test); + // fprintf(STDOUT, $test . print_r($events, TRUE)); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + // $this->assertEventEquals('startTag', $expects, $events->get(1)); + } + + // Regression: Malformed elements should be detected. + // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE), + $events = $this->parse('<foo baz="1" <bar></foo>'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', array( + 'foo', + array( + 'baz' => '1' + ), + FALSE + ), $events->get(1)); + $this->assertEventEquals('startTag', array( + 'bar', + array(), + FALSE + ), $events->get(2)); + $this->assertEventEquals('endTag', array( + 'foo' + ), $events->get(3)); } - } - - /** - * @depends testCharacterReference - */ - public function testTagAttributes() { - // Opening tags. - $good = array( - '<foo bar="baz">' => array('foo', array('bar' => 'baz'), FALSE), - '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), FALSE), - "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), FALSE), - "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), FALSE), - '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), FALSE), - "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), FALSE), - "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), FALSE), - "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), FALSE), - "<foo a='blue&&red'>" => array('foo', array('a' => 'blue&&red'), FALSE), - "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), FALSE), - '<doe a deer>' => array('doe', array('a' => NULL, 'deer' => NULL), FALSE), - '<foo bar=baz>' => array('foo', array('bar' => 'baz'), FALSE), - - // Updated for 8.1.2.3 - '<foo bar = "baz" >' => array('foo', array('bar' => 'baz'), FALSE), - - // The spec allows an unquoted value '/'. This will not be a closing - // tag. - '<foo bar=/>' => array('foo', array('bar' => '/'), FALSE), - '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), FALSE), - ); - $this->isAllGood('startTag', 2, $good); - - // Self-closing tags. - $withEnd = array( - '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), TRUE), - '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), TRUE), - '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), TRUE), - "<foo a='1' b=\"2\" c=3 d/>" => array('foo', array('a' => '1', 'b' => '2', 'c' => '3', 'd' => NULL), TRUE), - ); - $this->isAllGood('startTag', 3, $withEnd); - - // Cause a parse error. - $bad = array( - // This will emit an entity lookup failure for &red. - "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), FALSE), - "<foo a='blue&&&red'>" => array('foo', array('a' => 'blue&&&red'), FALSE), - '<foo bar=>' => array('foo', array('bar' => NULL), FALSE), - '<foo bar="oh' => array('foo', array('bar' => 'oh'), FALSE), - '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), FALSE), - - // these attributes are ignored because of current implementation - // of method "DOMElement::setAttribute" - // see issue #23: https://github.com/Masterminds/html5-php/issues/23 - '<foo b"="baz">' => array('foo', array(), FALSE), - '<foo 2abc="baz">' => array('foo', array(), FALSE), - '<foo ?="baz">' => array('foo', array(), FALSE), - '<foo foo?bar="baz">' => array('foo', array(), FALSE), - - ); - foreach ($bad as $test => $expects) { - $events = $this->parse($test); - $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', $expects, $events->get(1)); + + public function testRawText() + { + $good = array( + '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop', + '<script><not/><the/><tag></script>' => '<not/><the/><tag>', + '<script><<<<<<<<</script>' => '<<<<<<<<', + '<script>hello</script</script>' => 'hello</script', + "<script>\nhello</script\n</script>" => "\nhello</script\n", + '<script>&</script>' => '&', + '<script><!--not a comment--></script>' => '<!--not a comment-->', + '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>' + ); + foreach ($good as $test => $expects) { + $events = $this->parse($test); + $this->assertEventEquals('startTag', 'script', $events->get(0)); + $this->assertEventEquals('text', $expects, $events->get(1)); + $this->assertEventEquals('endTag', 'script', $events->get(2)); + } + + $bad = array( + '<script>&</script' => '&</script', + '<script>Hello world' => 'Hello world' + ); + foreach ($bad as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); + $this->assertEventEquals('startTag', 'script', $events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventEquals('text', $expects, $events->get(2)); + } + + // Testing case sensitivity + $events = $this->parse('<TITLE>a test</TITLE>'); + $this->assertEventEquals('startTag', 'title', $events->get(0)); + $this->assertEventEquals('text', 'a test', $events->get(1)); + $this->assertEventEquals('endTag', 'title', $events->get(2)); } - // Cause multiple parse errors. - $reallyBad = array( - '<foo ="bar">' => array('foo', array('=' => NULL, '"bar"' => NULL), FALSE), - '<foo////>' => array('foo', array(), TRUE), - // character "&" in unquoted attribute shouldn't cause an infinite loop - '<foo bar=index.php?str=1&id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), FALSE), - ); - foreach ($reallyBad as $test => $expects) { - $events = $this->parse($test); - //fprintf(STDOUT, $test . print_r($events, TRUE)); - $this->assertEventError($events->get(0)); - $this->assertEventError($events->get(1)); - //$this->assertEventEquals('startTag', $expects, $events->get(1)); + public function testRcdata() + { + list ($tok, $events) = $this->createTokenizer('<title>'<!-- not a comment --></TITLE>'); + $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title'); + $tok->parse(); + $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1)); } - // Regression: Malformed elements should be detected. - // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE), - $events = $this->parse('<foo baz="1" <bar></foo>'); - $this->assertEventError($events->get(0)); - $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), FALSE), $events->get(1)); - $this->assertEventEquals('startTag', array('bar', array(), FALSE), $events->get(2)); - $this->assertEventEquals('endTag', array('foo'), $events->get(3)); - } - - public function testRawText() { - $good = array( - '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop', - '<script><not/><the/><tag></script>' => '<not/><the/><tag>', - '<script><<<<<<<<</script>' => '<<<<<<<<', - '<script>hello</script</script>' => 'hello</script', - "<script>\nhello</script\n</script>" => "\nhello</script\n", - '<script>&</script>' => '&', - '<script><!--not a comment--></script>' => '<!--not a comment-->', - '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>', - ); - foreach ($good as $test => $expects) { - $events = $this->parse($test); - $this->assertEventEquals('startTag', 'script', $events->get(0)); - $this->assertEventEquals('text', $expects, $events->get(1)); - $this->assertEventEquals('endTag', 'script', $events->get(2)); + public function testText() + { + $events = $this->parse('a<br>b'); + $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); + $this->assertEventEquals('text', 'a', $events->get(0)); + $this->assertEventEquals('startTag', 'br', $events->get(1)); + $this->assertEventEquals('text', 'b', $events->get(2)); + + $events = $this->parse('<a>Test</a>'); + $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); + $this->assertEventEquals('startTag', 'a', $events->get(0)); + $this->assertEventEquals('text', 'Test', $events->get(1)); + $this->assertEventEquals('endTag', 'a', $events->get(2)); + + $events = $this->parse('a<![CDATA[test]]>b'); + $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); + $this->assertEventEquals('text', 'a', $events->get(0)); + $this->assertEventEquals('cdata', 'test', $events->get(1)); + $this->assertEventEquals('text', 'b', $events->get(2)); + + $events = $this->parse('a<!--test-->b'); + $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); + $this->assertEventEquals('text', 'a', $events->get(0)); + $this->assertEventEquals('comment', 'test', $events->get(1)); + $this->assertEventEquals('text', 'b', $events->get(2)); + + $events = $this->parse('a&b'); + $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, TRUE)); + $this->assertEventEquals('text', 'a&b', $events->get(0)); } - $bad = array( - '<script>&</script' => '&</script', - '<script>Hello world' => 'Hello world', - ); - foreach ($bad as $test => $expects) { - $events = $this->parse($test); - $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); - $this->assertEventEquals('startTag', 'script', $events->get(0)); - $this->assertEventError($events->get(1)); - $this->assertEventEquals('text', $expects, $events->get(2)); + // ================================================================ + // Utility functions. + // ================================================================ + protected function createTokenizer($string, $debug = FALSE) + { + $eventHandler = new EventStack(); + $stream = new StringInputStream($string); + $scanner = new Scanner($stream); + + $scanner->debug = $debug; + + return array( + new Tokenizer($scanner, $eventHandler), + $eventHandler + ); } - // Testing case sensitivity - $events = $this->parse('<TITLE>a test</TITLE>'); - $this->assertEventEquals('startTag', 'title', $events->get(0)); - $this->assertEventEquals('text', 'a test', $events->get(1)); - $this->assertEventEquals('endTag', 'title', $events->get(2)); - - } - - public function testRcdata() { - list($tok, $events) = $this->createTokenizer('<title>'<!-- not a comment --></TITLE>'); - $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title'); - $tok->parse(); - $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1)); - } - - public function testText() { - - $events = $this->parse('a<br>b'); - $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); - $this->assertEventEquals('text', 'a', $events->get(0)); - $this->assertEventEquals('startTag', 'br', $events->get(1)); - $this->assertEventEquals('text', 'b', $events->get(2)); - - $events = $this->parse('<a>Test</a>'); - $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); - $this->assertEventEquals('startTag', 'a', $events->get(0)); - $this->assertEventEquals('text', 'Test', $events->get(1)); - $this->assertEventEquals('endTag', 'a', $events->get(2)); - - $events = $this->parse('a<![CDATA[test]]>b'); - $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); - $this->assertEventEquals('text', 'a', $events->get(0)); - $this->assertEventEquals('cdata', 'test', $events->get(1)); - $this->assertEventEquals('text', 'b', $events->get(2)); - - $events = $this->parse('a<!--test-->b'); - $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE)); - $this->assertEventEquals('text', 'a', $events->get(0)); - $this->assertEventEquals('comment', 'test', $events->get(1)); - $this->assertEventEquals('text', 'b', $events->get(2)); - - $events = $this->parse('a&b'); - $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, TRUE)); - $this->assertEventEquals('text', 'a&b', $events->get(0)); - } - - // ================================================================ - // Utility functions. - // ================================================================ - protected function createTokenizer($string, $debug = FALSE) { - $eventHandler = new EventStack(); - $stream = new StringInputStream($string); - $scanner = new Scanner($stream); - - $scanner->debug = $debug; - - return array( - new Tokenizer($scanner, $eventHandler), - $eventHandler, - ); - } - - public function parse($string, $debug = FALSE) { - list($tok, $events) = $this->createTokenizer($string, $debug); - $tok->parse(); - - return $events; - } + public function parse($string, $debug = FALSE) + { + list ($tok, $events) = $this->createTokenizer($string, $debug); + $tok->parse(); + + return $events; + } } diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php index ff07a0a..cb9b1e4 100644 --- a/test/HTML5/Parser/TreeBuildingRulesTest.php +++ b/test/HTML5/Parser/TreeBuildingRulesTest.php @@ -5,77 +5,78 @@ */ namespace Masterminds\HTML5\Tests\Parser; -use Masterminds\HTML5\Elements; use Masterminds\HTML5\Parser\TreeBuildingRules; use Masterminds\HTML5\Parser\Tokenizer; use Masterminds\HTML5\Parser\Scanner; use Masterminds\HTML5\Parser\StringInputStream; use Masterminds\HTML5\Parser\DOMTreeBuilder; - - /** * These tests are functional, not necessarily unit tests. */ -class TreeBuildingRulesTest extends \Masterminds\HTML5\Tests\TestCase { +class TreeBuildingRulesTest extends \Masterminds\HTML5\Tests\TestCase +{ - const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>'; + const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>'; - /** - * Convenience function for parsing. - */ - protected function parse($string) { - $treeBuilder = new DOMTreeBuilder(); - $input = new StringInputStream($string); - $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $treeBuilder); + /** + * Convenience function for parsing. + */ + protected function parse($string) + { + $treeBuilder = new DOMTreeBuilder(); + $input = new StringInputStream($string); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $treeBuilder); - $parser->parse(); + $parser->parse(); - return $treeBuilder->document(); - } + return $treeBuilder->document(); + } - public function testHasRules() { - $doc = new \DOMDocument('1.0'); - $engine = new TreeBuildingRules($doc); + public function testHasRules() + { + $doc = new \DOMDocument('1.0'); + $engine = new TreeBuildingRules($doc); - $this->assertTrue($engine->hasRules('li')); - $this->assertFalse($engine->hasRules('imaginary')); - } + $this->assertTrue($engine->hasRules('li')); + $this->assertFalse($engine->hasRules('imaginary')); + } - public function testHandleLI() { - $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>'); - $doc = $this->parse($html); + public function testHandleLI() + { + $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>'); + $doc = $this->parse($html); - $list = $doc->getElementById('a'); + $list = $doc->getElementById('a'); - $this->assertEquals(2, $list->childNodes->length); - foreach($list->childNodes as $ele) { - $this->assertEquals('li', $ele->tagName); + $this->assertEquals(2, $list->childNodes->length); + foreach ($list->childNodes as $ele) { + $this->assertEquals('li', $ele->tagName); + } } - } + public function testHandleDT() + { + $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>'); + $doc = $this->parse($html); - public function testHandleDT() { - $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>'); - $doc = $this->parse($html); + $list = $doc->getElementById('a'); - $list = $doc->getElementById('a'); - - $this->assertEquals(2, $list->childNodes->length); - $this->assertEquals('dt', $list->firstChild->tagName); - $this->assertEquals('dd', $list->lastChild->tagName); - } - - public function testTable() { - $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz'); - $doc = $this->parse($html); + $this->assertEquals(2, $list->childNodes->length); + $this->assertEquals('dt', $list->firstChild->tagName); + $this->assertEquals('dd', $list->lastChild->tagName); + } - $list = $doc->getElementById('a'); + public function testTable() + { + $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz'); + $doc = $this->parse($html); - $this->assertEquals(3, $list->childNodes->length); - $this->assertEquals('th', $list->firstChild->tagName); - $this->assertEquals('td', $list->lastChild->tagName); - } + $list = $doc->getElementById('a'); + $this->assertEquals(3, $list->childNodes->length); + $this->assertEquals('th', $list->firstChild->tagName); + $this->assertEquals('td', $list->lastChild->tagName); + } } |