summaryrefslogtreecommitdiff
path: root/test/HTML5/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'test/HTML5/Parser')
-rw-r--r--test/HTML5/Parser/CharacterReferenceTest.php56
-rw-r--r--test/HTML5/Parser/DOMTreeBuilderTest.php569
-rw-r--r--test/HTML5/Parser/EventStack.php156
-rw-r--r--test/HTML5/Parser/EventStackError.php3
-rw-r--r--test/HTML5/Parser/FileInputStreamTest.php322
-rw-r--r--test/HTML5/Parser/InstructionProcessorMock.php12
-rw-r--r--test/HTML5/Parser/ScannerTest.php231
-rw-r--r--test/HTML5/Parser/StringInputStreamTest.php654
-rw-r--r--test/HTML5/Parser/TokenizerTest.php1475
-rw-r--r--test/HTML5/Parser/TreeBuildingRulesTest.php97
10 files changed, 2018 insertions, 1557 deletions
diff --git a/test/HTML5/Parser/CharacterReferenceTest.php b/test/HTML5/Parser/CharacterReferenceTest.php
index 6dedb00..762bcc2 100644
--- a/test/HTML5/Parser/CharacterReferenceTest.php
+++ b/test/HTML5/Parser/CharacterReferenceTest.php
@@ -6,35 +6,39 @@
namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\CharacterReference;
-class CharacterReferenceTest extends \Masterminds\HTML5\Tests\TestCase {
- public function testLookupName() {
- $this->assertEquals('&', CharacterReference::lookupName('amp'));
- $this->assertEquals('<', CharacterReference::lookupName('lt'));
- $this->assertEquals('>', CharacterReference::lookupName('gt'));
- $this->assertEquals('"', CharacterReference::lookupName('quot'));
- $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement'));
- $this->assertNull(CharacterReference::lookupName('StinkyCheese'));
- }
+class CharacterReferenceTest extends \Masterminds\HTML5\Tests\TestCase
+{
- public function testLookupHex() {
- $this->assertEquals('<', CharacterReference::lookupHex('3c'));
- $this->assertEquals('<', CharacterReference::lookupHex('003c'));
- $this->assertEquals('&', CharacterReference::lookupHex('26'));
- $this->assertEquals('}', CharacterReference::lookupHex('7d'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('3A3'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('03A3'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('3a3'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('03a3'));
- }
+ public function testLookupName()
+ {
+ $this->assertEquals('&', CharacterReference::lookupName('amp'));
+ $this->assertEquals('<', CharacterReference::lookupName('lt'));
+ $this->assertEquals('>', CharacterReference::lookupName('gt'));
+ $this->assertEquals('"', CharacterReference::lookupName('quot'));
+ $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement'));
- public function testLookupDecimal() {
- $this->assertEquals('&', CharacterReference::lookupDecimal(38));
- $this->assertEquals('&', CharacterReference::lookupDecimal('38'));
- $this->assertEquals('<', CharacterReference::lookupDecimal(60));
- $this->assertEquals('Σ', CharacterReference::lookupDecimal(931));
- $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931'));
- }
+ $this->assertNull(CharacterReference::lookupName('StinkyCheese'));
+ }
+ public function testLookupHex()
+ {
+ $this->assertEquals('<', CharacterReference::lookupHex('3c'));
+ $this->assertEquals('<', CharacterReference::lookupHex('003c'));
+ $this->assertEquals('&', CharacterReference::lookupHex('26'));
+ $this->assertEquals('}', CharacterReference::lookupHex('7d'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('3A3'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('03A3'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('3a3'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('03a3'));
+ }
+ public function testLookupDecimal()
+ {
+ $this->assertEquals('&', CharacterReference::lookupDecimal(38));
+ $this->assertEquals('&', CharacterReference::lookupDecimal('38'));
+ $this->assertEquals('<', CharacterReference::lookupDecimal(60));
+ $this->assertEquals('Σ', CharacterReference::lookupDecimal(931));
+ $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931'));
+ }
}
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 52dad30..d8b686c 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -5,7 +5,6 @@
*/
namespace Masterminds\HTML5\Tests\Parser;
-use Masterminds\HTML5\Elements;
use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
@@ -14,119 +13,128 @@ use Masterminds\HTML5\Parser\DOMTreeBuilder;
/**
* These tests are functional, not necessarily unit tests.
*/
-class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
-
- /**
- * Convenience function for parsing.
- */
- protected function parse($string) {
- $treeBuilder = new DOMTreeBuilder();
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
-
- $parser->parse();
-
- return $treeBuilder->document();
- }
-
- /**
- * Utility function for parsing a fragment of HTML5.
- */
- protected function parseFragment($string) {
- $treeBuilder = new DOMTreeBuilder(TRUE);
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
-
- $parser->parse();
-
- return $treeBuilder->fragment();
- }
-
- public function testDocument() {
- $html = "<!DOCTYPE html><html></html>";
- $doc = $this->parse($html);
-
- $this->assertInstanceOf('\DOMDocument', $doc);
- $this->assertEquals('html', $doc->documentElement->tagName);
- }
-
- public function testFragment() {
- $html = "<div>test</div><span>test2</span>";
- $doc = $this->parseFragment($html);
-
- $this->assertInstanceOf('\DOMDocumentFragment', $doc);
- $this->assertTrue($doc->hasChildNodes());
- $this->assertEquals('div', $doc->childNodes->item(0)->tagName);
- $this->assertEquals('test', $doc->childNodes->item(0)->textContent);
- $this->assertEquals('span', $doc->childNodes->item(1)->tagName);
- $this->assertEquals('test2', $doc->childNodes->item(1)->textContent);
- }
-
- public function testElements() {
- $html = "<!DOCTYPE html><html><head><title></title></head><body></body></html>";
- $doc = $this->parse($html);
- $root = $doc->documentElement;
-
- $this->assertEquals('html', $root->tagName);
- $this->assertEquals('html', $root->localName);
- $this->assertEquals('html', $root->nodeName);
-
- $this->assertEquals(2, $root->childNodes->length);
- $kids = $root->childNodes;
-
- $this->assertEquals('head', $kids->item(0)->tagName);
- $this->assertEquals('body', $kids->item(1)->tagName);
-
- $head = $kids->item(0);
- $this->assertEquals(1, $head->childNodes->length);
- $this->assertEquals('title', $head->childNodes->item(0)->tagName);
- }
-
- public function testAttributes() {
- $html = "<!DOCTYPE html>
+class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ /**
+ * Convenience function for parsing.
+ */
+ protected function parse($string)
+ {
+ $treeBuilder = new DOMTreeBuilder();
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+
+ return $treeBuilder->document();
+ }
+
+ /**
+ * Utility function for parsing a fragment of HTML5.
+ */
+ protected function parseFragment($string)
+ {
+ $treeBuilder = new DOMTreeBuilder(TRUE);
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+
+ return $treeBuilder->fragment();
+ }
+
+ public function testDocument()
+ {
+ $html = "<!DOCTYPE html><html></html>";
+ $doc = $this->parse($html);
+
+ $this->assertInstanceOf('\DOMDocument', $doc);
+ $this->assertEquals('html', $doc->documentElement->tagName);
+ }
+
+ public function testFragment()
+ {
+ $html = "<div>test</div><span>test2</span>";
+ $doc = $this->parseFragment($html);
+
+ $this->assertInstanceOf('\DOMDocumentFragment', $doc);
+ $this->assertTrue($doc->hasChildNodes());
+ $this->assertEquals('div', $doc->childNodes->item(0)->tagName);
+ $this->assertEquals('test', $doc->childNodes->item(0)->textContent);
+ $this->assertEquals('span', $doc->childNodes->item(1)->tagName);
+ $this->assertEquals('test2', $doc->childNodes->item(1)->textContent);
+ }
+
+ public function testElements()
+ {
+ $html = "<!DOCTYPE html><html><head><title></title></head><body></body></html>";
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
+
+ $this->assertEquals('html', $root->tagName);
+ $this->assertEquals('html', $root->localName);
+ $this->assertEquals('html', $root->nodeName);
+
+ $this->assertEquals(2, $root->childNodes->length);
+ $kids = $root->childNodes;
+
+ $this->assertEquals('head', $kids->item(0)->tagName);
+ $this->assertEquals('body', $kids->item(1)->tagName);
+
+ $head = $kids->item(0);
+ $this->assertEquals(1, $head->childNodes->length);
+ $this->assertEquals('title', $head->childNodes->item(0)->tagName);
+ }
+
+ public function testAttributes()
+ {
+ $html = "<!DOCTYPE html>
<html>
<head><title></title></head>
<body id='a' class='b c'></body>
</html>";
- $doc = $this->parse($html);
- $root = $doc->documentElement;
-
- $body = $root->GetElementsByTagName('body')->item(0);
- $this->assertEquals('body', $body->tagName);
- $this->assertTrue($body->hasAttributes());
- $this->assertEquals('a', $body->getAttribute('id'));
- $this->assertEquals('b c', $body->getAttribute('class'));
-
- $body2 = $doc->getElementById('a');
- $this->assertEquals('body', $body2->tagName);
- $this->assertEquals('a', $body2->getAttribute('id'));
- }
-
- public function testSVGAttributes() {
- $html = "<!DOCTYPE html>
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
+
+ $body = $root->GetElementsByTagName('body')->item(0);
+ $this->assertEquals('body', $body->tagName);
+ $this->assertTrue($body->hasAttributes());
+ $this->assertEquals('a', $body->getAttribute('id'));
+ $this->assertEquals('b c', $body->getAttribute('class'));
+
+ $body2 = $doc->getElementById('a');
+ $this->assertEquals('body', $body2->tagName);
+ $this->assertEquals('a', $body2->getAttribute('id'));
+ }
+
+ public function testSVGAttributes()
+ {
+ $html = "<!DOCTYPE html>
<html><body>
<svg width='150' viewbox='2'>
<rect textlength='2'/>
<animatecolor>foo</animatecolor>
</svg>
</body></html>";
- $doc = $this->parse($html);
- $root = $doc->documentElement;
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
- $svg = $root->getElementsByTagName('svg')->item(0);
- $this->assertTrue($svg->hasAttribute('viewBox'));
+ $svg = $root->getElementsByTagName('svg')->item(0);
+ $this->assertTrue($svg->hasAttribute('viewBox'));
- $rect = $root->getElementsByTagName('rect')->item(0);
- $this->assertTrue($rect->hasAttribute('textLength'));
+ $rect = $root->getElementsByTagName('rect')->item(0);
+ $this->assertTrue($rect->hasAttribute('textLength'));
- $ac = $root->getElementsByTagName('animateColor');
- $this->assertEquals(1, $ac->length);
- }
+ $ac = $root->getElementsByTagName('animateColor');
+ $this->assertEquals(1, $ac->length);
+ }
- public function testMathMLAttribute() {
- $html = '<!doctype html>
+ public function testMathMLAttribute()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<math>
@@ -139,124 +147,132 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>';
- $doc = $this->parse($html);
- $root = $doc->documentElement;
-
- $csymbol = $root->getElementsByTagName('csymbol')->item(0);
- $this->assertTrue($csymbol->hasAttribute('definitionURL'));
- }
-
- public function testMissingHtmlTag() {
- $html = "<!DOCTYPE html><title>test</title>";
- $doc = $this->parse($html);
-
- $this->assertEquals('html', $doc->documentElement->tagName);
- $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
- }
-
- public function testComment() {
- $html = '<html><!--Hello World.--></html>';
-
- $doc = $this->parse($html);
-
- $comment = $doc->documentElement->childNodes->item(0);
- $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
- $this->assertEquals("Hello World.", $comment->data);
-
-
- $html = '<!--Hello World.--><html></html>';
- $doc = $this->parse($html);
-
- $comment = $doc->childNodes->item(1);
- $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
- $this->assertEquals("Hello World.", $comment->data);
-
- $comment = $doc->childNodes->item(2);
- $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType);
- $this->assertEquals("html", $comment->tagName);
- }
-
- public function testCDATA() {
- $html = "<!DOCTYPE html><html><math><![CDATA[test]]></math></html>";
- $doc = $this->parse($html);
-
- $wrapper = $doc->getElementsByTagName('math')->item(0);
- $this->assertEquals(1, $wrapper->childNodes->length);
- $cdata = $wrapper->childNodes->item(0);
- $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
- $this->assertEquals('test', $cdata->data);
- }
-
- public function testText() {
- $html = "<!DOCTYPE html><html><head></head><body><math>test</math></body></html>";
- $doc = $this->parse($html);
-
- $wrapper = $doc->getElementsByTagName('math')->item(0);
- $this->assertEquals(1, $wrapper->childNodes->length);
- $data = $wrapper->childNodes->item(0);
- $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
- $this->assertEquals('test', $data->data);
-
- // The DomTreeBuilder has special handling for text when in before head mode.
- $html = "<!DOCTYPE html><html>
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
+
+ $csymbol = $root->getElementsByTagName('csymbol')->item(0);
+ $this->assertTrue($csymbol->hasAttribute('definitionURL'));
+ }
+
+ public function testMissingHtmlTag()
+ {
+ $html = "<!DOCTYPE html><title>test</title>";
+ $doc = $this->parse($html);
+
+ $this->assertEquals('html', $doc->documentElement->tagName);
+ $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
+ }
+
+ public function testComment()
+ {
+ $html = '<html><!--Hello World.--></html>';
+
+ $doc = $this->parse($html);
+
+ $comment = $doc->documentElement->childNodes->item(0);
+ $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
+ $this->assertEquals("Hello World.", $comment->data);
+
+ $html = '<!--Hello World.--><html></html>';
+ $doc = $this->parse($html);
+
+ $comment = $doc->childNodes->item(1);
+ $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
+ $this->assertEquals("Hello World.", $comment->data);
+
+ $comment = $doc->childNodes->item(2);
+ $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType);
+ $this->assertEquals("html", $comment->tagName);
+ }
+
+ public function testCDATA()
+ {
+ $html = "<!DOCTYPE html><html><math><![CDATA[test]]></math></html>";
+ $doc = $this->parse($html);
+
+ $wrapper = $doc->getElementsByTagName('math')->item(0);
+ $this->assertEquals(1, $wrapper->childNodes->length);
+ $cdata = $wrapper->childNodes->item(0);
+ $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
+ $this->assertEquals('test', $cdata->data);
+ }
+
+ public function testText()
+ {
+ $html = "<!DOCTYPE html><html><head></head><body><math>test</math></body></html>";
+ $doc = $this->parse($html);
+
+ $wrapper = $doc->getElementsByTagName('math')->item(0);
+ $this->assertEquals(1, $wrapper->childNodes->length);
+ $data = $wrapper->childNodes->item(0);
+ $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
+ $this->assertEquals('test', $data->data);
+
+ // The DomTreeBuilder has special handling for text when in before head mode.
+ $html = "<!DOCTYPE html><html>
Foo<head></head><body></body></html>";
- $doc = $this->parse($html);
- $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]);
- $headElement = $doc->documentElement->firstChild;
- $this->assertEquals('head', $headElement->tagName);
- }
-
- public function testParseErrors() {
- $html = "<!DOCTYPE html><html><math><![CDATA[test";
- $doc = $this->parse($html);
-
- // We're JUST testing that we can access errors. Actual testing of
- // error messages happen in the Tokenizer's tests.
- $this->assertGreaterThan(0, count($doc->errors));
- $this->assertTrue(is_string($doc->errors[0]));
- }
-
- public function testProcessingInstruction() {
- // Test the simple case, which is where PIs are inserted into the DOM.
- $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
- $this->assertEquals(1, $doc->documentElement->childNodes->length);
- $pi = $doc->documentElement->firstChild;
- $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
- $this->assertEquals('foo', $pi->nodeName);
- $this->assertEquals('bar', $pi->data);
-
- // Leading xml PIs should be ignored.
- $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
-
- $this->assertEquals(2, $doc->childNodes->length);
- $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
- $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
- }
-
- public function testAutocloseP() {
- $html = "<!DOCTYPE html><html><body><p><figure></body></html>";
- $doc = $this->parse($html);
-
- $p = $doc->getElementsByTagName('p')->item(0);
- $this->assertEquals(0, $p->childNodes->length);
- $this->assertEquals('figure', $p->nextSibling->tagName);
- }
-
- public function testAutocloseLI() {
- $html = '<!doctype html>
+ $doc = $this->parse($html);
+ $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]);
+ $headElement = $doc->documentElement->firstChild;
+ $this->assertEquals('head', $headElement->tagName);
+ }
+
+ public function testParseErrors()
+ {
+ $html = "<!DOCTYPE html><html><math><![CDATA[test";
+ $doc = $this->parse($html);
+
+ // We're JUST testing that we can access errors. Actual testing of
+ // error messages happen in the Tokenizer's tests.
+ $this->assertGreaterThan(0, count($doc->errors));
+ $this->assertTrue(is_string($doc->errors[0]));
+ }
+
+ public function testProcessingInstruction()
+ {
+ // Test the simple case, which is where PIs are inserted into the DOM.
+ $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
+ $this->assertEquals(1, $doc->documentElement->childNodes->length);
+ $pi = $doc->documentElement->firstChild;
+ $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
+ $this->assertEquals('foo', $pi->nodeName);
+ $this->assertEquals('bar', $pi->data);
+
+ // Leading xml PIs should be ignored.
+ $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
+
+ $this->assertEquals(2, $doc->childNodes->length);
+ $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
+ $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
+ }
+
+ public function testAutocloseP()
+ {
+ $html = "<!DOCTYPE html><html><body><p><figure></body></html>";
+ $doc = $this->parse($html);
+
+ $p = $doc->getElementsByTagName('p')->item(0);
+ $this->assertEquals(0, $p->childNodes->length);
+ $this->assertEquals('figure', $p->nextSibling->tagName);
+ }
+
+ public function testAutocloseLI()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<ul><li>Foo<li>Bar<li>Baz</ul>
</body>
</html>';
- $doc = $this->parse($html);
- $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
- $this->assertEquals(3, $length);
- }
+ $doc = $this->parse($html);
+ $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
+ $this->assertEquals(3, $length);
+ }
- public function testMathML() {
- $html = '<!doctype html>
+ public function testMathML()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
@@ -269,16 +285,17 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>';
- $doc = $this->parse($html);
- $math = $doc->getElementsByTagName('math')->item(0);
- $this->assertEquals('math', $math->tagName);
- $this->assertEquals('math', $math->nodeName);
- $this->assertEquals('math', $math->localName);
- $this->assertEmpty($math->namespaceURI);
- }
-
- public function testSVG() {
- $html = '<!doctype html>
+ $doc = $this->parse($html);
+ $math = $doc->getElementsByTagName('math')->item(0);
+ $this->assertEquals('math', $math->tagName);
+ $this->assertEquals('math', $math->nodeName);
+ $this->assertEquals('math', $math->localName);
+ $this->assertEmpty($math->namespaceURI);
+ }
+
+ public function testSVG()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg">
@@ -292,59 +309,61 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>';
- $doc = $this->parse($html);
- $svg = $doc->getElementsByTagName('svg')->item(0);
- $this->assertEquals('svg', $svg->tagName);
- $this->assertEquals('svg', $svg->nodeName);
- $this->assertEquals('svg', $svg->localName);
- $this->assertEmpty($svg->namespaceURI);
-
- $textPath = $doc->getElementsByTagName('textPath')->item(0);
- $this->assertEquals('textPath', $textPath->tagName);
- }
-
- public function testNoScript() {
- $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
- $doc = $this->parse($html);
- $this->assertEmpty($doc->errors);
- $noscript = $doc->getElementsByTagName('noscript')->item(0);
- $this->assertEquals('noscript', $noscript->tagName);
- }
-
- /**
- * Regression for issue #13
- */
- public function testRegressionHTMLNoBody() {
- $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
- $doc = $this->parse($html);
- $span = $doc->getElementById('test');
-
- $this->assertEmpty($doc->errors);
-
- $this->assertEquals('span', $span->tagName);
- $this->assertEquals('Test', $span->textContent);
- }
-
- public function testInstructionProcessor() {
- $string = '<!DOCTYPE html><html><?foo bar ?></html>';
-
- $treeBuilder = new DOMTreeBuilder();
- $is = new InstructionProcessorMock();
- $treeBuilder->setInstructionProcessor($is);
-
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
-
- $parser->parse();
- $dom = $treeBuilder->document();
- $div = $dom->getElementsByTagName('div')->item(0);
-
- $this->assertEquals(1, $is->count);
- $this->assertEquals('foo', $is->name);
- $this->assertEquals('bar ', $is->data);
- $this->assertEquals('div', $div->tagName);
- $this->assertEquals('foo', $div->textContent);
- }
+ $doc = $this->parse($html);
+ $svg = $doc->getElementsByTagName('svg')->item(0);
+ $this->assertEquals('svg', $svg->tagName);
+ $this->assertEquals('svg', $svg->nodeName);
+ $this->assertEquals('svg', $svg->localName);
+ $this->assertEmpty($svg->namespaceURI);
+
+ $textPath = $doc->getElementsByTagName('textPath')->item(0);
+ $this->assertEquals('textPath', $textPath->tagName);
+ }
+
+ public function testNoScript()
+ {
+ $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
+ $doc = $this->parse($html);
+ $this->assertEmpty($doc->errors);
+ $noscript = $doc->getElementsByTagName('noscript')->item(0);
+ $this->assertEquals('noscript', $noscript->tagName);
+ }
+
+ /**
+ * Regression for issue #13
+ */
+ public function testRegressionHTMLNoBody()
+ {
+ $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
+ $doc = $this->parse($html);
+ $span = $doc->getElementById('test');
+
+ $this->assertEmpty($doc->errors);
+
+ $this->assertEquals('span', $span->tagName);
+ $this->assertEquals('Test', $span->textContent);
+ }
+
+ public function testInstructionProcessor()
+ {
+ $string = '<!DOCTYPE html><html><?foo bar ?></html>';
+
+ $treeBuilder = new DOMTreeBuilder();
+ $is = new InstructionProcessorMock();
+ $treeBuilder->setInstructionProcessor($is);
+
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+ $dom = $treeBuilder->document();
+ $div = $dom->getElementsByTagName('div')->item(0);
+
+ $this->assertEquals(1, $is->count);
+ $this->assertEquals('foo', $is->name);
+ $this->assertEquals('bar ', $is->data);
+ $this->assertEquals('div', $div->tagName);
+ $this->assertEquals('foo', $div->textContent);
+ }
}
-
diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php
index 050cb5a..da0d3ec 100644
--- a/test/HTML5/Parser/EventStack.php
+++ b/test/HTML5/Parser/EventStack.php
@@ -14,77 +14,103 @@ use Masterminds\HTML5\Parser\EventHandler;
* script or pre tags. This is to match the behavior required by the HTML5 spec,
* which says that the tree builder must tell the tokenizer when to switch states.
*/
-class EventStack implements EventHandler {
- protected $stack;
-
- public function __construct() {
- $this->stack = array();
- }
-
- /**
- * Get the event stack.
- */
- public function events() {
- return $this->stack;
- }
-
- public function depth() {
- return count($this->stack);
- }
-
- public function get($index) {
- return $this->stack[$index];
- }
-
- protected function store($event, $data = NULL) {
- $this->stack[] = array(
- 'name' => $event,
- 'data' => $data,
- );
- }
-
- public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE) {
- $args = array($name, $type, $id, $quirks);
- $this->store('doctype', $args);
- }
-
- public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
- $args = func_get_args();
- $this->store('startTag', $args);
- if ($name == 'pre' || $name == 'script') {
- return Elements::TEXT_RAW;
+class EventStack implements EventHandler
+{
+
+ protected $stack;
+
+ public function __construct()
+ {
+ $this->stack = array();
+ }
+
+ /**
+ * Get the event stack.
+ */
+ public function events()
+ {
+ return $this->stack;
+ }
+
+ public function depth()
+ {
+ return count($this->stack);
+ }
+
+ public function get($index)
+ {
+ return $this->stack[$index];
+ }
+
+ protected function store($event, $data = NULL)
+ {
+ $this->stack[] = array(
+ 'name' => $event,
+ 'data' => $data
+ );
}
- }
- public function endTag($name) {
- $this->store('endTag', array($name));
- }
+ public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE)
+ {
+ $args = array(
+ $name,
+ $type,
+ $id,
+ $quirks
+ );
+ $this->store('doctype', $args);
+ }
- public function comment($cdata) {
- $this->store('comment', array($cdata));
- }
+ public function startTag($name, $attributes = array(), $selfClosing = FALSE)
+ {
+ $args = func_get_args();
+ $this->store('startTag', $args);
+ if ($name == 'pre' || $name == 'script') {
+ return Elements::TEXT_RAW;
+ }
+ }
- public function cdata($data) {
- $this->store('cdata', func_get_args());
- }
+ public function endTag($name)
+ {
+ $this->store('endTag', array(
+ $name
+ ));
+ }
+
+ public function comment($cdata)
+ {
+ $this->store('comment', array(
+ $cdata
+ ));
+ }
- public function text($cdata) {
- //fprintf(STDOUT, "Received TEXT event with: " . $cdata);
- $this->store('text', array($cdata));
- }
+ public function cdata($data)
+ {
+ $this->store('cdata', func_get_args());
+ }
- public function eof() {
- $this->store('eof');
- }
+ public function text($cdata)
+ {
+ // fprintf(STDOUT, "Received TEXT event with: " . $cdata);
+ $this->store('text', array(
+ $cdata
+ ));
+ }
- public function parseError($msg, $line, $col) {
- //throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col));
- //$this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col));
- $this->store('error', func_get_args());
- }
+ public function eof()
+ {
+ $this->store('eof');
+ }
- public function processingInstruction($name, $data = NULL) {
- $this->store('pi', func_get_args());
- }
+ public function parseError($msg, $line, $col)
+ {
+ // throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col));
+ // $this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col));
+ $this->store('error', func_get_args());
+ }
-} \ No newline at end of file
+ public function processingInstruction($name, $data = NULL)
+ {
+ $this->store('pi', func_get_args());
+ }
+}
diff --git a/test/HTML5/Parser/EventStackError.php b/test/HTML5/Parser/EventStackError.php
index e2d6ba7..e58fdff 100644
--- a/test/HTML5/Parser/EventStackError.php
+++ b/test/HTML5/Parser/EventStackError.php
@@ -1,5 +1,6 @@
<?php
namespace Masterminds\HTML5\Tests\Parser;
-class EventStackParseError extends \Exception {
+class EventStackError extends \Exception
+{
}
diff --git a/test/HTML5/Parser/FileInputStreamTest.php b/test/HTML5/Parser/FileInputStreamTest.php
index 4efcbe1..71dd828 100644
--- a/test/HTML5/Parser/FileInputStreamTest.php
+++ b/test/HTML5/Parser/FileInputStreamTest.php
@@ -3,133 +3,176 @@ namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\FileInputStream;
-class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase {
-
- function testConstruct() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertInstanceOf('\Masterminds\HTML5\Parser\FileInputStream', $s);
- }
-
- public function testNext() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $s->next();
- $this->assertEquals('!', $s->current());
- $s->next();
- $this->assertEquals('d', $s->current());
- }
-
- public function testKey() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals(0, $s->key());
-
- $s->next();
- $this->assertEquals(1, $s->key());
- }
-
- public function testPeek() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('!', $s->peek());
-
- $s->next();
- $this->assertEquals('d', $s->peek());
- }
-
- public function testCurrent() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('<', $s->current());
-
- $s->next();
- $this->assertEquals('!', $s->current());
-
- $s->next();
- $this->assertEquals('d', $s->current());
- }
-
- public function testColumnOffset() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $this->assertEquals(1, $s->columnOffset());
- $s->next();
- $this->assertEquals(2, $s->columnOffset());
- $s->next();
- $this->assertEquals(3, $s->columnOffset());
-
- // Make sure we get to the second line
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next();
- $this->assertEquals(0, $s->columnOffset());
-
- $s->next();
- $canary = $s->current(); // h
- $this->assertEquals('h', $canary);
- $this->assertEquals(1, $s->columnOffset());
- }
-
- public function testCurrentLine() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals(1, $s->currentLine());
-
- // Make sure we get to the second line
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $this->assertEquals(2, $s->currentLine());
-
- // Make sure we get to the third line
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next();
- $this->assertEquals(3, $s->currentLine());
- }
-
- public function testRemainingChars() {
- $text = file_get_contents(__DIR__ . '/FileInputStreamTest.html');
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
- $this->assertEquals($text, $s->remainingChars());
-
- $text = substr(file_get_contents(__DIR__ . '/FileInputStreamTest.html'), 1);
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
- $s->next(); // Pop one.
- $this->assertEquals($text, $s->remainingChars());
- }
-
- public function testCharsUnitl() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('', $s->charsUntil('<'));
- // Pointer at '<', moves to ' '
- $this->assertEquals('<!doctype', $s->charsUntil(' ', 20));
-
- // Pointer at ' ', moves to '>'
- $this->assertEquals(' html', $s->charsUntil('>'));
-
- // Pointer at '>', moves to '\n'.
- $this->assertEquals('>', $s->charsUntil("\n"));
-
- // Pointer at '\n', move forward then to the next'\n'.
- $s->next();
- $this->assertEquals('<html lang="en">', $s->charsUntil("\n"));
-
- // Ony get one of the spaces.
- $this->assertEquals("\n ", $s->charsUntil('<', 2));
-
- // Get the other space.
- $this->assertEquals(" ", $s->charsUntil('<'));
-
- // This should scan to the end of the file.
- $text = "<head>
+class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ public function testConstruct()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertInstanceOf('\Masterminds\HTML5\Parser\FileInputStream', $s);
+ }
+
+ public function testNext()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $s->next();
+ $this->assertEquals('!', $s->current());
+ $s->next();
+ $this->assertEquals('d', $s->current());
+ }
+
+ public function testKey()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals(0, $s->key());
+
+ $s->next();
+ $this->assertEquals(1, $s->key());
+ }
+
+ public function testPeek()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('!', $s->peek());
+
+ $s->next();
+ $this->assertEquals('d', $s->peek());
+ }
+
+ public function testCurrent()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('<', $s->current());
+
+ $s->next();
+ $this->assertEquals('!', $s->current());
+
+ $s->next();
+ $this->assertEquals('d', $s->current());
+ }
+
+ public function testColumnOffset()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(1, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(2, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(3, $s->columnOffset());
+
+ // Make sure we get to the second line
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(0, $s->columnOffset());
+
+ $s->next();
+ $canary = $s->current(); // h
+ $this->assertEquals('h', $canary);
+ $this->assertEquals(1, $s->columnOffset());
+ }
+
+ public function testCurrentLine()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals(1, $s->currentLine());
+
+ // Make sure we get to the second line
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(2, $s->currentLine());
+
+ // Make sure we get to the third line
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(3, $s->currentLine());
+ }
+
+ public function testRemainingChars()
+ {
+ $text = file_get_contents(__DIR__ . '/FileInputStreamTest.html');
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+ $this->assertEquals($text, $s->remainingChars());
+
+ $text = substr(file_get_contents(__DIR__ . '/FileInputStreamTest.html'), 1);
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+ $s->next(); // Pop one.
+ $this->assertEquals($text, $s->remainingChars());
+ }
+
+ public function testCharsUnitl()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('', $s->charsUntil('<'));
+ // Pointer at '<', moves to ' '
+ $this->assertEquals('<!doctype', $s->charsUntil(' ', 20));
+
+ // Pointer at ' ', moves to '>'
+ $this->assertEquals(' html', $s->charsUntil('>'));
+
+ // Pointer at '>', moves to '\n'.
+ $this->assertEquals('>', $s->charsUntil("\n"));
+
+ // Pointer at '\n', move forward then to the next'\n'.
+ $s->next();
+ $this->assertEquals('<html lang="en">', $s->charsUntil("\n"));
+
+ // Ony get one of the spaces.
+ $this->assertEquals("\n ", $s->charsUntil('<', 2));
+
+ // Get the other space.
+ $this->assertEquals(" ", $s->charsUntil('<'));
+
+ // This should scan to the end of the file.
+ $text = "<head>
<meta charset=\"utf-8\">
<title>Test</title>
</head>
@@ -137,15 +180,16 @@ class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase {
<p>This is a test.</p>
</body>
</html>";
- $this->assertEquals($text, $s->charsUntil("\t"));
- }
-
- public function testCharsWhile() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('<!', $s->charsWhile('!<'));
- $this->assertEquals('', $s->charsWhile('>'));
- $this->assertEquals('doctype', $s->charsWhile('odcyept'));
- $this->assertEquals(' htm', $s->charsWhile('html ', 4));
- }
-} \ No newline at end of file
+ $this->assertEquals($text, $s->charsUntil("\t"));
+ }
+
+ public function testCharsWhile()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('<!', $s->charsWhile('!<'));
+ $this->assertEquals('', $s->charsWhile('>'));
+ $this->assertEquals('doctype', $s->charsWhile('odcyept'));
+ $this->assertEquals(' htm', $s->charsWhile('html ', 4));
+ }
+}
diff --git a/test/HTML5/Parser/InstructionProcessorMock.php b/test/HTML5/Parser/InstructionProcessorMock.php
index b668d26..ec69364 100644
--- a/test/HTML5/Parser/InstructionProcessorMock.php
+++ b/test/HTML5/Parser/InstructionProcessorMock.php
@@ -1,16 +1,20 @@
<?php
namespace Masterminds\HTML5\Tests\Parser;
-class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor {
+class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor
+{
public $name = NULL;
+
public $data = NULL;
+
public $count = 0;
- public function process(\DOMElement $element, $name, $data) {
+ public function process(\DOMElement $element, $name, $data)
+ {
$this->name = $name;
$this->data = $data;
- $this->count++;
+ $this->count ++;
$div = $element->ownerDocument->createElement("div");
$div->nodeValue = 'foo';
@@ -19,4 +23,4 @@ class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcesso
return $div;
}
-} \ No newline at end of file
+}
diff --git a/test/HTML5/Parser/ScannerTest.php b/test/HTML5/Parser/ScannerTest.php
index b0d638e..8fa5110 100644
--- a/test/HTML5/Parser/ScannerTest.php
+++ b/test/HTML5/Parser/ScannerTest.php
@@ -8,145 +8,164 @@ namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\Scanner;
-class ScannerTest extends \Masterminds\HTML5\Tests\TestCase {
-
- /**
- * A canary test to make sure the basics are setup and working.
- */
- public function testConstruct() {
- $is = new StringInputStream("abc");
- $s = new Scanner($is);
+class ScannerTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ /**
+ * A canary test to make sure the basics are setup and working.
+ */
+ public function testConstruct()
+ {
+ $is = new StringInputStream("abc");
+ $s = new Scanner($is);
+
+ $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s);
+ }
- $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s);
- }
+ public function testNext()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- public function testNext() {
- $s = new Scanner(new StringInputStream("abc"));
+ $this->assertEquals('b', $s->next());
+ $this->assertEquals('c', $s->next());
+ }
- $this->assertEquals('b', $s->next());
- $this->assertEquals('c', $s->next());
- }
+ public function testPosition()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- public function testPosition() {
- $s = new Scanner(new StringInputStream("abc"));
+ $this->assertEquals(0, $s->position());
- $this->assertEquals(0, $s->position());
+ $s->next();
+ $this->assertEquals(1, $s->position());
+ }
- $s->next();
- $this->assertEquals(1, $s->position());
- }
+ public function testPeek()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- public function testPeek() {
- $s = new Scanner(new StringInputStream("abc"));
+ $this->assertEquals('b', $s->peek());
+ $s->next();
+ $this->assertEquals('c', $s->peek());
+ }
- $this->assertEquals('b', $s->peek());
+ public function testCurrent()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- $s->next();
- $this->assertEquals('c', $s->peek());
- }
+ // Before scanning the string begins the current is empty.
+ $this->assertEquals('a', $s->current());
- public function testCurrent() {
- $s = new Scanner(new StringInputStream("abc"));
+ $c = $s->next();
+ $this->assertEquals('b', $s->current());
- // Before scanning the string begins the current is empty.
- $this->assertEquals('a', $s->current());
+ // Test movement through the string.
+ $c = $s->next();
+ $this->assertEquals('c', $s->current());
+ }
- $c = $s->next();
- $this->assertEquals('b', $s->current());
+ public function testUnconsume()
+ {
+ $s = new Scanner(new StringInputStream("abcdefghijklmnopqrst"));
- // Test movement through the string.
- $c = $s->next();
- $this->assertEquals('c', $s->current());
- }
+ // Get initial position.
+ $s->next();
+ $start = $s->position();
- public function testUnconsume() {
- $s = new Scanner(new StringInputStream("abcdefghijklmnopqrst"));
+ // Move forward a bunch of positions.
+ $amount = 7;
+ for ($i = 0; $i < $amount; $i ++) {
+ $s->next();
+ }
- // Get initial position.
- $s->next();
- $start = $s->position();
+ // Roll back the amount we moved forward.
+ $s->unconsume($amount);
- // Move forward a bunch of positions.
- $amount = 7;
- for($i = 0; $i < $amount; $i++) {
- $s->next();
+ $this->assertEquals($start, $s->position());
}
- // Roll back the amount we moved forward.
- $s->unconsume($amount);
-
- $this->assertEquals($start, $s->position());
- }
-
- public function testGetHex() {
- $s = new Scanner(new StringInputStream("ab13ck45DE*"));
+ public function testGetHex()
+ {
+ $s = new Scanner(new StringInputStream("ab13ck45DE*"));
- $this->assertEquals('ab13c', $s->getHex());
+ $this->assertEquals('ab13c', $s->getHex());
- $s->next();
- $this->assertEquals('45DE', $s->getHex());
- }
-
- public function testGetAsciiAlpha() {
- $s = new Scanner(new StringInputStream("abcdef1%mnop*"));
-
- $this->assertEquals('abcdef', $s->getAsciiAlpha());
+ $s->next();
+ $this->assertEquals('45DE', $s->getHex());
+ }
- // Move past the 1% to scan the next group of text.
- $s->next();
- $s->next();
- $this->assertEquals('mnop', $s->getAsciiAlpha());
- }
+ public function testGetAsciiAlpha()
+ {
+ $s = new Scanner(new StringInputStream("abcdef1%mnop*"));
- public function testGetAsciiAlphaNum() {
- $s = new Scanner(new StringInputStream("abcdef1ghpo#mn94op"));
+ $this->assertEquals('abcdef', $s->getAsciiAlpha());
- $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum());
+ // Move past the 1% to scan the next group of text.
+ $s->next();
+ $s->next();
+ $this->assertEquals('mnop', $s->getAsciiAlpha());
+ }
- // Move past the # to scan the next group of text.
- $s->next();
- $this->assertEquals('mn94op', $s->getAsciiAlphaNum());
- }
+ public function testGetAsciiAlphaNum()
+ {
+ $s = new Scanner(new StringInputStream("abcdef1ghpo#mn94op"));
- public function testGetNumeric() {
- $s = new Scanner(new StringInputStream("1784a 45 9867 #"));
+ $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum());
- $this->assertEquals('1784', $s->getNumeric());
+ // Move past the # to scan the next group of text.
+ $s->next();
+ $this->assertEquals('mn94op', $s->getAsciiAlphaNum());
+ }
- // Move past the 'a ' to scan the next group of text.
- $s->next();
- $s->next();
- $this->assertEquals('45', $s->getNumeric());
- }
+ public function testGetNumeric()
+ {
+ $s = new Scanner(new StringInputStream("1784a 45 9867 #"));
- public function testCurrentLine() {
- $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
+ $this->assertEquals('1784', $s->getNumeric());
- $this->assertEquals(1, $s->currentLine());
+ // Move past the 'a ' to scan the next group of text.
+ $s->next();
+ $s->next();
+ $this->assertEquals('45', $s->getNumeric());
+ }
- // Move to the next line.
- $s->getAsciiAlphaNum(); $s->next();
- $this->assertEquals(2, $s->currentLine());
- }
+ public function testCurrentLine()
+ {
+ $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
- public function testColumnOffset() {
- $s = new Scanner(new StringInputStream("1784a a\n45 9867 #\nThis is a test."));
+ $this->assertEquals(1, $s->currentLine());
- // Move the pointer to the space.
- $s->getAsciiAlphaNum();
- $this->assertEquals(5, $s->columnOffset());
+ // Move to the next line.
+ $s->getAsciiAlphaNum();
+ $s->next();
+ $this->assertEquals(2, $s->currentLine());
+ }
- // We move the pointer ahead. There must be a better way to do this.
- $s->next(); $s->next(); $s->next(); $s->next(); $s->next(); $s->next();
- $this->assertEquals(3, $s->columnOffset());
- }
+ public function testColumnOffset()
+ {
+ $s = new Scanner(new StringInputStream("1784a a\n45 9867 #\nThis is a test."));
+
+ // Move the pointer to the space.
+ $s->getAsciiAlphaNum();
+ $this->assertEquals(5, $s->columnOffset());
+
+ // We move the pointer ahead. There must be a better way to do this.
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(3, $s->columnOffset());
+ }
- public function testRemainingChars() {
- $string = "\n45\n9867 #\nThis is a test.";
- $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
+ public function testRemainingChars()
+ {
+ $string = "\n45\n9867 #\nThis is a test.";
+ $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
- $s->getAsciiAlphaNum();
- $this->assertEquals($string, $s->remainingChars());
- }
-} \ No newline at end of file
+ $s->getAsciiAlphaNum();
+ $this->assertEquals($string, $s->remainingChars());
+ }
+}
diff --git a/test/HTML5/Parser/StringInputStreamTest.php b/test/HTML5/Parser/StringInputStreamTest.php
index 5148e45..3ee768a 100644
--- a/test/HTML5/Parser/StringInputStreamTest.php
+++ b/test/HTML5/Parser/StringInputStreamTest.php
@@ -3,337 +3,325 @@ namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\StringInputStream;
-class StringInputStreamTest extends \Masterminds\HTML5\Tests\TestCase {
-
- /**
- * A canary test to make sure the basics are setup and working.
- */
- public function testConstruct() {
- $s = new StringInputStream("abc");
-
- $this->assertInstanceOf('\Masterminds\HTML5\Parser\StringInputStream', $s);
- }
-
- public function testNext() {
- $s = new StringInputStream("abc");
-
- $s->next();
- $this->assertEquals('b', $s->current());
- $s->next();
- $this->assertEquals('c', $s->current());
- }
-
- public function testKey() {
- $s = new StringInputStream("abc");
-
- $this->assertEquals(0, $s->key());
-
- $s->next();
- $this->assertEquals(1, $s->key());
- }
-
- public function testPeek() {
- $s = new StringInputStream("abc");
-
- $this->assertEquals('b', $s->peek());
-
- $s->next();
- $this->assertEquals('c', $s->peek());
- }
-
- public function testCurrent() {
- $s = new StringInputStream("abc");
-
- // Before scanning the string begins the current is empty.
- $this->assertEquals('a', $s->current());
-
- $s->next();
- $this->assertEquals('b', $s->current());
-
- // Test movement through the string.
- $s->next();
- $this->assertEquals('c', $s->current());
- }
-
- public function testColumnOffset() {
- $s = new StringInputStream("abc\ndef\n");
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $this->assertEquals(1, $s->columnOffset());
- $s->next();
- $this->assertEquals(2, $s->columnOffset());
- $s->next();
- $this->assertEquals(3, $s->columnOffset());
- $s->next(); // LF
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $canary = $s->current(); // e
- $this->assertEquals('e', $canary);
- $this->assertEquals(1, $s->columnOffset());
-
- $s = new StringInputStream("abc");
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $this->assertEquals(1, $s->columnOffset());
- $s->next();
- $this->assertEquals(2, $s->columnOffset());
- }
-
- public function testCurrentLine() {
- $txt = "1\n2\n\n\n\n3";
- $stream = new StringInputStream($txt);
- $this->assertEquals(1, $stream->currentLine());
-
- // Advance over 1 and LF on to line 2 value 2.
- $stream->next(); $stream->next();
- $canary = $stream->current();
- $this->assertEquals(2, $stream->currentLine());
- $this->assertEquals('2', $canary);
-
-
- // Advance over 4x LF
- $stream->next(); $stream->next();
- $stream->next(); $stream->next();
- $stream->next();
- $this->assertEquals(6, $stream->currentLine());
- $this->assertEquals('3', $stream->current());
-
- // Make sure it doesn't do 7.
- $this->assertEquals(6, $stream->currentLine());
- }
-
- public function testRemainingChars() {
- $text = "abcd";
- $s = new StringInputStream($text);
- $this->assertEquals($text, $s->remainingChars());
-
- $text = "abcd";
- $s = new StringInputStream($text);
- $s->next(); // Pop one.
- $this->assertEquals('bcd', $s->remainingChars());
-
- }
-
- public function testCharsUnitl() {
- $text = "abcdefffffffghi";
- $s = new StringInputStream($text);
- $this->assertEquals('', $s->charsUntil('a'));
- // Pointer at 'a', moves 2 to 'c'
- $this->assertEquals('ab', $s->charsUntil('w', 2));
-
- // Pointer at 'c', moves to first 'f'
- $this->assertEquals('cde', $s->charsUntil('fzxv'));
-
- // Only get five 'f's
- $this->assertEquals('fffff', $s->charsUntil('g', 5));
-
- // Get just the last two 'f's
- $this->assertEquals('ff', $s->charsUntil('g'));
-
- // This should scan to the end.
- $this->assertEquals('ghi', $s->charsUntil('w', 9));
-
- }
-
- public function testCharsWhile() {
- $text = "abcdefffffffghi";
- $s = new StringInputStream($text);
-
- $this->assertEquals('ab', $s->charsWhile('ba'));
-
- $this->assertEquals('', $s->charsWhile('a'));
- $this->assertEquals('cde', $s->charsWhile('cdeba'));
- $this->assertEquals('ff', $s->charsWhile('f', 2));
- $this->assertEquals('fffff', $s->charsWhile('f'));
- $this->assertEquals('g', $s->charsWhile('fg'));
- $this->assertEquals('hi', $s->charsWhile('fghi', 99));
-
- }
-
- public function testBOM() {
-
- // Ignore in-text BOM.
- $stream = new StringInputStream("a\xEF\xBB\xBF");
- $this->assertEquals("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain');
-
- // Strip leading BOM
- $leading = new StringInputStream("\xEF\xBB\xBFa");
- $this->assertEquals('a', $leading->current(), 'BOM should be stripped');
- }
-
- public function testCarriageReturn() {
-
- // Replace NULL with Unicode replacement.
- $stream = new StringInputStream("\0\0\0");
- $this->assertEquals("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD');
- $this->assertEquals(3, count($stream->errors), 'Null character should set parse error: ' . print_r($stream->errors, TRUE));
-
- // Remove CR when next to LF.
- $stream = new StringInputStream("\r\n");
- $this->assertEquals("\n", $stream->remainingChars(), 'CRLF should be replaced by LF');
-
- // Convert CR to LF when on its own.
- $stream = new StringInputStream("\r");
- $this->assertEquals("\n", $stream->remainingChars(), 'CR should be replaced by LF');
- }
-
-
- public function invalidParseErrorTestHandler($input, $numErrors, $name) {
- $stream = new StringInputStream($input, 'UTF-8');
- $this->assertEquals($input, $stream->remainingChars(), $name . ' (stream content)');
- $this->assertEquals($numErrors, count($stream->errors), $name . ' (number of errors)');
- }
-
- public function testInvalidReplace() {
- $invalidTest = array(
-
- // Min/max overlong
- "\xC0\x80a" => 'Overlong representation of U+0000',
- "\xE0\x80\x80a" => 'Overlong representation of U+0000',
- "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000',
- "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
- "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
- "\xC1\xBFa" => 'Overlong representation of U+007F',
- "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF',
- "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF',
-
-
- "a\xDF" => 'Incomplete two byte sequence (missing final byte)',
- "a\xEF\xBF" => 'Incomplete three byte sequence (missing final byte)',
- "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte)',
-
- // Min/max continuation bytes
- "a\x80" => 'Lone 80 continuation byte',
- "a\xBF" => 'Lone BF continuation byte',
-
- // Invalid bytes (these can never occur)
- "a\xFE" => 'Invalid FE byte',
- "a\xFF" => 'Invalid FF byte',
- );
- foreach ($invalidTest as $test => $note) {
- $stream = new StringInputStream($test);
- $this->assertEquals('a', $stream->remainingChars(), $note);
- }
-
- // MPB:
- // It appears that iconv just leaves these alone. Not sure what to
- // do.
- /*
- $converted = array(
- "a\xF5\x90\x80\x80" => 'U+110000, off unicode planes.',
- );
- foreach ($converted as $test => $note) {
- $stream = new StringInputStream($test);
- $this->assertEquals(2, mb_strlen($stream->remainingChars()), $note);
- }
- */
- }
-
- public function testInvalidParseError() {
- // C0 controls (except U+0000 and U+000D due to different handling)
- $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)');
- $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)');
- $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)');
- $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)');
- $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)');
- $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)');
- $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)');
- $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)');
- $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)');
- $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)');
- $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)');
- $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)');
- $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)');
- $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)');
- $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)');
- $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)');
- $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)');
- $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)');
- $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)');
- $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)');
- $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)');
- $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)');
- $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)');
- $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)');
- $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)');
- $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)');
- $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)');
- $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)');
- $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)');
- $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)');
-
- // DEL (U+007F)
- $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F');
-
- // C1 Controls
- $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)');
- $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)');
- $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)');
-
- // Charcters surrounding surrogates
- $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)');
- $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+DE00 (one codepoint above highest surrogate codepoint)');
-
- // Permanent noncharacters
- $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)');
-
- // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the
- // business of writing tests against iconv, I've just commented these out. Should revisit
- // at a later point.
- /*
- $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)');
-
- // Paired UTF-16 surrogates
- $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)');
- */
- }
-
+class StringInputStreamTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ /**
+ * A canary test to make sure the basics are set up and working.
+ */
+ public function testConstruct()
+ {
+ $s = new StringInputStream("abc");
+
+ $this->assertInstanceOf('\Masterminds\HTML5\Parser\StringInputStream', $s);
+ }
+
+ public function testNext()
+ {
+ $s = new StringInputStream("abc");
+
+ $s->next();
+ $this->assertEquals('b', $s->current());
+ $s->next();
+ $this->assertEquals('c', $s->current());
+ }
+
+ public function testKey()
+ {
+ $s = new StringInputStream("abc");
+
+ $this->assertEquals(0, $s->key());
+
+ $s->next();
+ $this->assertEquals(1, $s->key());
+ }
+
+ public function testPeek()
+ {
+ $s = new StringInputStream("abc");
+
+ $this->assertEquals('b', $s->peek());
+
+ $s->next();
+ $this->assertEquals('c', $s->peek());
+ }
+
+ public function testCurrent()
+ {
+ $s = new StringInputStream("abc");
+
+ // Before any call to next(), current() already yields the first character.
+ $this->assertEquals('a', $s->current());
+
+ $s->next();
+ $this->assertEquals('b', $s->current());
+
+ // Test movement through the string.
+ $s->next();
+ $this->assertEquals('c', $s->current());
+ }
+
+ public function testColumnOffset()
+ {
+ $s = new StringInputStream("abc\ndef\n");
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(1, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(2, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(3, $s->columnOffset());
+ $s->next(); // LF
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $canary = $s->current(); // e
+ $this->assertEquals('e', $canary);
+ $this->assertEquals(1, $s->columnOffset());
+
+ $s = new StringInputStream("abc");
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(1, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(2, $s->columnOffset());
+ }
+
+ public function testCurrentLine()
+ {
+ $txt = "1\n2\n\n\n\n3";
+ $stream = new StringInputStream($txt);
+ $this->assertEquals(1, $stream->currentLine());
+
+ // Advance over 1 and LF on to line 2 value 2.
+ $stream->next();
+ $stream->next();
+ $canary = $stream->current();
+ $this->assertEquals(2, $stream->currentLine());
+ $this->assertEquals('2', $canary);
+
+ // Advance over 4x LF
+ $stream->next();
+ $stream->next();
+ $stream->next();
+ $stream->next();
+ $stream->next();
+ $this->assertEquals(6, $stream->currentLine());
+ $this->assertEquals('3', $stream->current());
+
+ // Make sure it doesn't do 7.
+ $this->assertEquals(6, $stream->currentLine());
+ }
+
+ public function testRemainingChars()
+ {
+ $text = "abcd";
+ $s = new StringInputStream($text);
+ $this->assertEquals($text, $s->remainingChars());
+
+ $text = "abcd";
+ $s = new StringInputStream($text);
+ $s->next(); // Pop one.
+ $this->assertEquals('bcd', $s->remainingChars());
+ }
+
+ public function testCharsUnitl()
+ {
+ $text = "abcdefffffffghi";
+ $s = new StringInputStream($text);
+ $this->assertEquals('', $s->charsUntil('a'));
+ // Pointer at 'a', moves 2 to 'c'
+ $this->assertEquals('ab', $s->charsUntil('w', 2));
+
+ // Pointer at 'c', moves to first 'f'
+ $this->assertEquals('cde', $s->charsUntil('fzxv'));
+
+ // Only get five 'f's
+ $this->assertEquals('fffff', $s->charsUntil('g', 5));
+
+ // Get just the last two 'f's
+ $this->assertEquals('ff', $s->charsUntil('g'));
+
+ // This should scan to the end.
+ $this->assertEquals('ghi', $s->charsUntil('w', 9));
+ }
+
+ public function testCharsWhile()
+ {
+ $text = "abcdefffffffghi";
+ $s = new StringInputStream($text);
+
+ $this->assertEquals('ab', $s->charsWhile('ba'));
+
+ $this->assertEquals('', $s->charsWhile('a'));
+ $this->assertEquals('cde', $s->charsWhile('cdeba'));
+ $this->assertEquals('ff', $s->charsWhile('f', 2));
+ $this->assertEquals('fffff', $s->charsWhile('f'));
+ $this->assertEquals('g', $s->charsWhile('fg'));
+ $this->assertEquals('hi', $s->charsWhile('fghi', 99));
+ }
+
+ public function testBOM()
+ {
+ // Ignore in-text BOM.
+ $stream = new StringInputStream("a\xEF\xBB\xBF");
+ $this->assertEquals("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain');
+
+ // Strip leading BOM
+ $leading = new StringInputStream("\xEF\xBB\xBFa");
+ $this->assertEquals('a', $leading->current(), 'BOM should be stripped');
+ }
+
+ public function testCarriageReturn()
+ {
+ // Replace NULL with Unicode replacement.
+ $stream = new StringInputStream("\0\0\0");
+ $this->assertEquals("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD');
+ $this->assertEquals(3, count($stream->errors), 'Null character should set parse error: ' . print_r($stream->errors, TRUE));
+
+ // Remove CR when next to LF.
+ $stream = new StringInputStream("\r\n");
+ $this->assertEquals("\n", $stream->remainingChars(), 'CRLF should be replaced by LF');
+
+ // Convert CR to LF when on its own.
+ $stream = new StringInputStream("\r");
+ $this->assertEquals("\n", $stream->remainingChars(), 'CR should be replaced by LF');
+ }
+
+ public function invalidParseErrorTestHandler($input, $numErrors, $name)
+ {
+ $stream = new StringInputStream($input, 'UTF-8');
+ $this->assertEquals($input, $stream->remainingChars(), $name . ' (stream content)');
+ $this->assertEquals($numErrors, count($stream->errors), $name . ' (number of errors)');
+ }
+
+ public function testInvalidReplace()
+ {
+ $invalidTest = array(
+
+ // Min/max overlong
+ "\xC0\x80a" => 'Overlong representation of U+0000',
+ "\xE0\x80\x80a" => 'Overlong representation of U+0000',
+ "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000',
+ "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
+ "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
+ "\xC1\xBFa" => 'Overlong representation of U+007F',
+ "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF',
+ "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF',
+
+ "a\xDF" => 'Incomplete two byte sequence (missing final byte)',
+ "a\xEF\xBF" => 'Incomplete three byte sequence (missing final byte)',
+ "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte)',
+
+ // Min/max continuation bytes
+ "a\x80" => 'Lone 80 continuation byte',
+ "a\xBF" => 'Lone BF continuation byte',
+
+ // Invalid bytes (these can never occur)
+ "a\xFE" => 'Invalid FE byte',
+ "a\xFF" => 'Invalid FF byte'
+ );
+ foreach ($invalidTest as $test => $note) {
+ $stream = new StringInputStream($test);
+ $this->assertEquals('a', $stream->remainingChars(), $note);
+ }
+
+ // MPB:
+ // It appears that iconv just leaves these alone. Not sure what to
+ // do.
+ /*
+ * $converted = array( "a\xF5\x90\x80\x80" => 'U+110000, off unicode planes.', ); foreach ($converted as $test => $note) { $stream = new StringInputStream($test); $this->assertEquals(2, mb_strlen($stream->remainingChars()), $note); }
+ */
+ }
+
+ public function testInvalidParseError()
+ {
+ // C0 controls (except U+0000 and U+000D due to different handling)
+ $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)');
+ $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)');
+
+ // DEL (U+007F)
+ $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F');
+
+ // C1 Controls
+ $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)');
+ $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)');
+ $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)');
+
+ // Characters surrounding surrogates (the second input is U+FFFD, not the U+E000 boundary itself)
+ $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)');
+ $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+FFFD (replacement character, above the surrogate range)');
+
+ // Permanent noncharacters
+ $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)');
+
+ // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the
+ // business of writing tests against iconv, I've just commented these out. Should revisit
+ // at a later point.
+ /*
+ * $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)'); // Paired UTF-16 surrogates $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)');
+ */
+ }
}
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 27c3535..32f0fcb 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -6,593 +6,948 @@ use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
-class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase {
- // ================================================================
- // Additional assertions.
- // ================================================================
- /**
- * Tests that an event matches both the event type and the expected value.
- *
- * @param string $type
- * Expected event type.
- * @param string $expects
- * The value expected in $event['data'][0].
- */
- public function assertEventEquals($type, $expects, $event) {
- $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, TRUE));
- if (is_array($expects)) {
- $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, TRUE) . ": " . print_r($event, TRUE));
+class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
+{
+ // ================================================================
+ // Additional assertions.
+ // ================================================================
+ /**
+ * Tests that an event matches both the event type and the expected value.
+ *
+ * @param string $type
+ * Expected event type.
+ * @param string|array $expects
+ * The value expected in $event['data'][0], or, when an array, in the whole of $event['data'].
+ */
+ public function assertEventEquals($type, $expects, $event)
+ {
+ $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, TRUE));
+ if (is_array($expects)) {
+ $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, TRUE) . ": " . print_r($event, TRUE));
+ } else {
+ $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . print_r($event, TRUE));
+ }
}
- else {
- $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . print_r($event, TRUE));
+
+ /**
+ * Assert that a given event is 'error'.
+ */
+ public function assertEventError($event)
+ {
+ $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, TRUE));
+ }
+
+ /**
+ * Asserts that all of the tests are good.
+ *
+ * This loops through a map of tests/expectations and runs a few assertions on each test.
+ *
+ * Checks:
+ * - depth (if depth is > 0)
+ * - event name
+ * - matches on event 0.
+ */
+ protected function isAllGood($name, $depth, $tests, $debug = FALSE)
+ {
+ foreach ($tests as $try => $expects) {
+ if ($debug) {
+ fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, TRUE));
+ }
+ $e = $this->parse($try);
+ if ($depth > 0) {
+ $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, TRUE));
+ }
+ $this->assertEventEquals($name, $expects, $e->get(0));
+ }
+ }
+
+ // ================================================================
+ // Utility functions.
+ // ================================================================
+ public function testParse()
+ {
+ list ($tok, $events) = $this->createTokenizer('');
+
+ $tok->parse();
+ $e1 = $events->get(0);
+
+ $this->assertEquals(1, $events->Depth());
+ $this->assertEquals('eof', $e1['name']);
+ }
+
+ public function testWhitespace()
+ {
+ $spaces = ' ';
+ list ($tok, $events) = $this->createTokenizer($spaces);
+
+ $tok->parse();
+
+ $this->assertEquals(2, $events->depth());
+
+ $e1 = $events->get(0);
+
+ $this->assertEquals('text', $e1['name']);
+ $this->assertEquals($spaces, $e1['data'][0]);
}
- }
-
- /**
- * Assert that a given event is 'error'.
- */
- public function assertEventError($event) {
- $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, TRUE));
- }
-
- /**
- * Asserts that all of the tests are good.
- *
- * This loops through a map of tests/expectations and runs a few assertions on each test.
- *
- * Checks:
- * - depth (if depth is > 0)
- * - event name
- * - matches on event 0.
- */
- protected function isAllGood($name, $depth, $tests, $debug = FALSE) {
- foreach ($tests as $try => $expects) {
- if ($debug) {
- fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, TRUE));
- }
- $e = $this->parse($try);
- if ($depth > 0) {
- $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, TRUE));
- }
- $this->assertEventEquals($name, $expects, $e->get(0));
+
+ public function testCharacterReference()
+ {
+ $good = array(
+ '&amp;' => '&',
+ '&#x0003c;' => '<',
+ '&#38;' => '&',
+ '&' => '&'
+ );
+ $this->isAllGood('text', 2, $good);
+
+ // Test with broken charref
+ $str = '&foo';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('error', $e1['name']);
+
+ $str = '&#xfoo';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('error', $e1['name']);
+
+ $str = '&#foo';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('error', $e1['name']);
+
+ // FIXME: Once the text processor is done, need to verify that the
+ // tokens are transformed correctly into text.
}
- }
-
-
- // ================================================================
- // Utility functions.
- // ================================================================
-
- public function testParse() {
- list($tok, $events) = $this->createTokenizer('');
-
- $tok->parse();
- $e1 = $events->get(0);
-
- $this->assertEquals(1, $events->Depth());
- $this->assertEquals('eof', $e1['name']);
- }
-
- public function testWhitespace() {
- $spaces = ' ';
- list($tok, $events) = $this->createTokenizer($spaces);
-
- $tok->parse();
-
- $this->assertEquals(2, $events->depth());
-
- $e1 = $events->get(0);
-
- $this->assertEquals('text', $e1['name']);
- $this->assertEquals($spaces, $e1['data'][0]);
- }
-
- public function testCharacterReference() {
- $good = array(
- '&amp;' => '&',
- '&#x0003c;' => '<',
- '&#38;' => '&',
- '&' => '&',
- );
- $this->isAllGood('text', 2, $good);
-
- // Test with broken charref
- $str = '&foo';
- $events = $this->parse($str);
- $e1 = $events->get(0);
- $this->assertEquals('error', $e1['name']);
-
- $str = '&#xfoo';
- $events = $this->parse($str);
- $e1 = $events->get(0);
- $this->assertEquals('error', $e1['name']);
-
- $str = '&#foo';
- $events = $this->parse($str);
- $e1 = $events->get(0);
- $this->assertEquals('error', $e1['name']);
-
- // FIXME: Once the text processor is done, need to verify that the
- // tokens are transformed correctly into text.
- }
-
- public function testBogusComment() {
- $bogus = array(
- '</+this is a bogus comment. +>',
- '<!+this is a bogus comment. !>',
- '<!D OCTYPE foo bar>',
- '<!DOCTYEP foo bar>',
- '<![CADATA[ TEST ]]>',
- '<![CDATA Hello ]]>',
- '<![CDATA[ Hello [[>',
- '<!CDATA[[ test ]]>',
- '<![CDATA[',
- '<![CDATA[hellooooo hello',
- '<? Hello World ?>',
- '<? Hello World',
- );
- foreach ($bogus as $str) {
- $events = $this->parse($str);
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('comment', $str, $events->get(1));
+
+ public function testBogusComment()
+ {
+ $bogus = array(
+ '</+this is a bogus comment. +>',
+ '<!+this is a bogus comment. !>',
+ '<!D OCTYPE foo bar>',
+ '<!DOCTYEP foo bar>',
+ '<![CADATA[ TEST ]]>',
+ '<![CDATA Hello ]]>',
+ '<![CDATA[ Hello [[>',
+ '<!CDATA[[ test ]]>',
+ '<![CDATA[',
+ '<![CDATA[hellooooo hello',
+ '<? Hello World ?>',
+ '<? Hello World'
+ );
+ foreach ($bogus as $str) {
+ $events = $this->parse($str);
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('comment', $str, $events->get(1));
+ }
}
- }
- public function testEndTag() {
- $succeed = array(
- '</a>' => 'a',
- '</test>' => 'test',
- '</test
+ public function testEndTag()
+ {
+ $succeed = array(
+ '</a>' => 'a',
+ '</test>' => 'test',
+ '</test
>' => 'test',
- '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' =>
- 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
- // See 8.2.4.10, which requires this and does not say error.
- '</a<b>' => 'a<b',
- );
- $this->isAllGood('endTag', 2, $succeed);
-
- // Recoverable failures
- $fail = array(
- '</a class="monkey">' => 'a',
- '</a <b>' => 'a',
- '</a <b <c>' => 'a',
- '</a is the loneliest letter>' => 'a',
- '</a' => 'a',
- );
- foreach ($fail as $test => $result) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth());
- // Should have triggered an error.
- $this->assertEventError($events->get(0));
- // Should have tried to parse anyway.
- $this->assertEventEquals('endTag', $result, $events->get(1));
+ '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
+ // See 8.2.4.10, which requires this and does not say error.
+ '</a<b>' => 'a<b'
+ );
+ $this->isAllGood('endTag', 2, $succeed);
+
+ // Recoverable failures
+ $fail = array(
+ '</a class="monkey">' => 'a',
+ '</a <b>' => 'a',
+ '</a <b <c>' => 'a',
+ '</a is the loneliest letter>' => 'a',
+ '</a' => 'a'
+ );
+ foreach ($fail as $test => $result) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth());
+ // Should have triggered an error.
+ $this->assertEventError($events->get(0));
+ // Should have tried to parse anyway.
+ $this->assertEventEquals('endTag', $result, $events->get(1));
+ }
+
+ // BogoComments
+ $comments = array(
+ '</>' => '</>',
+ '</ >' => '</ >',
+ '</ a>' => '</ a>'
+ );
+ foreach ($comments as $test => $result) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth());
+
+ // Should have triggered an error.
+ $this->assertEventError($events->get(0));
+
+ // Should have tried to parse anyway.
+ $this->assertEventEquals('comment', $result, $events->get(1));
+ }
}
- // BogoComments
- $comments = array(
- '</>' => '</>',
- '</ >' => '</ >',
- '</ a>' => '</ a>',
- );
- foreach ($comments as $test => $result) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth());
-
- // Should have triggered an error.
- $this->assertEventError($events->get(0));
-
- // Should have tried to parse anyway.
- $this->assertEventEquals('comment', $result, $events->get(1));
+ public function testComment()
+ {
+ $good = array(
+ '<!--easy-->' => 'easy',
+ '<!-- 1 > 0 -->' => ' 1 > 0 ',
+ '<!-- --$i -->' => ' --$i ',
+ '<!----$i-->' => '--$i',
+ '<!-- 1 > 0 -->' => ' 1 > 0 ',
+ "<!--\nHello World.\na-->" => "\nHello World.\na",
+ '<!-- <!-- -->' => ' <!-- '
+ );
+ foreach ($good as $test => $expected) {
+ $events = $this->parse($test);
+ $this->assertEventEquals('comment', $expected, $events->get(0));
+ }
+
+ $fail = array(
+ '<!-->' => '',
+ '<!--Hello' => 'Hello',
+ "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
+ '<!--' => ''
+ );
+ foreach ($fail as $test => $expected) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth());
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('comment', $expected, $events->get(1));
+ }
}
- }
-
- public function testComment() {
- $good = array(
- '<!--easy-->' => 'easy',
- '<!-- 1 > 0 -->' => ' 1 > 0 ',
- '<!-- --$i -->' => ' --$i ',
- '<!----$i-->' => '--$i',
- '<!-- 1 > 0 -->' => ' 1 > 0 ',
- "<!--\nHello World.\na-->" => "\nHello World.\na",
- '<!-- <!-- -->' => ' <!-- ',
- );
- foreach ($good as $test => $expected) {
- $events = $this->parse($test);
- $this->assertEventEquals('comment', $expected, $events->get(0));
+
+ public function testCDATASection()
+ {
+ $good = array(
+ '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
+ '<![CDATA[CDATA]]>' => 'CDATA',
+ '<![CDATA[ ]] > ]]>' => ' ]] > ',
+ '<![CDATA[ ]]>' => ' '
+ );
+ $this->isAllGood('cdata', 2, $good);
}
- $fail = array(
- '<!-->' => '',
- '<!--Hello' => 'Hello',
- "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
- '<!--' => '',
- );
- foreach ($fail as $test => $expected) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth());
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('comment', $expected, $events->get(1));
+ public function testDoctype()
+ {
+ $good = array(
+ '<!DOCTYPE html>' => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ '<!doctype html>' => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ '<!DocType html>' => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ "<!DOCTYPE\nhtml>" => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ "<!DOCTYPE\fhtml>" => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ '<!DOCTYPE html PUBLIC "foo bar">' => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ "<!DOCTYPE html PUBLIC 'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ '<!DOCTYPE html PUBLIC "foo bar" >' => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ '<!DOCTYPE html SYSTEM "foo bar">' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ FALSE
+ ),
+ "<!DOCTYPE html SYSTEM 'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ FALSE
+ ),
+ '<!DOCTYPE html SYSTEM "foo/bar" >' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo/bar',
+ FALSE
+ ),
+ "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ FALSE
+ )
+ );
+ $this->isAllGood('doctype', 2, $good);
+
+ $bad = array(
+ '<!DOCTYPE>' => array(
+ NULL,
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE >' => array(
+ NULL,
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUB' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUB>' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUB "Looks good">' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo SYSTME "Looks good"' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+
+ // Can't tell whether these are ids or ID types, since the context is chopped.
+ '<!DOCTYPE foo PUBLIC' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUBLIC>' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo SYSTEM' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo SYSTEM>' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+
+ '<!DOCTYPE html SYSTEM "foo bar"' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ TRUE
+ ),
+ '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ TRUE
+ )
+ );
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ // fprintf(STDOUT, $test . PHP_EOL);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('doctype', $expects, $events->get(1));
+ }
}
- }
-
- public function testCDATASection() {
- $good = array(
- '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
- '<![CDATA[CDATA]]>' => 'CDATA',
- '<![CDATA[ ]] > ]]>' => ' ]] > ',
- '<![CDATA[ ]]>' => ' ',
- );
- $this->isAllGood('cdata', 2, $good);
- }
-
- public function testDoctype() {
- $good = array(
- '<!DOCTYPE html>' => array('html', 0, NULL, FALSE),
- '<!doctype html>' => array('html', 0, NULL, FALSE),
- '<!DocType html>' => array('html', 0, NULL, FALSE),
- "<!DOCTYPE\nhtml>" => array('html', 0, NULL, FALSE),
- "<!DOCTYPE\fhtml>" => array('html', 0, NULL, FALSE),
- '<!DOCTYPE html PUBLIC "foo bar">' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- "<!DOCTYPE html PUBLIC 'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- '<!DOCTYPE html PUBLIC "foo bar" >' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- '<!DOCTYPE html SYSTEM "foo bar">' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE),
- "<!DOCTYPE html SYSTEM 'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE),
- '<!DOCTYPE html SYSTEM "foo/bar" >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', FALSE),
- "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE),
- );
- $this->isAllGood('doctype', 2, $good);
-
- $bad = array(
- '<!DOCTYPE>' => array(NULL, EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE >' => array(NULL, EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUB' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
-
- // Can't tell whether these are ids or ID types, since the context is chopped.
- '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
-
- '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE),
- '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE),
- );
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- //fprintf(STDOUT, $test . PHP_EOL);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('doctype', $expects, $events->get(1));
+ public function testProcessorInstruction()
+ {
+ $good = array(
+ '<?hph ?>' => 'hph',
+ '<?hph echo "Hello World"; ?>' => array(
+ 'hph',
+ 'echo "Hello World"; '
+ ),
+ "<?hph \necho 'Hello World';\n?>" => array(
+ 'hph',
+ "echo 'Hello World';\n"
+ )
+ );
+ $this->isAllGood('pi', 2, $good);
}
- }
-
- public function testProcessorInstruction() {
- $good = array(
- '<?hph ?>' => 'hph',
- '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '),
- "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"),
- );
- $this->isAllGood('pi', 2, $good);
- }
-
- /**
- * This tests just simple tags.
- */
- public function testSimpleTags() {
- $open = array(
- '<foo>' => 'foo',
- '<FOO>' => 'foo',
- '<fOO>' => 'foo',
- '<foo >' => 'foo',
- "<foo\n\n\n\n>" => 'foo',
- '<foo:bar>' => 'foo:bar',
- );
- $this->isAllGood('startTag', 2, $open);
-
- $selfClose= array(
- '<foo/>' => 'foo',
- '<FOO/>' => 'foo',
- '<foo />' => 'foo',
- "<foo\n\n\n\n/>" => 'foo',
- '<foo:bar/>' => 'foo:bar',
- );
- foreach ($selfClose as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, TRUE));
- $this->assertEventEquals('startTag', $expects, $events->get(0));
- $this->assertEventEquals('endTag', $expects, $events->get(1));
+
+ /**
+ * This tests just simple tags.
+ */
+ public function testSimpleTags()
+ {
+ $open = array(
+ '<foo>' => 'foo',
+ '<FOO>' => 'foo',
+ '<fOO>' => 'foo',
+ '<foo >' => 'foo',
+ "<foo\n\n\n\n>" => 'foo',
+ '<foo:bar>' => 'foo:bar'
+ );
+ $this->isAllGood('startTag', 2, $open);
+
+ $selfClose = array(
+ '<foo/>' => 'foo',
+ '<FOO/>' => 'foo',
+ '<foo />' => 'foo',
+ "<foo\n\n\n\n/>" => 'foo',
+ '<foo:bar/>' => 'foo:bar'
+ );
+ foreach ($selfClose as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, TRUE));
+ $this->assertEventEquals('startTag', $expects, $events->get(0));
+ $this->assertEventEquals('endTag', $expects, $events->get(1));
+ }
+
+ $bad = array(
+ '<foo' => 'foo',
+ '<foo ' => 'foo',
+ '<foo/' => 'foo',
+ '<foo /' => 'foo'
+ );
+
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expects, $events->get(1));
+ }
}
- $bad = array(
- '<foo' => 'foo',
- '<foo ' => 'foo',
- '<foo/' => 'foo',
- '<foo /' => 'foo',
- );
-
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expects, $events->get(1));
+ public function testTagsWithAttributeAndMissingName()
+ {
+ $cases = array(
+ '<id="top_featured">' => 'id',
+ '<color="white">' => 'color',
+ "<class='neaktivni_stranka'>" => 'class',
+ '<bgcolor="white">' => 'bgcolor',
+ '<class="nom">' => 'class'
+ );
+
+ foreach ($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('startTag', $expected, $events->get(3));
+ $this->assertEventEquals('eof', NULL, $events->get(4));
+ }
}
- }
-
- public function testTagsWithAttributeAndMissingName() {
- $cases = array(
- '<id="top_featured">' => 'id',
- '<color="white">' => 'color',
- "<class='neaktivni_stranka'>" => 'class',
- '<bgcolor="white">' => 'bgcolor',
- '<class="nom">' => 'class',
- );
-
- foreach($cases as $html => $expected) {
- $events = $this->parse($html);
- $this->assertEventError($events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventError($events->get(2));
- $this->assertEventEquals('startTag', $expected, $events->get(3));
- $this->assertEventEquals('eof', NULL, $events->get(4));
+
+ public function testTagNotClosedAfterTagName()
+ {
+ $cases = array(
+ "<noscript<img>" => array(
+ 'noscript',
+ 'img'
+ ),
+ '<center<a>' => array(
+ 'center',
+ 'a'
+ ),
+ '<br<br>' => array(
+ 'br',
+ 'br'
+ )
+ );
+
+ foreach ($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expected[0], $events->get(1));
+ $this->assertEventEquals('startTag', $expected[1], $events->get(2));
+ $this->assertEventEquals('eof', NULL, $events->get(3));
+ }
+
+ $events = $this->parse('<span<>02</span>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'span', $events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('text', '>02', $events->get(3));
+ $this->assertEventEquals('endTag', 'span', $events->get(4));
+ $this->assertEventEquals('eof', NULL, $events->get(5));
+
+ $events = $this->parse('<p</p>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'p', $events->get(1));
+ $this->assertEventEquals('endTag', 'p', $events->get(2));
+ $this->assertEventEquals('eof', NULL, $events->get(3));
+
+ $events = $this->parse('<strong><WordPress</strong>');
+ $this->assertEventEquals('startTag', 'strong', $events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
+ $this->assertEventEquals('endTag', 'strong', $events->get(3));
+ $this->assertEventEquals('eof', NULL, $events->get(4));
+
+ $events = $this->parse('<src=<a>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('startTag', 'src', $events->get(3));
+ $this->assertEventEquals('startTag', 'a', $events->get(4));
+ $this->assertEventEquals('eof', NULL, $events->get(5));
+
+ $events = $this->parse('<br...<a>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'br', $events->get(1));
+ $this->assertEventEquals('eof', NULL, $events->get(2));
}
- }
-
- public function testTagNotClosedAfterTagName() {
- $cases = array(
- "<noscript<img>" => array('noscript', 'img'),
- '<center<a>' => array('center', 'a'),
- '<br<br>' => array('br', 'br'),
- );
-
- foreach($cases as $html => $expected) {
- $events = $this->parse($html);
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expected[0], $events->get(1));
- $this->assertEventEquals('startTag', $expected[1], $events->get(2));
- $this->assertEventEquals('eof', NULL, $events->get(3));
+
+ public function testIllegalTagNames()
+ {
+ $cases = array(
+ '<li">' => 'li',
+ '<p">' => 'p',
+ '<b&nbsp; >' => 'b',
+ '<static*all>' => 'static',
+ '<h*0720/>' => 'h',
+ '<st*ATTRIBUTE />' => 'st',
+ '<a-href="http://url.com/">' => 'a'
+ );
+
+ foreach ($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expected, $events->get(1));
+ }
}
- $events = $this->parse('<span<>02</span>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', 'span', $events->get(1));
- $this->assertEventError($events->get(2));
- $this->assertEventEquals('text', '>02', $events->get(3));
- $this->assertEventEquals('endTag', 'span', $events->get(4));
- $this->assertEventEquals('eof', NULL, $events->get(5));
-
- $events = $this->parse('<p</p>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', 'p', $events->get(1));
- $this->assertEventEquals('endTag', 'p', $events->get(2));
- $this->assertEventEquals('eof', NULL, $events->get(3));
-
- $events = $this->parse('<strong><WordPress</strong>');
- $this->assertEventEquals('startTag', 'strong', $events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
- $this->assertEventEquals('endTag', 'strong', $events->get(3));
- $this->assertEventEquals('eof', NULL, $events->get(4));
-
- $events = $this->parse('<src=<a>');
- $this->assertEventError($events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventError($events->get(2));
- $this->assertEventEquals('startTag', 'src', $events->get(3));
- $this->assertEventEquals('startTag', 'a', $events->get(4));
- $this->assertEventEquals('eof', NULL, $events->get(5));
-
- $events = $this->parse('<br...<a>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', 'br', $events->get(1));
- $this->assertEventEquals('eof', NULL, $events->get(2));
- }
-
- public function testIllegalTagNames() {
- $cases = array(
- '<li">' => 'li',
- '<p">' => 'p',
- '<b&nbsp; >' => 'b',
- '<static*all>' => 'static',
- '<h*0720/>' => 'h',
- '<st*ATTRIBUTE />' => 'st',
- '<a-href="http://url.com/">' => 'a',
- );
-
- foreach($cases as $html => $expected) {
- $events = $this->parse($html);
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expected, $events->get(1));
+ /**
+ * @depends testCharacterReference
+ */
+ public function testTagAttributes()
+ {
+ // Opening tags.
+ $good = array(
+ '<foo bar="baz">' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+ '<foo bar=" baz ">' => array(
+ 'foo',
+ array(
+ 'bar' => ' baz '
+ ),
+ FALSE
+ ),
+ "<foo bar=\"\nbaz\n\">" => array(
+ 'foo',
+ array(
+ 'bar' => "\nbaz\n"
+ ),
+ FALSE
+ ),
+ "<foo bar='baz'>" => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+ '<foo bar="A full sentence.">' => array(
+ 'foo',
+ array(
+ 'bar' => 'A full sentence.'
+ ),
+ FALSE
+ ),
+ "<foo a='1' b=\"2\">" => array(
+ 'foo',
+ array(
+ 'a' => '1',
+ 'b' => '2'
+ ),
+ FALSE
+ ),
+ "<foo ns:bar='baz'>" => array(
+ 'foo',
+ array(
+ 'ns:bar' => 'baz'
+ ),
+ FALSE
+ ),
+ "<foo a='blue&amp;red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&red'
+ ),
+ FALSE
+ ),
+ "<foo a='blue&&amp;red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&&red'
+ ),
+ FALSE
+ ),
+ "<foo\nbar='baz'\n>" => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+ '<doe a deer>' => array(
+ 'doe',
+ array(
+ 'a' => NULL,
+ 'deer' => NULL
+ ),
+ FALSE
+ ),
+ '<foo bar=baz>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+
+ // Updated for 8.1.2.3
+ '<foo bar = "baz" >' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+
+ // The spec allows an unquoted value '/'. This will not be a closing
+ // tag.
+ '<foo bar=/>' => array(
+ 'foo',
+ array(
+ 'bar' => '/'
+ ),
+ FALSE
+ ),
+ '<foo bar=baz/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz/'
+ ),
+ FALSE
+ )
+ );
+ $this->isAllGood('startTag', 2, $good);
+
+ // Self-closing tags.
+ $withEnd = array(
+ '<foo bar="baz"/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ TRUE
+ ),
+ '<foo BAR="baz"/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ TRUE
+ ),
+ '<foo BAR="BAZ"/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'BAZ'
+ ),
+ TRUE
+ ),
+ "<foo a='1' b=\"2\" c=3 d/>" => array(
+ 'foo',
+ array(
+ 'a' => '1',
+ 'b' => '2',
+ 'c' => '3',
+ 'd' => NULL
+ ),
+ TRUE
+ )
+ );
+ $this->isAllGood('startTag', 3, $withEnd);
+
+ // Cause a parse error.
+ $bad = array(
+ // This will emit an entity lookup failure for &red.
+ "<foo a='blue&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&red'
+ ),
+ FALSE
+ ),
+ "<foo a='blue&&amp;&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&&&red'
+ ),
+ FALSE
+ ),
+ '<foo bar=>' => array(
+ 'foo',
+ array(
+ 'bar' => NULL
+ ),
+ FALSE
+ ),
+ '<foo bar="oh' => array(
+ 'foo',
+ array(
+ 'bar' => 'oh'
+ ),
+ FALSE
+ ),
+ '<foo bar=oh">' => array(
+ 'foo',
+ array(
+ 'bar' => 'oh"'
+ ),
+ FALSE
+ ),
+
+ // these attributes are ignored because of current implementation
+ // of method "DOMElement::setAttribute"
+ // see issue #23: https://github.com/Masterminds/html5-php/issues/23
+ '<foo b"="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ ),
+ '<foo 2abc="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ ),
+ '<foo ?="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ ),
+ '<foo foo?bar="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ )
+ )
+ ;
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expects, $events->get(1));
+ }
+
+ // Cause multiple parse errors.
+ $reallyBad = array(
+ '<foo ="bar">' => array(
+ 'foo',
+ array(
+ '=' => NULL,
+ '"bar"' => NULL
+ ),
+ FALSE
+ ),
+ '<foo////>' => array(
+ 'foo',
+ array(),
+ TRUE
+ ),
+ // character "&" in unquoted attribute shouldn't cause an infinite loop
+ '<foo bar=index.php?str=1&amp;id=29>' => array(
+ 'foo',
+ array(
+ 'bar' => 'index.php?str=1&id=29'
+ ),
+ FALSE
+ )
+ );
+ foreach ($reallyBad as $test => $expects) {
+ $events = $this->parse($test);
+ // fprintf(STDOUT, $test . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventError($events->get(1));
+ // $this->assertEventEquals('startTag', $expects, $events->get(1));
+ }
+
+ // Regression: Malformed elements should be detected.
+ // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE),
+ $events = $this->parse('<foo baz="1" <bar></foo>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', array(
+ 'foo',
+ array(
+ 'baz' => '1'
+ ),
+ FALSE
+ ), $events->get(1));
+ $this->assertEventEquals('startTag', array(
+ 'bar',
+ array(),
+ FALSE
+ ), $events->get(2));
+ $this->assertEventEquals('endTag', array(
+ 'foo'
+ ), $events->get(3));
}
- }
-
- /**
- * @depends testCharacterReference
- */
- public function testTagAttributes() {
- // Opening tags.
- $good = array(
- '<foo bar="baz">' => array('foo', array('bar' => 'baz'), FALSE),
- '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), FALSE),
- "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), FALSE),
- "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), FALSE),
- '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), FALSE),
- "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), FALSE),
- "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), FALSE),
- "<foo a='blue&amp;red'>" => array('foo', array('a' => 'blue&red'), FALSE),
- "<foo a='blue&&amp;red'>" => array('foo', array('a' => 'blue&&red'), FALSE),
- "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), FALSE),
- '<doe a deer>' => array('doe', array('a' => NULL, 'deer' => NULL), FALSE),
- '<foo bar=baz>' => array('foo', array('bar' => 'baz'), FALSE),
-
- // Updated for 8.1.2.3
- '<foo bar = "baz" >' => array('foo', array('bar' => 'baz'), FALSE),
-
- // The spec allows an unquoted value '/'. This will not be a closing
- // tag.
- '<foo bar=/>' => array('foo', array('bar' => '/'), FALSE),
- '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), FALSE),
- );
- $this->isAllGood('startTag', 2, $good);
-
- // Self-closing tags.
- $withEnd = array(
- '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), TRUE),
- '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), TRUE),
- '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), TRUE),
- "<foo a='1' b=\"2\" c=3 d/>" => array('foo', array('a' => '1', 'b' => '2', 'c' => '3', 'd' => NULL), TRUE),
- );
- $this->isAllGood('startTag', 3, $withEnd);
-
- // Cause a parse error.
- $bad = array(
- // This will emit an entity lookup failure for &red.
- "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), FALSE),
- "<foo a='blue&&amp;&red'>" => array('foo', array('a' => 'blue&&&red'), FALSE),
- '<foo bar=>' => array('foo', array('bar' => NULL), FALSE),
- '<foo bar="oh' => array('foo', array('bar' => 'oh'), FALSE),
- '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), FALSE),
-
- // these attributes are ignored because of current implementation
- // of method "DOMElement::setAttribute"
- // see issue #23: https://github.com/Masterminds/html5-php/issues/23
- '<foo b"="baz">' => array('foo', array(), FALSE),
- '<foo 2abc="baz">' => array('foo', array(), FALSE),
- '<foo ?="baz">' => array('foo', array(), FALSE),
- '<foo foo?bar="baz">' => array('foo', array(), FALSE),
-
- );
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expects, $events->get(1));
+
+ public function testRawText()
+ {
+ $good = array(
+ '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
+ '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
+ '<script><<<<<<<<</script>' => '<<<<<<<<',
+ '<script>hello</script</script>' => 'hello</script',
+ "<script>\nhello</script\n</script>" => "\nhello</script\n",
+ '<script>&amp;</script>' => '&amp;',
+ '<script><!--not a comment--></script>' => '<!--not a comment-->',
+ '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>'
+ );
+ foreach ($good as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEventEquals('startTag', 'script', $events->get(0));
+ $this->assertEventEquals('text', $expects, $events->get(1));
+ $this->assertEventEquals('endTag', 'script', $events->get(2));
+ }
+
+ $bad = array(
+ '<script>&amp;</script' => '&amp;</script',
+ '<script>Hello world' => 'Hello world'
+ );
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventEquals('startTag', 'script', $events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventEquals('text', $expects, $events->get(2));
+ }
+
+ // Testing case sensitivity
+ $events = $this->parse('<TITLE>a test</TITLE>');
+ $this->assertEventEquals('startTag', 'title', $events->get(0));
+ $this->assertEventEquals('text', 'a test', $events->get(1));
+ $this->assertEventEquals('endTag', 'title', $events->get(2));
}
- // Cause multiple parse errors.
- $reallyBad = array(
- '<foo ="bar">' => array('foo', array('=' => NULL, '"bar"' => NULL), FALSE),
- '<foo////>' => array('foo', array(), TRUE),
- // character "&" in unquoted attribute shouldn't cause an infinite loop
- '<foo bar=index.php?str=1&amp;id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), FALSE),
- );
- foreach ($reallyBad as $test => $expects) {
- $events = $this->parse($test);
- //fprintf(STDOUT, $test . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventError($events->get(1));
- //$this->assertEventEquals('startTag', $expects, $events->get(1));
+ public function testRcdata()
+ {
+ list ($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
+ $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
+ $tok->parse();
+ $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
}
- // Regression: Malformed elements should be detected.
- // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE),
- $events = $this->parse('<foo baz="1" <bar></foo>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), FALSE), $events->get(1));
- $this->assertEventEquals('startTag', array('bar', array(), FALSE), $events->get(2));
- $this->assertEventEquals('endTag', array('foo'), $events->get(3));
- }
-
- public function testRawText() {
- $good = array(
- '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
- '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
- '<script><<<<<<<<</script>' => '<<<<<<<<',
- '<script>hello</script</script>' => 'hello</script',
- "<script>\nhello</script\n</script>" => "\nhello</script\n",
- '<script>&amp;</script>' => '&amp;',
- '<script><!--not a comment--></script>' => '<!--not a comment-->',
- '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>',
- );
- foreach ($good as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEventEquals('startTag', 'script', $events->get(0));
- $this->assertEventEquals('text', $expects, $events->get(1));
- $this->assertEventEquals('endTag', 'script', $events->get(2));
+ public function testText()
+ {
+ $events = $this->parse('a<br>b');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a', $events->get(0));
+ $this->assertEventEquals('startTag', 'br', $events->get(1));
+ $this->assertEventEquals('text', 'b', $events->get(2));
+
+ $events = $this->parse('<a>Test</a>');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('startTag', 'a', $events->get(0));
+ $this->assertEventEquals('text', 'Test', $events->get(1));
+ $this->assertEventEquals('endTag', 'a', $events->get(2));
+
+ $events = $this->parse('a<![CDATA[test]]>b');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a', $events->get(0));
+ $this->assertEventEquals('cdata', 'test', $events->get(1));
+ $this->assertEventEquals('text', 'b', $events->get(2));
+
+ $events = $this->parse('a<!--test-->b');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a', $events->get(0));
+ $this->assertEventEquals('comment', 'test', $events->get(1));
+ $this->assertEventEquals('text', 'b', $events->get(2));
+
+ $events = $this->parse('a&amp;b');
+ $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a&b', $events->get(0));
}
- $bad = array(
- '<script>&amp;</script' => '&amp;</script',
- '<script>Hello world' => 'Hello world',
- );
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventEquals('startTag', 'script', $events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventEquals('text', $expects, $events->get(2));
+ // ================================================================
+ // Utility functions.
+ // ================================================================
+ protected function createTokenizer($string, $debug = FALSE)
+ {
+ $eventHandler = new EventStack();
+ $stream = new StringInputStream($string);
+ $scanner = new Scanner($stream);
+
+ $scanner->debug = $debug;
+
+ return array(
+ new Tokenizer($scanner, $eventHandler),
+ $eventHandler
+ );
}
- // Testing case sensitivity
- $events = $this->parse('<TITLE>a test</TITLE>');
- $this->assertEventEquals('startTag', 'title', $events->get(0));
- $this->assertEventEquals('text', 'a test', $events->get(1));
- $this->assertEventEquals('endTag', 'title', $events->get(2));
-
- }
-
- public function testRcdata() {
- list($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
- $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
- $tok->parse();
- $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
- }
-
- public function testText() {
-
- $events = $this->parse('a<br>b');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a', $events->get(0));
- $this->assertEventEquals('startTag', 'br', $events->get(1));
- $this->assertEventEquals('text', 'b', $events->get(2));
-
- $events = $this->parse('<a>Test</a>');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('startTag', 'a', $events->get(0));
- $this->assertEventEquals('text', 'Test', $events->get(1));
- $this->assertEventEquals('endTag', 'a', $events->get(2));
-
- $events = $this->parse('a<![CDATA[test]]>b');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a', $events->get(0));
- $this->assertEventEquals('cdata', 'test', $events->get(1));
- $this->assertEventEquals('text', 'b', $events->get(2));
-
- $events = $this->parse('a<!--test-->b');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a', $events->get(0));
- $this->assertEventEquals('comment', 'test', $events->get(1));
- $this->assertEventEquals('text', 'b', $events->get(2));
-
- $events = $this->parse('a&amp;b');
- $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a&b', $events->get(0));
- }
-
- // ================================================================
- // Utility functions.
- // ================================================================
- protected function createTokenizer($string, $debug = FALSE) {
- $eventHandler = new EventStack();
- $stream = new StringInputStream($string);
- $scanner = new Scanner($stream);
-
- $scanner->debug = $debug;
-
- return array(
- new Tokenizer($scanner, $eventHandler),
- $eventHandler,
- );
- }
-
- public function parse($string, $debug = FALSE) {
- list($tok, $events) = $this->createTokenizer($string, $debug);
- $tok->parse();
-
- return $events;
- }
+ public function parse($string, $debug = FALSE)
+ {
+ list ($tok, $events) = $this->createTokenizer($string, $debug);
+ $tok->parse();
+
+ return $events;
+ }
}
diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php
index ff07a0a..cb9b1e4 100644
--- a/test/HTML5/Parser/TreeBuildingRulesTest.php
+++ b/test/HTML5/Parser/TreeBuildingRulesTest.php
@@ -5,77 +5,78 @@
*/
namespace Masterminds\HTML5\Tests\Parser;
-use Masterminds\HTML5\Elements;
use Masterminds\HTML5\Parser\TreeBuildingRules;
use Masterminds\HTML5\Parser\Tokenizer;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\DOMTreeBuilder;
-
-
/**
* These tests are functional, not necessarily unit tests.
*/
-class TreeBuildingRulesTest extends \Masterminds\HTML5\Tests\TestCase {
+class TreeBuildingRulesTest extends \Masterminds\HTML5\Tests\TestCase
+{
- const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>';
+ const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>';
- /**
- * Convenience function for parsing.
- */
- protected function parse($string) {
- $treeBuilder = new DOMTreeBuilder();
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
+ /**
+ * Convenience function for parsing.
+ */
+ protected function parse($string)
+ {
+ $treeBuilder = new DOMTreeBuilder();
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
- $parser->parse();
+ $parser->parse();
- return $treeBuilder->document();
- }
+ return $treeBuilder->document();
+ }
- public function testHasRules() {
- $doc = new \DOMDocument('1.0');
- $engine = new TreeBuildingRules($doc);
+ public function testHasRules()
+ {
+ $doc = new \DOMDocument('1.0');
+ $engine = new TreeBuildingRules($doc);
- $this->assertTrue($engine->hasRules('li'));
- $this->assertFalse($engine->hasRules('imaginary'));
- }
+ $this->assertTrue($engine->hasRules('li'));
+ $this->assertFalse($engine->hasRules('imaginary'));
+ }
- public function testHandleLI() {
- $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>');
- $doc = $this->parse($html);
+ public function testHandleLI()
+ {
+ $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>');
+ $doc = $this->parse($html);
- $list = $doc->getElementById('a');
+ $list = $doc->getElementById('a');
- $this->assertEquals(2, $list->childNodes->length);
- foreach($list->childNodes as $ele) {
- $this->assertEquals('li', $ele->tagName);
+ $this->assertEquals(2, $list->childNodes->length);
+ foreach ($list->childNodes as $ele) {
+ $this->assertEquals('li', $ele->tagName);
+ }
}
- }
+ public function testHandleDT()
+ {
+ $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>');
+ $doc = $this->parse($html);
- public function testHandleDT() {
- $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>');
- $doc = $this->parse($html);
+ $list = $doc->getElementById('a');
- $list = $doc->getElementById('a');
-
- $this->assertEquals(2, $list->childNodes->length);
- $this->assertEquals('dt', $list->firstChild->tagName);
- $this->assertEquals('dd', $list->lastChild->tagName);
- }
-
- public function testTable() {
- $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz');
- $doc = $this->parse($html);
+ $this->assertEquals(2, $list->childNodes->length);
+ $this->assertEquals('dt', $list->firstChild->tagName);
+ $this->assertEquals('dd', $list->lastChild->tagName);
+ }
- $list = $doc->getElementById('a');
+ public function testTable()
+ {
+ $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz');
+ $doc = $this->parse($html);
- $this->assertEquals(3, $list->childNodes->length);
- $this->assertEquals('th', $list->firstChild->tagName);
- $this->assertEquals('td', $list->lastChild->tagName);
- }
+ $list = $doc->getElementById('a');
+ $this->assertEquals(3, $list->childNodes->length);
+ $this->assertEquals('th', $list->firstChild->tagName);
+ $this->assertEquals('td', $list->lastChild->tagName);
+ }
}