diff options
Diffstat (limited to 'plugins/af_readability/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php')
-rw-r--r-- | plugins/af_readability/vendor/masterminds/html5/test/HTML5/Parser/TokenizerTest.php | 978 |
1 files changed, 978 insertions, 0 deletions
<?php

namespace Masterminds\HTML5\Tests\Parser;

use Masterminds\HTML5\Parser\UTF8Utils;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;

/**
 * Tests for the HTML5 Tokenizer.
 *
 * Each test feeds a string through a Scanner/Tokenizer pair whose events are
 * recorded by an EventStack, then inspects the recorded events by position.
 * An event is an array with a 'name' key (event type) and a 'data' key.
 */
class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
{
    // ================================================================
    // Additional assertions.
    // ================================================================

    /**
     * Tests that an event matches both the event type and the expected value.
     *
     * @param string $type
     *   Expected event type (compared with $event['name'])
     * @param string|array|null $expects
     *   The expected value. An array is compared with the whole $event['data'];
     *   anything else is compared with $event['data'][0]
     * @param array $event
     *   The recorded event, with 'name' and 'data' keys
     */
    public function assertEventEquals($type, $expects, $event)
    {
        $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, true));
        if (is_array($expects)) {
            $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ': ' . print_r($event, true));
        } else {
            // When the event carries no data array the expected value is
            // compared against null (the tests rely on this for 'eof').
            $d = (is_array($event['data']) ? $event['data'][0] : null);
            $this->assertEquals($expects, $d, "Event $type should equal $expects: " . print_r($event, true));
        }
    }

    /**
     * Assert that a given event is 'error'.
     *
     * @param array $event
     *   The recorded event, with a 'name' key
     */
    public function assertEventError($event)
    {
        $this->assertEquals('error', $event['name'], 'Expected error for event: ' . print_r($event, true));
    }

    /**
     * Asserts that all of the tests are good.
     *
     * This loops through a map of tests/expectations and runs a few assertions on each test.
     *
     * Checks:
     * - depth (if depth is > 0)
     * - event name
     * - matches on event 0.
     *
     * @param string $name
     *   Expected event type of the first event
     * @param int $depth
     *   Expected number of recorded events; skipped when not greater than 0
     * @param array $tests
     *   Map of input string => expected value (see assertEventEquals())
     * @param bool $debug
     *   When true, each case is printed to STDOUT before it is parsed
     */
    protected function isAllGood($name, $depth, $tests, $debug = false)
    {
        foreach ($tests as $try => $expects) {
            if ($debug) {
                fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, true));
            }
            $e = $this->parse($try);
            if ($depth > 0) {
                $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, true));
            }
            $this->assertEventEquals($name, $expects, $e->get(0));
        }
    }

    // ================================================================
    // Tests.
    // ================================================================

    /**
     * Empty input yields a single 'eof' event.
     */
    public function testParse()
    {
        list($tok, $events) = $this->createTokenizer('');

        $tok->parse();
        $e1 = $events->get(0);

        $this->assertEquals(1, $events->depth());
        $this->assertEquals('eof', $e1['name']);
    }

    /**
     * Whitespace-only input is reported as a text event followed by one more event.
     */
    public function testWhitespace()
    {
        $spaces = ' ';
        list($tok, $events) = $this->createTokenizer($spaces);

        $tok->parse();

        $this->assertEquals(2, $events->depth());

        $e1 = $events->get(0);

        $this->assertEquals('text', $e1['name']);
        $this->assertEquals($spaces, $e1['data'][0]);
    }

    /**
     * Character references in text are decoded; malformed ones raise an error.
     */
    public function testCharacterReference()
    {
        // Named, hexadecimal and decimal references, plus a bare ampersand.
        // Every key must be distinct: duplicate array keys silently collapse.
        $good = array(
            '&amp;' => '&',
            '&#x0003c;' => '<',
            '&#38;' => '&',
            '&' => '&',
        );
        $this->isAllGood('text', 2, $good);

        // Test with broken charref
        $str = '&foo';
        $events = $this->parse($str);
        $e1 = $events->get(0);
        $this->assertEquals('error', $e1['name']);

        // Hexadecimal reference without valid hex digits / terminator.
        $str = '&#xfoo';
        $events = $this->parse($str);
        $e1 = $events->get(0);
        $this->assertEquals('error', $e1['name']);

        // Decimal reference without digits.
        $str = '&#foo';
        $events = $this->parse($str);
        $e1 = $events->get(0);
        $this->assertEquals('error', $e1['name']);

        // FIXME: Once the text processor is done, need to verify that the
        // tokens are transformed correctly into text.
    }

    /**
     * Malformed markup declarations are reported as an error plus a comment
     * event whose value is the whole input.
     */
    public function testBogusComment()
    {
        $bogus = array(
            '</+this is a bogus comment. +>',
            '<!+this is a bogus comment. !>',
            '<!D OCTYPE foo bar>',
            '<!DOCTYEP foo bar>',
            '<![CADATA[ TEST ]]>',
            '<![CDATA Hello ]]>',
            '<![CDATA[ Hello [[>',
            '<!CDATA[[ test ]]>',
            '<![CDATA[',
            '<![CDATA[hellooooo hello',
            '<? Hello World ?>',
            '<? Hello World',
        );
        foreach ($bogus as $str) {
            $events = $this->parse($str);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('comment', $str, $events->get(1));
        }
    }

    /**
     * End tags: well-formed, recoverable failures, and bogus comments.
     */
    public function testEndTag()
    {
        $succeed = array(
            '</a>' => 'a',
            '</test>' => 'test',
            '</test
            >' => 'test',
            '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
            // See 8.2.4.10, which requires this and does not say error.
            '</a<b>' => 'a<b',
        );
        $this->isAllGood('endTag', 2, $succeed);

        // Recoverable failures
        $fail = array(
            '</a class="monkey">' => 'a',
            '</a <b>' => 'a',
            '</a <b <c>' => 'a',
            '</a is the loneliest letter>' => 'a',
            '</a' => 'a',
        );
        foreach ($fail as $test => $result) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());
            // Should have triggered an error.
            $this->assertEventError($events->get(0));
            // Should have tried to parse anyway.
            $this->assertEventEquals('endTag', $result, $events->get(1));
        }

        // BogoComments
        $comments = array(
            '</>' => '</>',
            '</ >' => '</ >',
            '</ a>' => '</ a>',
        );
        foreach ($comments as $test => $result) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());

            // Should have triggered an error.
            $this->assertEventError($events->get(0));

            // Should have tried to parse anyway.
            $this->assertEventEquals('comment', $result, $events->get(1));
        }
    }

    /**
     * Comments: well-formed ones yield their content; unterminated or
     * malformed ones yield an error followed by the recovered content.
     */
    public function testComment()
    {
        $good = array(
            '<!--easy-->' => 'easy',
            '<!-- 1 > 0 -->' => ' 1 > 0 ',
            '<!-- --$i -->' => ' --$i ',
            '<!----$i-->' => '--$i',
            "<!--\nHello World.\na-->" => "\nHello World.\na",
            '<!-- <!-- -->' => ' <!-- ',
        );
        foreach ($good as $test => $expected) {
            $events = $this->parse($test);
            $this->assertEventEquals('comment', $expected, $events->get(0));
        }

        $fail = array(
            '<!-->' => '',
            '<!--Hello' => 'Hello',
            // A NUL byte is expected to come back as UTF8Utils::FFFD.
            "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
            '<!--' => '',
        );
        foreach ($fail as $test => $expected) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth());
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('comment', $expected, $events->get(1));
        }
    }

    /**
     * Well-formed CDATA sections yield a 'cdata' event with the raw content.
     */
    public function testCDATASection()
    {
        $good = array(
            '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
            '<![CDATA[CDATA]]>' => 'CDATA',
            '<![CDATA[ ]] > ]]>' => ' ]] > ',
            '<![CDATA[ ]]>' => ' ',
        );
        $this->isAllGood('cdata', 2, $good);
    }

    /**
     * Doctype declarations.
     *
     * Expected data is array(name, id type, id value, flag); the flag is false
     * for every well-formed case and true for every malformed one.
     */
    public function testDoctype()
    {
        $good = array(
            '<!DOCTYPE html>' => array('html', 0, null, false),
            '<!doctype html>' => array('html', 0, null, false),
            '<!DocType html>' => array('html', 0, null, false),
            "<!DOCTYPE\nhtml>" => array('html', 0, null, false),
            "<!DOCTYPE\fhtml>" => array('html', 0, null, false),
            '<!DOCTYPE html PUBLIC "foo bar">' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            "<!DOCTYPE html PUBLIC 'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            '<!DOCTYPE html PUBLIC "foo bar" >' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false),
            '<!DOCTYPE html SYSTEM "foo bar">' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
            "<!DOCTYPE html SYSTEM 'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
            '<!DOCTYPE html SYSTEM "foo/bar" >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', false),
            "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false),
        );
        $this->isAllGood('doctype', 2, $good);

        $bad = array(
            '<!DOCTYPE>' => array(null, EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE >' => array(null, EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, null, true),

            // Can't tell whether these are ids or ID types, since the context is chopped.
            '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, null, true),
            '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, null, true),

            '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true),
            '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true),
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('doctype', $expects, $events->get(1));
        }
    }

    /**
     * Processing instructions yield a 'pi' event with the target name and,
     * when present, the instruction body.
     */
    public function testProcessorInstruction()
    {
        $good = array(
            '<?hph ?>' => 'hph',
            '<?hph echo "Hello World"; ?>' => array(
                'hph',
                'echo "Hello World"; ',
            ),
            "<?hph \necho 'Hello World';\n?>" => array(
                'hph',
                "echo 'Hello World';\n",
            ),
        );
        $this->isAllGood('pi', 2, $good);
    }

    /**
     * This tests just simple tags.
     */
    public function testSimpleTags()
    {
        $open = array(
            '<foo>' => 'foo',
            '<FOO>' => 'foo',
            '<fOO>' => 'foo',
            '<foo >' => 'foo',
            "<foo\n\n\n\n>" => 'foo',
            '<foo:bar>' => 'foo:bar',
        );
        $this->isAllGood('startTag', 2, $open);

        $selfClose = array(
            '<foo/>' => 'foo',
            '<FOO/>' => 'foo',
            '<foo />' => 'foo',
            "<foo\n\n\n\n/>" => 'foo',
            '<foo:bar/>' => 'foo:bar',
        );
        foreach ($selfClose as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(2, $events->depth(), "Counting events for '$test'" . print_r($events, true));
            $this->assertEventEquals('startTag', $expects, $events->get(0));
            $event = $events->get(0);
            // Third data element is the self-closing flag.
            $this->assertTrue($event['data'][2]);
        }

        // Tags cut off before the closing '>'.
        $bad = array(
            '<foo' => 'foo',
            '<foo ' => 'foo',
            '<foo/' => 'foo',
            '<foo /' => 'foo',
        );

        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expects, $events->get(1));
        }
    }

    /**
     * A tag that looks like a lone attribute (name="value") is reported as
     * three errors followed by a start tag named after the attribute.
     */
    public function testTagsWithAttributeAndMissingName()
    {
        $cases = array(
            '<id="top_featured">' => 'id',
            '<color="white">' => 'color',
            "<class='neaktivni_stranka'>" => 'class',
            '<bgcolor="white">' => 'bgcolor',
            '<class="nom">' => 'class',
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventError($events->get(1));
            $this->assertEventError($events->get(2));
            $this->assertEventEquals('startTag', $expected, $events->get(3));
            $this->assertEventEquals('eof', null, $events->get(4));
        }
    }

    /**
     * A '<' directly after a tag name ends the first tag (with an error) and
     * starts the next one.
     */
    public function testTagNotClosedAfterTagName()
    {
        $cases = array(
            '<noscript<img>' => array(
                'noscript',
                'img',
            ),
            '<center<a>' => array(
                'center',
                'a',
            ),
            '<br<br>' => array(
                'br',
                'br',
            ),
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expected[0], $events->get(1));
            $this->assertEventEquals('startTag', $expected[1], $events->get(2));
            $this->assertEventEquals('eof', null, $events->get(3));
        }

        $events = $this->parse('<span<>02</span>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'span', $events->get(1));
        $this->assertEventError($events->get(2));
        $this->assertEventEquals('text', '>02', $events->get(3));
        $this->assertEventEquals('endTag', 'span', $events->get(4));
        $this->assertEventEquals('eof', null, $events->get(5));

        $events = $this->parse('<p</p>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'p', $events->get(1));
        $this->assertEventEquals('endTag', 'p', $events->get(2));
        $this->assertEventEquals('eof', null, $events->get(3));

        $events = $this->parse('<strong><WordPress</strong>');
        $this->assertEventEquals('startTag', 'strong', $events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
        $this->assertEventEquals('endTag', 'strong', $events->get(3));
        $this->assertEventEquals('eof', null, $events->get(4));

        $events = $this->parse('<src=<a>');
        $this->assertEventError($events->get(0));
        $this->assertEventError($events->get(1));
        $this->assertEventError($events->get(2));
        $this->assertEventEquals('startTag', 'src', $events->get(3));
        $this->assertEventEquals('startTag', 'a', $events->get(4));
        $this->assertEventEquals('eof', null, $events->get(5));

        $events = $this->parse('<br...<a>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', 'br', $events->get(1));
        $this->assertEventEquals('eof', null, $events->get(2));
    }

    /**
     * Illegal characters after a tag name raise an error, and the tag is
     * reported with the legal prefix of its name.
     */
    public function testIllegalTagNames()
    {
        $cases = array(
            '<li">' => 'li',
            '<p">' => 'p',
            // An entity is not legal inside a tag; a plain space here would
            // be a well-formed tag and could never raise the expected error.
            '<b&nbsp; >' => 'b',
            '<static*all>' => 'static',
            '<h*0720/>' => 'h',
            '<st*ATTRIBUTE />' => 'st',
        );

        foreach ($cases as $html => $expected) {
            $events = $this->parse($html);
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expected, $events->get(1));
        }
    }

    /**
     * Attribute parsing on start tags.
     *
     * Expected data is array(tag name, attribute map, self-closing flag).
     */
    public function testTagAttributes()
    {
        // Opening tags.
        $good = array(
            '<foo bar="baz">' => array('foo', array('bar' => 'baz'), false),
            '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), false),
            "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), false),
            "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), false),
            '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), false),
            "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), false),
            "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), false),
            // A proper character reference is decoded ...
            "<foo a='blue&amp;red'>" => array('foo', array('a' => 'blue&red'), false),
            // ... and a bare ampersand is passed through unchanged.
            "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), false),
            "<foo a='blue&&&red'>" => array('foo', array('a' => 'blue&&&red'), false),
            "<foo a='blue&&red'>" => array('foo', array('a' => 'blue&&red'), false),
            "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), false),
            '<doe a deer>' => array('doe', array('a' => null, 'deer' => null), false),
            '<foo bar=baz>' => array('foo', array('bar' => 'baz'), false),

            // Updated for 8.1.2.3
            '<foo bar = "baz" >' => array('foo', array('bar' => 'baz'), false),

            // The spec allows an unquoted value '/'. This will not be a closing
            // tag.
            '<foo bar=/>' => array('foo', array('bar' => '/'), false),
            '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), false),
        );
        $this->isAllGood('startTag', 2, $good);

        // Self-closing tags.
        $withEnd = array(
            '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), true),
            '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), true),
            '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), true),
            "<foo a='1' b=\"2\" c=3 d/>" => array(
                'foo',
                array(
                    'a' => '1',
                    'b' => '2',
                    'c' => '3',
                    'd' => null,
                ),
                true,
            ),
        );
        $this->isAllGood('startTag', 2, $withEnd);

        // Cause a parse error.
        $bad = array(
            // This will emit an entity lookup failure for &+dark.
            "<foo a='blue&+dark'>" => array('foo', array('a' => 'blue&+dark'), false),
            '<foo bar=>' => array('foo', array('bar' => null), false),
            '<foo bar="oh' => array('foo', array('bar' => 'oh'), false),
            '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), false),

            // these attributes are ignored because of current implementation
            // of method "DOMElement::setAttribute"
            // see issue #23: https://github.com/Masterminds/html5-php/issues/23
            '<foo b"="baz">' => array('foo', array(), false),
            '<foo 2abc="baz">' => array('foo', array(), false),
            '<foo ?="baz">' => array('foo', array(), false),
            '<foo foo?bar="baz">' => array('foo', array(), false),
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventEquals('startTag', $expects, $events->get(1));
        }

        // Cause multiple parse errors.
        $reallyBad = array(
            '<foo ="bar">' => array(
                'foo',
                array(
                    '=' => null,
                    '"bar"' => null,
                ),
                false,
            ),
            '<foo////>' => array('foo', array(), true),
            // character "&" in unquoted attribute shouldn't cause an infinite loop
            '<foo bar=index.php?str=1&id=29>' => array(
                'foo',
                array(
                    'bar' => 'index.php?str=1&id=29',
                ),
                false,
            ),
        );
        foreach ($reallyBad as $test => $expects) {
            $events = $this->parse($test);
            // fprintf(STDOUT, $test . print_r($events, true));
            $this->assertEventError($events->get(0));
            $this->assertEventError($events->get(1));
            // The start tag itself is currently not verified; $expects only
            // documents the intended result.
            // $this->assertEventEquals('startTag', $expects, $events->get(1));
        }

        // Regression: Malformed elements should be detected.
        // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), false),
        $events = $this->parse('<foo baz="1" <bar></foo>');
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', array(
            'foo',
            array(
                'baz' => '1',
            ),
            false,
        ), $events->get(1));
        $this->assertEventEquals('startTag', array(
            'bar',
            array(),
            false,
        ), $events->get(2));
        $this->assertEventEquals('endTag', array(
            'foo',
        ), $events->get(3));
    }

    /**
     * Raw text elements: the content is passed through verbatim, with no
     * character-reference decoding and no comment/CDATA recognition.
     */
    public function testRawText()
    {
        $good = array(
            '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
            '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
            '<script><<<<<<<<</script>' => '<<<<<<<<',
            '<script>hello</script</script>' => 'hello</script',
            "<script>\nhello</script\n</script>" => "\nhello</script\n",
            // The reference must come back undecoded.
            '<script>&amp;</script>' => '&amp;',
            '<script><!--not a comment--></script>' => '<!--not a comment-->',
            '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>',
        );
        foreach ($good as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEventEquals('startTag', 'script', $events->get(0));
            $this->assertEventEquals('text', $expects, $events->get(1));
            $this->assertEventEquals('endTag', 'script', $events->get(2));
        }

        // Unterminated raw text: an error, then everything up to EOF as text.
        $bad = array(
            '<script>&amp;</script' => '&amp;</script',
            '<script>Hello world' => 'Hello world',
        );
        foreach ($bad as $test => $expects) {
            $events = $this->parse($test);
            $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, true));
            $this->assertEventEquals('startTag', 'script', $events->get(0));
            $this->assertEventError($events->get(1));
            $this->assertEventEquals('text', $expects, $events->get(2));
        }

        // Testing case sensitivity
        $events = $this->parse('<TITLE>a test</TITLE>');
        $this->assertEventEquals('startTag', 'title', $events->get(0));
        $this->assertEventEquals('text', 'a test', $events->get(1));
        $this->assertEventEquals('endTag', 'title', $events->get(2));

        // Testing end tags with whitespaces
        $events = $this->parse('<title>Whitespaces are tasty</title >');
        $this->assertEventEquals('startTag', 'title', $events->get(0));
        $this->assertEventEquals('text', 'Whitespaces are tasty', $events->get(1));
        $this->assertEventEquals('endTag', 'title', $events->get(2));
    }

    /**
     * RCDATA: the character reference is decoded to a quote while the
     * comment-like markup stays literal text.
     */
    public function testRcdata()
    {
        list($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
        $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
        $tok->parse();
        $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
    }

    /**
     * Text between and around other tokens is reported in document order.
     */
    public function testText()
    {
        $events = $this->parse('a<br>b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('startTag', 'br', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        $events = $this->parse('<a>Test</a>');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('startTag', 'a', $events->get(0));
        $this->assertEventEquals('text', 'Test', $events->get(1));
        $this->assertEventEquals('endTag', 'a', $events->get(2));

        $events = $this->parse('<p>0</p><p>1</p>');
        $this->assertEquals(7, $events->depth(), 'Events: ' . print_r($events, true));

        $this->assertEventEquals('startTag', 'p', $events->get(0));
        $this->assertEventEquals('text', '0', $events->get(1));
        $this->assertEventEquals('endTag', 'p', $events->get(2));

        $this->assertEventEquals('startTag', 'p', $events->get(3));
        $this->assertEventEquals('text', '1', $events->get(4));
        $this->assertEventEquals('endTag', 'p', $events->get(5));

        $events = $this->parse('a<![CDATA[test]]>b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('cdata', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        $events = $this->parse('a<!--test-->b');
        $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a', $events->get(0));
        $this->assertEventEquals('comment', 'test', $events->get(1));
        $this->assertEventEquals('text', 'b', $events->get(2));

        // Character references inside text are decoded into a single text
        // event. (An unknown reference such as "&b" would raise an error
        // instead; see testCharacterReference().)
        $events = $this->parse('a&amp;b');
        $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a&b', $events->get(0));

        $events = $this->parse('a&sup2;b');
        $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
        $this->assertEventEquals('text', 'a²b', $events->get(0));
    }

    // ================================================================
    // Utility functions.
    // ================================================================

    /**
     * Builds a Tokenizer over $string together with the EventStack that
     * records its events.
     *
     * @param string $string
     *   The markup to tokenize
     * @param bool $debug
     *   Value assigned to the scanner's debug property
     *
     * @return array
     *   array(Tokenizer, EventStack), suitable for list()
     */
    protected function createTokenizer($string, $debug = false)
    {
        $eventHandler = new EventStack();
        $scanner = new Scanner($string);

        $scanner->debug = $debug;

        return array(
            new Tokenizer($scanner, $eventHandler),
            $eventHandler,
        );
    }

    /**
     * Tokenizes $string and returns the recorded events.
     *
     * @param string $string
     *   The markup to tokenize
     * @param bool $debug
     *   Passed through to createTokenizer()
     *
     * @return EventStack
     *   The event handler that recorded the tokenizer's events
     */
    public function parse($string, $debug = false)
    {
        list($tok, $events) = $this->createTokenizer($string, $debug);
        $tok->parse();

        return $events;
    }
}