summary refs log tree commit diff
path: root/test/SimpleTest
diff options
context:
space:
mode:
author Matt Butcher <[email protected]> 2013-04-13 13:32:23 -0500
committer Matt Butcher <[email protected]> 2013-04-13 13:32:23 -0500
commit b94fe2a6e67475aa819a97779a3c5ec07e2f53b5 (patch)
tree b01ae40879b2f00d7537fde9e13a671f8bf7b5ad /test/SimpleTest
parent 9457e0951ebc69238917c7178c015f0c9bf68d62 (diff)
Relocated deprecated tests.
Diffstat (limited to 'test/SimpleTest')
-rw-r--r--test/SimpleTest/TestableTokenizer.php76
-rw-r--r--test/SimpleTest/TokenizerPositionTest.php164
-rw-r--r--test/SimpleTest/TokenizerTest.php88
-rw-r--r--test/SimpleTest/TreeBuilderTest.php39
4 files changed, 367 insertions, 0 deletions
diff --git a/test/SimpleTest/TestableTokenizer.php b/test/SimpleTest/TestableTokenizer.php
new file mode 100644
index 0000000..4f064c3
--- /dev/null
+++ b/test/SimpleTest/TestableTokenizer.php
@@ -0,0 +1,76 @@
+<?php
+
+/**
+ * Tokenizer double used by the tests: every token that reaches emitToken()
+ * is also recorded, in a simplified array form, in $outputTokens.
+ */
+class HTML5_TestableTokenizer extends HTML5_Tokenizer
+{
+    /** Simplified tokens, in the order they were emitted. */
+    public $outputTokens = array();
+    private $_contentModelFlag;
+    private $_lastStartFlag;
+
+    // The constructor and parse() below intentionally do not match
+    // HTML5_Tokenizer's interface, though this shape might make more sense.
+
+    /**
+     * @param mixed $data             Input passed straight to the parent constructor.
+     * @param mixed $contentModelFlag Value assigned to $this->content_model when parse() runs.
+     * @param mixed $lastStartFlag    Optional tag name; when truthy, parse() seeds
+     *                                $this->token with a start tag of that name.
+     */
+    public function __construct($data, $contentModelFlag, $lastStartFlag = null) {
+        parent::__construct($data);
+        $this->_lastStartFlag = $lastStartFlag;
+        $this->_contentModelFlag = $contentModelFlag;
+    }
+
+    /**
+     * Applies the stored content model (and the optional fake start tag), then
+     * returns whatever the parent's parse() returns.
+     */
+    public function parse() {
+        $tagName = $this->_lastStartFlag;
+        $this->content_model = $this->_contentModelFlag;
+        if ($tagName) {
+            $this->token = array('type' => self::STARTTAG, 'name' => $tagName);
+        }
+        return parent::parse();
+    }
+    // --end mismatched interface
+
+    /**
+     * Forwards the token to the parent and appends its simplified form to
+     * $outputTokens. The parent is always called with $dry = true, whatever
+     * the caller passed.
+     */
+    protected function emitToken($token, $checkStream = true, $dry = false) {
+        parent::emitToken($token, $checkStream, true);
+
+        // tree handling code omitted
+        switch ($token['type']) {
+            case self::DOCTYPE:
+                // Identifiers that are not set are reported as null.
+                $name = isset($token['name']) ? $token['name'] : null;
+                $publicId = isset($token['public']) ? $token['public'] : null;
+                $systemId = isset($token['system']) ? $token['system'] : null;
+                // Last slot is true when 'force-quirks' is unset or empty.
+                $this->outputTokens[] = array('DOCTYPE', $name, $publicId, $systemId, empty($token['force-quirks']));
+                break;
+
+            case self::STARTTAG:
+                $attributes = new stdclass();
+                foreach ($token['attr'] as $pair) {
+                    // XXX this is IMPORTANT behavior, check if it's
+                    // in TreeBuilder: the first occurrence of a duplicated
+                    // attribute name wins, later ones are dropped.
+                    $key = $pair['name'];
+                    if (!isset($attributes->$key)) {
+                        $attributes->$key = $pair['value'];
+                    }
+                }
+                $entry = array('StartTag', $token['name'], $attributes);
+                if (isset($token['self-closing'])) {
+                    $entry[] = true;
+                }
+                $this->outputTokens[] = $entry;
+                break;
+
+            case self::ENDTAG:
+                $this->outputTokens[] = array('EndTag', $token['name']);
+                break;
+
+            case self::COMMENT:
+                $this->outputTokens[] = array('Comment', $token['data']);
+                break;
+
+            case self::CHARACTER:
+            case self::SPACECHARACTER:
+                // Consecutive character data collapses into one 'Character'
+                // entry: extend the previous entry when it is one, otherwise
+                // start a new entry.
+                $lastIndex = count($this->outputTokens) - 1;
+                if ($lastIndex >= 0 && $this->outputTokens[$lastIndex][0] === 'Character') {
+                    $this->outputTokens[$lastIndex][1] .= $token['data'];
+                } else {
+                    $this->outputTokens[] = array('Character', $token['data']);
+                }
+                break;
+
+            case self::PARSEERROR:
+                // Parse errors are recorded as a bare string, not an array.
+                $this->outputTokens[] = 'ParseError';
+                break;
+        }
+    }
+}
diff --git a/test/SimpleTest/TokenizerPositionTest.php b/test/SimpleTest/TokenizerPositionTest.php
new file mode 100644
index 0000000..534456a
--- /dev/null
+++ b/test/SimpleTest/TokenizerPositionTest.php
@@ -0,0 +1,164 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+/**
+ * Testable tokenizer that additionally records the stream position (current
+ * line and column offset) at the moment each token is emitted.
+ */
+class HTML5_PositionTestableTokenizer extends HTML5_TestableTokenizer
+{
+    /** Line recorded for each token; a run of character tokens keeps only its last entry. */
+    public $outputLines = array();
+    /** Column offsets, parallel to $outputLines. */
+    public $outputCols = array();
+    /** Character tokens seen since the last non-character, non-error token. */
+    private $characterTokens = array();
+
+    protected function emitToken($token, $checkStream = true, $dry = false) {
+        parent::emitToken($token, $checkStream, $dry);
+
+        $type = $token['type'];
+
+        // XXX: The tests should really include the parse errors, but for now
+        // they are skipped entirely (and do not interrupt a character run).
+        // Loose comparison on purpose: it mirrors switch/case semantics.
+        if ($type == self::PARSEERROR) {
+            return;
+        }
+
+        if ($type == self::CHARACTER) {
+            // A character token continuing a run replaces the position that
+            // was recorded for the previous character token of that run.
+            if ($this->characterTokens) {
+                array_pop($this->outputLines);
+                array_pop($this->outputCols);
+            }
+            $this->characterTokens[] = $token;
+        }
+
+        // Every token that gets here records the current stream position.
+        $this->outputLines[] = $this->stream()->getCurrentLine();
+        $this->outputCols[] = $this->stream()->getColumnOffset();
+
+        // Anything other than a character token ends the current run.
+        if ($type !== self::CHARACTER) {
+            $this->characterTokens = array();
+        }
+    }
+}
+
+/**
+ * Position tests: each case feeds a snippet to
+ * HTML5_PositionTestableTokenizer and checks the line and column recorded
+ * for every emitted token (a run of character tokens counts as one entry).
+ */
+class HTML5_TokenizerTestOfPosition extends UnitTestCase
+{
+    // Tags and text across a line break.
+    public function testBasic() {
+        $this->assertPositions(
+            "<b><i>f<p>\n<b>a</b>",
+            array(1,1,1,1, 2,2,2,2),
+            array(3,6,7,10,0,3,4,8)
+        );
+    }
+
+    // Two-, three- and four-byte UTF-8 sequences each advance the column by one.
+    public function testUnicode() {
+        $this->assertPositions(
+            "\xC2\xA2<b>\xE2\x82\xACa<b>\xf4\x8a\xaf\x8d",
+            array(1,1,1,1,1),
+            array(1,4,6,9,10)
+        );
+    }
+
+    // A character run spanning several lines is reported once, at its end.
+    public function testData() {
+        $this->assertPositions(
+            "a\na\n\xC2\xA2<b>",
+            array(3,3),
+            array(1,4)
+        );
+    }
+
+    public function testMarkupDeclarationDoubleDash() {
+        $this->assertPositions(
+            '<!-- foo -->',
+            array(1),
+            array(12)
+        );
+    }
+
+    public function testMarkupDeclarationDoctype() {
+        $this->assertPositions(
+            '<!DOCTYPE>',
+            array(1),
+            array(10)
+        );
+    }
+
+    public function testAfterDoctypeNamePublic() {
+        $this->assertPositions(
+            '<!DOCTYPE PUBLIC "foo">',
+            array(1),
+            array(23)
+        );
+    }
+
+    public function testAfterDoctypeNameSystem() {
+        $this->assertPositions(
+            '<!DOCTYPE SYSTEM "foo">',
+            array(1),
+            array(23)
+        );
+    }
+
+    public function testDecEntitySansSemicolon() {
+        $this->assertPositions(
+            '&#300',
+            array(1),
+            array(5)
+        );
+    }
+
+    public function testDecEntityWithSemicolon() {
+        $this->assertPositions(
+            '&#300;',
+            array(1),
+            array(6)
+        );
+    }
+
+    public function testHexEntity() {
+        $this->assertPositions(
+            '&#x300;',
+            array(1),
+            array(7)
+        );
+    }
+
+    public function testEmptyEntity() {
+        $this->assertPositions(
+            '&#;<b>',
+            array(1,1),
+            array(3,6)
+        );
+    }
+
+    public function testNamedEntity() {
+        $this->assertPositions(
+            '&quot;foo<b>',
+            array(1,1),
+            array(9,12)
+        );
+    }
+
+    public function testBadNamedEntity() {
+        $this->assertPositions(
+            '&zzz;b',
+            array(1),
+            array(6)
+        );
+    }
+
+    public function testAttributeEntity() {
+        $this->assertPositions(
+            '<b foo="&amper">a',
+            array( 1, 1),
+            array(16,17)
+        );
+    }
+
+    // Bogus comment containing a newline, followed by one character.
+    public function testBogusComment() {
+        $this->assertPositions(
+            "<!as asdfe \nasdf>d",
+            array(2,2),
+            array(5,6)
+        );
+    }
+
+    /**
+     * Tokenizes $input and asserts the recorded positions.
+     *
+     * @param string $input        Markup to tokenize.
+     * @param array  $lines        Expected contents of the tokenizer's $outputLines.
+     * @param array  $cols         Expected contents of the tokenizer's $outputCols.
+     * @param mixed  $flag         Content model flag handed to the tokenizer constructor.
+     * @param mixed  $lastStartTag Optional last start tag handed to the tokenizer constructor.
+     */
+    protected function assertPositions($input, $lines, $cols, $flag = HTML5_Tokenizer::PCDATA, $lastStartTag = null) {
+        $tokenizer = new HTML5_PositionTestableTokenizer($input, $flag, $lastStartTag);
+        // Accumulate the time spent parsing in the global TIME counter.
+        $GLOBALS['TIME'] -= get_microtime();
+        // The input was already given to the constructor; parse() takes no arguments.
+        $tokenizer->parse();
+        $GLOBALS['TIME'] += get_microtime();
+        $this->assertIdentical($tokenizer->outputLines, $lines, 'Lines: %s');
+        $this->assertIdentical($tokenizer->outputCols, $cols, 'Cols: %s');
+    }
+}
diff --git a/test/SimpleTest/TokenizerTest.php b/test/SimpleTest/TokenizerTest.php
new file mode 100644
index 0000000..d00fa78
--- /dev/null
+++ b/test/SimpleTest/TokenizerTest.php
@@ -0,0 +1,88 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+// Register the abstract harness below with SimpleTest::ignore().
+// NOTE(review): presumably this keeps the autorunner from treating the base
+// class itself as a runnable test case -- confirm against SimpleTest.
+SimpleTest::ignore('HTML5_TokenizerHarness');
+/**
+ * Base harness for the data-driven tokenizer tests: runs one test description
+ * through HTML5_TestableTokenizer and compares the recorded tokens with the
+ * expected output listed in the description.
+ */
+abstract class HTML5_TokenizerHarness extends HTML5_JSONHarness
+{
+    /**
+     * Runs a single test description once per content model flag.
+     *
+     * @param object $test Test description. Reads ->output, ->input and
+     *                     ->description; ->contentModelFlags (defaults to
+     *                     array('PCDATA')), ->ignoreErrorOrder (defaults to
+     *                     false) and ->lastStartTag are optional.
+     */
+    public function invoke($test) {
+        //echo get_class($this) . ': ' . $test->description ."\n";
+        if (!isset($test->contentModelFlags)) {
+            $test->contentModelFlags = array('PCDATA');
+        }
+        if (!isset($test->ignoreErrorOrder)) {
+            $test->ignoreErrorOrder = false;
+        }
+
+        // Get expected result array (and maybe error count).
+        $expect = array();
+        $expectedErrorCount = 0; // This is only used when ignoreErrorOrder = true.
+        foreach ($test->output as $tok) {
+            // If we're ignoring error order and this is a parse error, just count.
+            if ($test->ignoreErrorOrder && $tok === 'ParseError') {
+                $expectedErrorCount++;
+            } else {
+                // Normalize character tokens from the test: adjacent
+                // 'Character' entries are merged, matching what the testable
+                // tokenizer records.
+                if ($expect && $tok[0] === 'Character' && $expect[count($expect) - 1][0] === 'Character') {
+                    $expect[count($expect) - 1][1] .= $tok[1];
+                } else {
+                    $expect[] = $tok;
+                }
+            }
+        }
+
+        // Run test for each content model flag.
+        foreach ($test->contentModelFlags as $flag) {
+            $output = $this->tokenize($test, $flag);
+            $result = array();
+            $resultErrorCount = 0; // This is only used when ignoreErrorOrder = true.
+            foreach ($output as $tok) {
+                // If we're ignoring error order and this is a parse error, just count.
+                if ($test->ignoreErrorOrder && $tok === 'ParseError') {
+                    $resultErrorCount++;
+                } else {
+                    $result[] = $tok;
+                }
+            }
+            // '%' is doubled because the message is used as a format string
+            // (note the trailing '%s' placeholder).
+            $this->assertIdentical($expect, $result,
+                'In test "'.str_replace('%', '%%', $test->description).
+                '" with content model '.$flag.': %s'
+            );
+            if ($test->ignoreErrorOrder) {
+                $this->assertIdentical($expectedErrorCount, $resultErrorCount,
+                    'Wrong error count in test "'.str_replace('%', '%%', $test->description).
+                    '" with content model '.$flag.': %s'
+                );
+            }
+            // On a mismatch, dump the input and both token lists for debugging.
+            if ($expect != $result || ($test->ignoreErrorOrder && $expectedErrorCount !== $resultErrorCount)) {
+                echo "Input: "; str_dump($test->input);
+                echo "\nExpected: \n"; echo $this->tokenDump($expect);
+                echo "\nActual: \n"; echo $this->tokenDump($result);
+                echo "\n";
+            }
+        }
+    }
+
+    /**
+     * Renders a token list as numbered lines for the failure dump.
+     *
+     * Array tokens print as "<type>: <second element>". Tokens that are bare
+     * strings (the 'ParseError' markers) print as-is: indexing a string like
+     * an array would yield single characters and print "P: a".
+     *
+     * @param array $tokens Expected or actual token list.
+     * @return string One "N. ..." line per token.
+     */
+    private function tokenDump($tokens) {
+        $ret = '';
+        foreach ($tokens as $i => $token) {
+            $line = is_array($token) ? "{$token[0]}: {$token[1]}" : (string) $token;
+            $ret .= ($i+1).". $line\n";
+        }
+        return $ret;
+    }
+
+    /**
+     * Tokenizes the test input under one content model.
+     *
+     * @param object $test Test description; ->lastStartTag is defaulted to null when unset.
+     * @param string $flag Name of an HTML5_Tokenizer class constant (e.g. 'PCDATA').
+     * @return array The tokenizer's recorded $outputTokens.
+     */
+    public function tokenize($test, $flag) {
+        $flag = constant("HTML5_Tokenizer::$flag");
+        if (!isset($test->lastStartTag)) $test->lastStartTag = null;
+        $tokenizer = new HTML5_TestableTokenizer($test->input, $flag, $test->lastStartTag);
+        // Accumulate the time spent parsing in the global TIME counter.
+        $GLOBALS['TIME'] -= get_microtime();
+        $tokenizer->parse();
+        $GLOBALS['TIME'] += get_microtime();
+        return $tokenizer->outputTokens;
+    }
+}
+
+// Generate the tokenizer test suites from the harness class above.
+// NOTE(review): judging by the values, the arguments are the harness class,
+// a prefix for the generated class names, the test-data directory and a file
+// glob -- confirm against HTML5_TestData::generateTestCases().
+HTML5_TestData::generateTestCases(
+ 'HTML5_TokenizerHarness',
+ 'HTML5_TokenizerTestOf',
+ 'tokenizer', '*.test'
+);
diff --git a/test/SimpleTest/TreeBuilderTest.php b/test/SimpleTest/TreeBuilderTest.php
new file mode 100644
index 0000000..708d6a0
--- /dev/null
+++ b/test/SimpleTest/TreeBuilderTest.php
@@ -0,0 +1,39 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+// Register the harness below with SimpleTest::ignore().
+// NOTE(review): presumably this keeps the autorunner from running the harness
+// class directly, leaving only the generated subclasses -- confirm against SimpleTest.
+SimpleTest::ignore('HTML5_TreeBuilderHarness');
+/**
+ * Harness for the tree-construction tests: parses the test data with
+ * HTML5_Tokenizer and compares the serialized result with the expected
+ * document from the test.
+ */
+class HTML5_TreeBuilderHarness extends HTML5_TestDataHarness
+{
+    /**
+     * Identity assertion whose failure message shows the test input.
+     *
+     * @param string $expect Expected serialized document.
+     * @param string $actual Actual serialized document.
+     * @param array  $test   Test record; 'data' and 'document-fragment' are
+     *                       included in the message when present.
+     * @return mixed Whatever the parent assertion returns.
+     */
+    public function assertIdentical($expect, $actual, $test = array()) {
+        // $test defaults to an empty array, so 'data' may be missing.
+        $input = isset($test['data']) ? $test['data'] : '';
+        if (isset($test['document-fragment'])) {
+            $input .= "\nFragment: " . $test['document-fragment'];
+        }
+        $message = "Identical expectation failed\nInput:\n$input\n\nExpected:\n$expect\n\nActual:\n$actual\n";
+        // The message is used as a format string, so a literal '%' coming from
+        // the test data must be doubled (same escaping as the tokenizer harness).
+        return parent::assertIdentical($expect, $actual, str_replace('%', '%%', $message));
+    }
+
+    /**
+     * Runs one tree-construction test.
+     *
+     * @param array $test Test record; reads 'data', 'document' and the
+     *                    optional 'document-fragment'.
+     */
+    public function invoke($test) {
+        // this is totally the wrong interface to use, but
+        // for now we need testing
+        $tokenizer = new HTML5_Tokenizer($test['data']);
+        // Accumulate the time spent parsing in the global TIME counter.
+        $GLOBALS['TIME'] -= get_microtime();
+        if (isset($test['document-fragment'])) {
+            $tokenizer->parseFragment($test['document-fragment']);
+        } else {
+            $tokenizer->parse();
+        }
+        $GLOBALS['TIME'] += get_microtime();
+        $this->assertIdentical(
+            $test['document'],
+            HTML5_TestData::strDom($tokenizer->save()),
+            $test
+        );
+    }
+}
+
+// Generate the tree-construction test suites from the harness class above.
+// NOTE(review): judging by the values, the arguments are the harness class,
+// a prefix for the generated class names, the test-data directory and a file
+// glob -- confirm against HTML5_TestData::generateTestCases().
+HTML5_TestData::generateTestCases(
+ 'HTML5_TreeBuilderHarness',
+ 'HTML5_TreeBuilderTestOf',
+ 'tree-construction', '*.dat'
+);
+