diff options
Diffstat (limited to 'test/HTML5')
-rw-r--r-- | test/HTML5/DataHarness.php | 48 | ||||
-rw-r--r-- | test/HTML5/InputStreamTest.php | 174 | ||||
-rw-r--r-- | test/HTML5/JSONHarness.php | 21 | ||||
-rw-r--r-- | test/HTML5/ParserTest.php | 15 | ||||
-rw-r--r-- | test/HTML5/TestData.php | 167 | ||||
-rw-r--r-- | test/HTML5/TestDataHarness.php | 18 | ||||
-rw-r--r-- | test/HTML5/TestDataTest.php | 31 | ||||
-rw-r--r-- | test/HTML5/TestDataTest/sample.dat | 7 | ||||
-rw-r--r-- | test/HTML5/TestableTokenizer.php | 76 | ||||
-rw-r--r-- | test/HTML5/TokenizerPositionTest.php | 164 | ||||
-rw-r--r-- | test/HTML5/TokenizerTest.php | 88 | ||||
-rw-r--r-- | test/HTML5/TreeBuilderTest.php | 39 |
12 files changed, 848 insertions, 0 deletions
diff --git a/test/HTML5/DataHarness.php b/test/HTML5/DataHarness.php new file mode 100644 index 0000000..844b1fc --- /dev/null +++ b/test/HTML5/DataHarness.php @@ -0,0 +1,48 @@ +<?php + +/** + * Modified test-case supertype for running tests that are not + * test method based, but based off of test data that resides in + * files. + */ +SimpleTest::ignore('HTML5_DataHarness'); +abstract class HTML5_DataHarness extends UnitTestCase +{ + /** + * Filled in by HTML5_TestData::generateTestCases() + */ + protected $filename; + private $tests; + /** + * Invoked by the runner, it is the function responsible for executing + * the test and delivering results. + * @param $test Some easily usable representation of the test + */ + abstract public function invoke($test); + /** + * Returns a list of tests that can be executed. The list members will + * be passed to invoke(). Return an iterator if you don't want to load + * all test into memory + */ + abstract public function getDataTests(); + /** + * Returns a description of the test + */ + abstract public function getDescription($test); + public function getTests() { + $this->tests = $this->getDataTests(); + // 1-indexed, to be consistent with Python + $ret = array(); + for ($i = 1; $i <= count($this->tests); $i++) { + $ret[] = "test_$i"; + } + return $ret; + } + /** + * Emulates our test functions + */ + public function __call($name, $args) { + list($test, $i) = explode("_", $name); + $this->invoke($this->tests[$i-1]); + } +} diff --git a/test/HTML5/InputStreamTest.php b/test/HTML5/InputStreamTest.php new file mode 100644 index 0000000..b60787a --- /dev/null +++ b/test/HTML5/InputStreamTest.php @@ -0,0 +1,174 @@ +<?php + +require_once dirname(__FILE__) . '/../autorun.php'; + +class HTML5_InputStreamTest extends UnitTestCase +{ + public function invalidReplaceTestHandler($input, $name) { + $stream = new HTML5_InputStream($input); + $this->assertIdentical("\xEF\xBF\xBD", $stream->remainingChars(), $name); + } + + public function testInvalidReplace() { + // Above U+10FFFF + $this->invalidReplaceTestHandler("\xF5\x90\x80\x80", 'U+110000'); + + // Incomplete + $this->invalidReplaceTestHandler("\xDF", 'Incomplete two byte sequence (missing final byte)'); + $this->invalidReplaceTestHandler("\xEF\xBF", 'Incomplete three byte sequence (missing final byte)'); + $this->invalidReplaceTestHandler("\xF4\xBF\xBF", 'Incomplete four byte sequence (missing final byte)'); + + // Min/max continuation bytes + $this->invalidReplaceTestHandler("\x80", 'Lone 80 continuation byte'); + $this->invalidReplaceTestHandler("\xBF", 'Lone BF continuation byte'); + + // Invalid bytes (these can never occur) + $this->invalidReplaceTestHandler("\xFE", 'Invalid FE byte'); + $this->invalidReplaceTestHandler("\xFF", 'Invalid FF byte'); + + // Min/max overlong + $this->invalidReplaceTestHandler("\xC0\x80", 'Overlong representation of U+0000'); + $this->invalidReplaceTestHandler("\xE0\x80\x80", 'Overlong representation of U+0000'); + $this->invalidReplaceTestHandler("\xF0\x80\x80\x80", 'Overlong representation of U+0000'); + $this->invalidReplaceTestHandler("\xF8\x80\x80\x80\x80", 'Overlong representation of U+0000'); + $this->invalidReplaceTestHandler("\xFC\x80\x80\x80\x80\x80", 'Overlong representation of U+0000'); + $this->invalidReplaceTestHandler("\xC1\xBF", 'Overlong representation of U+007F'); + $this->invalidReplaceTestHandler("\xE0\x9F\xBF", 'Overlong representation of U+07FF'); + $this->invalidReplaceTestHandler("\xF0\x8F\xBF\xBF", 'Overlong representation of U+FFFF'); + } + + public function testStripLeadingBOM() { + $leading = new HTML5_InputStream("\xEF\xBB\xBFa"); + $this->assertIdentical('a', $leading->char(), 'BOM should be stripped'); + } + + public function testZWNBSP() { + $stream = new HTML5_InputStream("a\xEF\xBB\xBF"); + $this->assertIdentical("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain'); + } + + public function testNull() { + $stream = new HTML5_InputStream("\0\0\0"); + $this->assertIdentical("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD'); + $this->assertIdentical(3, count($stream->errors), 'Null character should be throw parse error'); + } + + public function testCRLF() { + $stream = new HTML5_InputStream("\r\n"); + $this->assertIdentical("\n", $stream->remainingChars(), 'CRLF should be replaced by LF'); + } + + public function testCR() { + $stream = new HTML5_InputStream("\r"); + $this->assertIdentical("\n", $stream->remainingChars(), 'CR should be replaced by LF'); + } + + public function invalidParseErrorTestHandler($input, $numErrors, $name) { + $stream = new HTML5_InputStream($input); + $this->assertIdentical($input, $stream->remainingChars(), $name . ' (stream content)'); + $this->assertIdentical($numErrors, count($stream->errors), $name . ' (number of errors)'); + } + + public function testInvalidParseError() { + // C0 controls (except U+0000 and U+000D due to different handling) + $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)'); + $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)'); + $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)'); + $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)'); + $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)'); + $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)'); + $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)'); + $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)'); + $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)'); + $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)'); + $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)'); + $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)'); + $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)'); + $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)'); + $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)'); + $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)'); + $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)'); + $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)'); + $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)'); + $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)'); + $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)'); + $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)'); + $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)'); + $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)'); + $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)'); + $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)'); + $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)'); + $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)'); + $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)'); + $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)'); + + // DEL (U+007F) + $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F'); + + // C1 Controls + $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)'); + $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)'); + $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)'); + + // Single UTF-16 surrogates + $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)'); + $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)'); + $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)'); + $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)'); + $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)'); + $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)'); + $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)'); + + // Paired UTF-16 surrogates + $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)'); + $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)'); + $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)'); + $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)'); + $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)'); + $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)'); + $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)'); + $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)'); + + // Charcters surrounding surrogates + $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+DE00 (one codepoint above highest surrogate codepoint)'); + + // Permanent noncharacters + $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)'); + $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)'); + } +} diff --git a/test/HTML5/JSONHarness.php b/test/HTML5/JSONHarness.php new file mode 100644 index 0000000..dd1cf66 --- /dev/null +++ b/test/HTML5/JSONHarness.php @@ -0,0 +1,21 @@ +<?php + +/** + * Implementation specifically for JSON format files. + */ +SimpleTest::ignore('HTML5_JSONHarness'); +abstract class HTML5_JSONHarness extends HTML5_DataHarness +{ + protected $data; + public function __construct() { + parent::__construct(); + $this->data = json_decode(file_get_contents($this->filename)); + } + public function getDescription($test) { + return $test->description; + } + public function getDataTests() { + return isset($this->data->tests) ? $this->data->tests : array(); + // could be a weird xmlViolationsTest + } +} diff --git a/test/HTML5/ParserTest.php b/test/HTML5/ParserTest.php new file mode 100644 index 0000000..43b87e9 --- /dev/null +++ b/test/HTML5/ParserTest.php @@ -0,0 +1,15 @@ +<?php + +require_once dirname(__FILE__) . '/../autorun.php'; + +class HTML5_ParserTest extends UnitTestCase +{ + public function testParse() { + $result = HTML5_Parser::parse('<html><body></body></html>'); + $this->assertIsA($result, 'DOMDocument'); + } + public function testParseFragment() { + $result = HTML5_Parser::parseFragment('<b>asdf</b> foo'); + $this->assertIsA($result, 'DOMNodeList'); + } +} diff --git a/test/HTML5/TestData.php b/test/HTML5/TestData.php new file mode 100644 index 0000000..39e9e44 --- /dev/null +++ b/test/HTML5/TestData.php @@ -0,0 +1,167 @@ +<?php + +/** + * Interface for retreiving test files. Also represents a .dat file. + */ +class HTML5_TestData +{ + /** + * Retrieves a list of test filenames from a directory. + */ + static public function getList($type, $glob) { + $full_glob = + realpath(dirname(__FILE__) . '/../../../testdata/' . $type) . + DIRECTORY_SEPARATOR . $glob; + return glob($full_glob); + } + /** + * This function generates unique test case classes corresponding + * to test files in the testdata directory. + */ + static public function generateTestCases($base, $prefix, $type, $glob) { + foreach (HTML5_TestData::getList($type, $glob) as $filename) { + $name = str_replace('-', '', basename($filename)); + $name = ucfirst(substr($name, 0, strcspn($name, '.'))); + if ($type === 'tree-construction') { + // skip XFOREIGN tests for now + $num = (int) substr($name, 5); + if ($num >= 9) continue; + } + $pfilename = var_export($filename, true); + $code = "class $prefix$name extends $base { public \$filename = $pfilename; }"; + eval($code); + } + } + + public $tests; + + public function __construct($filename) { + $test = array(); + $newTestHeading = null; + $heading = null; + foreach (explode("\n", file_get_contents($filename)) as $line) { + if ($line !== '' && $line[0] === '#') { + $newHeading = substr($line, 1); + if (!$newTestHeading) { + $newTestHeading = $newHeading; + } elseif ($newHeading === $newTestHeading) { + $test[$heading] = substr($test[$heading], 0, -1); + $this->tests[] = $test; + $test = array(); + } + $heading = $newHeading; + $test[$heading] = ''; + } elseif ($heading) { + $test[$heading] .= "$line\n"; + } + } + if (!empty($test)) { + $test[$heading] = substr($test[$heading], 0, -1); + $this->tests[] = $test; + } + // normalize + foreach ($this->tests as &$test) { + foreach ($test as $key => $value) { + $test[$key] = rtrim($value, "\n"); + } + } + } + + /** + * Converts a DOMDocument into string form as seen in test cases. + */ + public static function strDom($node, $prefix = '| ') { + // XXX: Doesn't handle svg and math correctly + $ret = array(); + $indent = 2; + $level = -1; // since DOMDocument doesn't get rendered + $skip = false; + $next = $node; + while ($next) { + if ($next instanceof DOMNodeList) { + if (!$next->length) break; + $next = $next->item(0); + $level = 0; + } + $text = false; + $subnodes = array(); + switch ($next->nodeType) { + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + if ($next->doctype) { + $subnode = '<!DOCTYPE '; + $subnode .= $next->doctype->name; + if ($next->doctype->publicId || $next->doctype->systemId) { + $subnode .= ' "' . $next->doctype->publicId . '"'; + $subnode .= ' "' . $next->doctype->systemId . '"'; + } + $subnode .= '>'; + $subnodes[] = $subnode; + } elseif (!empty($next->emptyDoctype)) { + $subnodes = array('<!DOCTYPE >'); + } + break; + case XML_TEXT_NODE: + $text = '"' . $next->data . '"'; + break; + case XML_COMMENT_NODE: + $text = "<!-- {$next->data} -->"; + break; + case XML_ELEMENT_NODE: + $ns = ''; + switch ($next->namespaceURI) { + case HTML5_TreeBuilder::NS_MATHML: + $ns = 'math '; break; + case HTML5_TreeBuilder::NS_SVG: + $ns = 'svg '; break; + } + $text = "<{$ns}{$next->tagName}>"; + foreach ($next->attributes as $attr) { + $ans = ''; + switch ($attr->namespaceURI) { + case HTML5_TreeBuilder::NS_MATHML: + $ans = 'math '; break; + case HTML5_TreeBuilder::NS_SVG: + $ans = 'svg '; break; + case HTML5_TreeBuilder::NS_XLINK: + $ans = 'xlink '; break; + case HTML5_TreeBuilder::NS_XML: + $ans = 'xml '; break; + case HTML5_TreeBuilder::NS_XMLNS: + $ans = 'xmlns '; break; + } + // XSKETCHY: needed for our horrible xlink hack + $name = str_replace(':', ' ', $attr->localName); + $subnodes[] = "{$ans}{$name}=\"{$attr->value}\""; + } + sort($subnodes); + break; + } + if (!$skip) { + // code duplication + if ($text) { + $ret[] = $prefix . str_repeat(' ', $indent * $level) . $text; + } + foreach ($subnodes as $node) { + $ret[] = $prefix . str_repeat(' ', $indent * ($level + 1)) . $node; + } + } + if ($next->firstChild && !$skip) { + $next = $next->firstChild; + $level++; + $skip = false; + } elseif ($next->nextSibling) { + $next = $next->nextSibling; + $skip = false; + } elseif ($next->parentNode) { + $next = $next->parentNode; + $level--; + $skip = true; + if ($level < 0) break; + } else { + $next = false; + } + } + return implode("\n", $ret); + } +} diff --git a/test/HTML5/TestDataHarness.php b/test/HTML5/TestDataHarness.php new file mode 100644 index 0000000..0b90321 --- /dev/null +++ b/test/HTML5/TestDataHarness.php @@ -0,0 +1,18 @@ +<?php + +SimpleTest::ignore('HTML5_TestDataHarness'); +abstract class HTML5_TestDataHarness extends HTML5_DataHarness +{ + protected $data; + public function __construct() { + parent::__construct(); + $this->data = new HTML5_TestData($this->filename); + } + public function getDescription($test) { + return $test['data']; + } + public function getDataTests() { + return $this->data->tests; + } +} + diff --git a/test/HTML5/TestDataTest.php b/test/HTML5/TestDataTest.php new file mode 100644 index 0000000..de97040 --- /dev/null +++ b/test/HTML5/TestDataTest.php @@ -0,0 +1,31 @@ +<?php + +require_once dirname(__FILE__) . '/../autorun.php'; + +class HTML5_TestDataTest extends UnitTestCase +{ + function testSample() { + $data = new HTML5_TestData(dirname(__FILE__) . '/TestDataTest/sample.dat'); + $this->assertIdentical($data->tests, array( + array('data' => "Foo", 'des' => "Bar"), + array('data' => "Foo") + )); + } + function testStrDom() { + $dom = new DOMDocument(); + $dom->loadHTML('<!DOCTYPE html PUBLIC "http://foo" "http://bar"><html><body foo="bar" baz="1">foo<b>bar</b>asdf</body></html>'); + $this->assertIdentical(HTML5_TestData::strDom($dom), <<<RESULT +| <!DOCTYPE html "http://foo" "http://bar"> +| <html> +| <body> +| baz="1" +| foo="bar" +| "foo" +| <b> +| "bar" +| "asdf" +RESULT +); + } +} + diff --git a/test/HTML5/TestDataTest/sample.dat b/test/HTML5/TestDataTest/sample.dat new file mode 100644 index 0000000..4351e8d --- /dev/null +++ b/test/HTML5/TestDataTest/sample.dat @@ -0,0 +1,7 @@ +#data +Foo +#des +Bar + +#data +Foo diff --git a/test/HTML5/TestableTokenizer.php b/test/HTML5/TestableTokenizer.php new file mode 100644 index 0000000..4f064c3 --- /dev/null +++ b/test/HTML5/TestableTokenizer.php @@ -0,0 +1,76 @@ +<?php + +class HTML5_TestableTokenizer extends HTML5_Tokenizer +{ + public $outputTokens = array(); + private $_contentModelFlag; + private $_lastStartFlag; + + // this interface does not match HTML5_Tokenizer's. It might make + // more sense though + public function __construct($data, $contentModelFlag, $lastStartFlag = null) { + parent::__construct($data); + $this->_contentModelFlag = $contentModelFlag; + $this->_lastStartFlag = $lastStartFlag; + } + public function parse() { + $this->content_model = $this->_contentModelFlag; + if ($this->_lastStartFlag) { + $this->token = array( + 'type' => self::STARTTAG, + 'name' => $this->_lastStartFlag, + ); + } + return parent::parse(); + } + // --end mismatched interface + + protected function emitToken($token, $checkStream = true, $dry = false) { + parent::emitToken($token, $checkStream, true); + + // tree handling code omitted + switch ($token['type']) { + case self::DOCTYPE: + if (!isset($token['name'])) $token['name'] = null; + if (!isset($token['public'])) $token['public'] = null; + if (!isset($token['system'])) $token['system'] = null; + $this->outputTokens[] = array('DOCTYPE', $token['name'], $token['public'], $token['system'], empty($token['force-quirks'])); + break; + case self::STARTTAG: + $attr = new stdclass(); + foreach ($token['attr'] as $keypair) { + // XXX this is IMPORTANT behavior, check if it's + // in TreeBuilder + $name = $keypair['name']; + if (isset($attr->$name)) continue; + $attr->$name = $keypair['value']; + } + $start = array('StartTag', $token['name'], $attr); + if (isset($token['self-closing'])) $start[] = true; + $this->outputTokens[] = $start; + break; + case self::ENDTAG: + $this->outputTokens[] = array('EndTag', $token['name']); + break; + case self::COMMENT: + $this->outputTokens[] = array('Comment', $token['data']); + break; + case self::CHARACTER: + case self::SPACECHARACTER: + if (count($this->outputTokens)) { + $old = array_pop($this->outputTokens); + if ($old[0] === 'Character') { + $old[1] .= $token['data']; + $this->outputTokens[] = $old; + break; + } + $this->outputTokens[] = $old; + } + $this->outputTokens[] = array('Character', $token['data']); + break; + case self::PARSEERROR: + $this->outputTokens[] = 'ParseError'; + break; + } + } +} diff --git a/test/HTML5/TokenizerPositionTest.php b/test/HTML5/TokenizerPositionTest.php new file mode 100644 index 0000000..534456a --- /dev/null +++ b/test/HTML5/TokenizerPositionTest.php @@ -0,0 +1,164 @@ +<?php + +require_once dirname(__FILE__) . '/../autorun.php'; + +class HTML5_PositionTestableTokenizer extends HTML5_TestableTokenizer +{ + public $outputLines = array(); + public $outputCols = array(); + private $characterTokens = array(); + protected function emitToken($token, $checkStream = true, $dry = false) { + parent::emitToken($token, $checkStream, $dry); + // XXX: The tests should really include the parse errors, but I'm lazy. + switch ($token['type']) { + case self::PARSEERROR: + return; + + case self::CHARACTER: + if ($this->characterTokens) { + array_pop($this->outputLines); + array_pop($this->outputCols); + } + $this->characterTokens[] = $token; + + default: + $this->outputLines[] = $this->stream()->getCurrentLine(); + $this->outputCols[] = $this->stream()->getColumnOffset(); + } + if ($token['type'] !== self::CHARACTER) { + $this->characterTokens = array(); + } + } +} + +class HTML5_TokenizerTestOfPosition extends UnitTestCase +{ + function testBasic() { + $this->assertPositions( + "<b><i>f<p>\n<b>a</b>", + array(1,1,1,1, 2,2,2,2), + array(3,6,7,10,0,3,4,8) + ); + } + + function testUnicode() { + $this->assertPositions( + "\xC2\xA2<b>\xE2\x82\xACa<b>\xf4\x8a\xaf\x8d", + array(1,1,1,1,1), + array(1,4,6,9,10) + ); + } + + function testData() { + $this->assertPositions( + "a\na\n\xC2\xA2<b>", + array(3,3), + array(1,4) + ); + } + + function testMarkupDeclarationDoubleDash() { + $this->assertPositions( + '<!-- foo -->', + array(1), + array(12) + ); + } + + function testMarkupDeclarationDoctype() { + $this->assertPositions( + '<!DOCTYPE>', + array(1), + array(10) + ); + } + + function testAfterDoctypeNamePublic() { + $this->assertPositions( + '<!DOCTYPE PUBLIC "foo">', + array(1), + array(23) + ); + } + + function testAfterDoctypeNameSystem() { + $this->assertPositions( + '<!DOCTYPE SYSTEM "foo">', + array(1), + array(23) + ); + } + + function testDecEntitySansSemicolon() { + $this->assertPositions( + 'Ĭ', + array(1), + array(5) + ); + } + + function testDecEntityWithSemicolon() { + $this->assertPositions( + 'Ĭ', + array(1), + array(6) + ); + } + + function testHexEntity() { + $this->assertPositions( + '̀', + array(1), + array(7) + ); + } + + function testEmptyEntity() { + $this->assertPositions( + '&#;<b>', + array(1,1), + array(3,6) + ); + } + + function testNamedEntity() { + $this->assertPositions( + '"foo<b>', + array(1,1), + array(9,12) + ); + } + + function testBadNamedEntity() { + $this->assertPositions( + '&zzz;b', + array(1), + array(6) + ); + } + + function testAttributeEntity() { + $this->assertPositions( + '<b foo="&er">a', + array( 1, 1), + array(16,17) + ); + } + + function testBogusComment() { + $this->assertPositions( + "<!as asdfe \nasdf>d", + array(2,2), + array(5,6) + ); + } + + protected function assertPositions($input, $lines, $cols, $flag = HTML5_Tokenizer::PCDATA, $lastStartTag = null) { + $tokenizer = new HTML5_PositionTestableTokenizer($input, $flag, $lastStartTag); + $GLOBALS['TIME'] -= get_microtime(); + $tokenizer->parse($input); + $GLOBALS['TIME'] += get_microtime(); + $this->assertIdentical($tokenizer->outputLines, $lines, 'Lines: %s'); + $this->assertIdentical($tokenizer->outputCols, $cols, 'Cols: %s'); + } +} diff --git a/test/HTML5/TokenizerTest.php b/test/HTML5/TokenizerTest.php new file mode 100644 index 0000000..d00fa78 --- /dev/null +++ b/test/HTML5/TokenizerTest.php @@ -0,0 +1,88 @@ +<?php + +require_once dirname(__FILE__) . '/../autorun.php'; + +SimpleTest::ignore('HTML5_TokenizerHarness'); +abstract class HTML5_TokenizerHarness extends HTML5_JSONHarness +{ + public function invoke($test) { + //echo get_class($this) . ': ' . $test->description ."\n"; + if (!isset($test->contentModelFlags)) { + $test->contentModelFlags = array('PCDATA'); + } + if (!isset($test->ignoreErrorOrder)) { + $test->ignoreErrorOrder = false; + } + + // Get expected result array (and maybe error count). + $expect = array(); + $expectedErrorCount = 0; // This is only used when ignoreErrorOrder = true. + foreach ($test->output as $tok) { + // If we're ignoring error order and this is a parse error, just count. + if ($test->ignoreErrorOrder && $tok === 'ParseError') { + $expectedErrorCount++; + } else { + // Normalize character tokens from the test + if ($expect && $tok[0] === 'Character' && $expect[count($expect) - 1][0] === 'Character') { + $expect[count($expect) - 1][1] .= $tok[1]; + } else { + $expect[] = $tok; + } + } + } + + // Run test for each content model flag. + foreach ($test->contentModelFlags as $flag) { + $output = $this->tokenize($test, $flag); + $result = array(); + $resultErrorCount = 0; // This is only used when ignoreErrorOrder = true. + foreach ($output as $tok) { + // If we're ignoring error order and this is a parse error, just count. + if ($test->ignoreErrorOrder && $tok === 'ParseError') { + $resultErrorCount++; + } else { + $result[] = $tok; + } + } + $this->assertIdentical($expect, $result, + 'In test "'.str_replace('%', '%%', $test->description). + '" with content model '.$flag.': %s' + ); + if ($test->ignoreErrorOrder) { + $this->assertIdentical($expectedErrorCount, $resultErrorCount, + 'Wrong error count in test "'.str_replace('%', '%%', $test->description). + '" with content model '.$flag.': %s' + ); + } + if ($expect != $result || ($test->ignoreErrorOrder && $expectedErrorCount !== $resultErrorCount)) { + echo "Input: "; str_dump($test->input); + echo "\nExpected: \n"; echo $this->tokenDump($expect); + echo "\nActual: \n"; echo $this->tokenDump($result); + echo "\n"; + } + } + } + private function tokenDump($tokens) { + $ret = ''; + foreach ($tokens as $i => $token) { + $ret .= ($i+1).". {$token[0]}: {$token[1]}\n"; + } + return $ret; + } + public function tokenize($test, $flag) { + $flag = constant("HTML5_Tokenizer::$flag"); + if (!isset($test->lastStartTag)) $test->lastStartTag = null; + $tokenizer = new HTML5_TestableTokenizer($test->input, $flag, $test->lastStartTag); + $GLOBALS['TIME'] -= get_microtime(); + $tokenizer->parse(); + $GLOBALS['TIME'] += get_microtime(); + return $tokenizer->outputTokens; + } +} + +// generate test suites for tokenizer +HTML5_TestData::generateTestCases( + 'HTML5_TokenizerHarness', + 'HTML5_TokenizerTestOf', + 'tokenizer', '*.test' +); diff --git a/test/HTML5/TreeBuilderTest.php b/test/HTML5/TreeBuilderTest.php new file mode 100644 index 0000000..708d6a0 --- /dev/null +++ b/test/HTML5/TreeBuilderTest.php @@ -0,0 +1,39 @@ +<?php + +require_once dirname(__FILE__) . '/../autorun.php'; + +SimpleTest::ignore('HTML5_TreeBuilderHarness'); +class HTML5_TreeBuilderHarness extends HTML5_TestDataHarness +{ + public function assertIdentical($expect, $actual, $test = array()) { + $input = $test['data']; + if (isset($test['document-fragment'])) { + $input .= "\nFragment: " . $test['document-fragment']; + } + parent::assertIdentical($expect, $actual, "Identical expectation failed\nInput:\n$input\n\nExpected:\n$expect\n\nActual:\n$actual\n"); + } + public function invoke($test) { + // this is totally the wrong interface to use, but + // for now we need testing + $tokenizer = new HTML5_Tokenizer($test['data']); + $GLOBALS['TIME'] -= get_microtime(); + if (isset($test['document-fragment'])) { + $tokenizer->parseFragment($test['document-fragment']); + } else { + $tokenizer->parse(); + } + $GLOBALS['TIME'] += get_microtime(); + $this->assertIdentical( + $test['document'], + HTML5_TestData::strDom($tokenizer->save()), + $test + ); + } +} + +HTML5_TestData::generateTestCases( + 'HTML5_TreeBuilderHarness', + 'HTML5_TreeBuilderTestOf', + 'tree-construction', '*.dat' +); + |