summaryrefslogtreecommitdiff
path: root/test/HTML5
diff options
context:
space:
mode:
Diffstat (limited to 'test/HTML5')
-rw-r--r--test/HTML5/DataHarness.php48
-rw-r--r--test/HTML5/InputStreamTest.php174
-rw-r--r--test/HTML5/JSONHarness.php21
-rw-r--r--test/HTML5/ParserTest.php15
-rw-r--r--test/HTML5/TestData.php167
-rw-r--r--test/HTML5/TestDataHarness.php18
-rw-r--r--test/HTML5/TestDataTest.php31
-rw-r--r--test/HTML5/TestDataTest/sample.dat7
-rw-r--r--test/HTML5/TestableTokenizer.php76
-rw-r--r--test/HTML5/TokenizerPositionTest.php164
-rw-r--r--test/HTML5/TokenizerTest.php88
-rw-r--r--test/HTML5/TreeBuilderTest.php39
12 files changed, 848 insertions, 0 deletions
diff --git a/test/HTML5/DataHarness.php b/test/HTML5/DataHarness.php
new file mode 100644
index 0000000..844b1fc
--- /dev/null
+++ b/test/HTML5/DataHarness.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Modified test-case supertype for running tests that are not
+ * test method based, but based off of test data that resides in
+ * files.
+ */
+SimpleTest::ignore('HTML5_DataHarness');
+abstract class HTML5_DataHarness extends UnitTestCase
+{
+    /** Filled in by HTML5_TestData::generateTestCases() */
+    protected $filename;
+    private $tests;
+    /**
+     * Invoked by the runner, it is the function responsible for executing
+     * the test and delivering results.
+     * @param $test One member of the list returned by getDataTests()
+     */
+    abstract public function invoke($test);
+    /**
+     * Returns a list of tests that can be executed. The list members will
+     * be passed to invoke(). Either an array or an iterator may be returned;
+     * getTests() copies the result into a plain array before numbering it.
+     */
+    abstract public function getDataTests();
+    /**
+     * Returns a description of the test
+     */
+    abstract public function getDescription($test);
+    public function getTests() {
+        $tests = $this->getDataTests();
+        // count() below and the numeric lookup in __call() need a zero-based array
+        $this->tests = $tests instanceof Traversable ? iterator_to_array($tests, false) : array_values((array) $tests);
+        // 1-indexed, to be consistent with Python
+        $ret = array();
+        for ($i = 1, $n = count($this->tests); $i <= $n; $i++) {
+            $ret[] = "test_$i";
+        }
+        return $ret;
+    }
+    /**
+     * Emulates our test functions: "test_<i>" runs the i-th data test
+     */
+    public function __call($name, $args) {
+        list($test, $i) = explode("_", $name);
+        $this->invoke($this->tests[$i-1]);
+    }
+}
diff --git a/test/HTML5/InputStreamTest.php b/test/HTML5/InputStreamTest.php
new file mode 100644
index 0000000..b60787a
--- /dev/null
+++ b/test/HTML5/InputStreamTest.php
@@ -0,0 +1,174 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+class HTML5_InputStreamTest extends UnitTestCase
+{
+    public function invalidReplaceTestHandler($input, $name) { // $input must come out as a single U+FFFD
+        $stream = new HTML5_InputStream($input);
+        $this->assertIdentical("\xEF\xBF\xBD", $stream->remainingChars(), $name);
+    }
+
+    public function testInvalidReplace() {
+        // Above U+10FFFF
+        $this->invalidReplaceTestHandler("\xF5\x90\x80\x80", 'U+110000');
+
+        // Incomplete
+        $this->invalidReplaceTestHandler("\xDF", 'Incomplete two byte sequence (missing final byte)');
+        $this->invalidReplaceTestHandler("\xEF\xBF", 'Incomplete three byte sequence (missing final byte)');
+        $this->invalidReplaceTestHandler("\xF4\xBF\xBF", 'Incomplete four byte sequence (missing final byte)');
+
+        // Min/max continuation bytes
+        $this->invalidReplaceTestHandler("\x80", 'Lone 80 continuation byte');
+        $this->invalidReplaceTestHandler("\xBF", 'Lone BF continuation byte');
+
+        // Invalid bytes (these can never occur)
+        $this->invalidReplaceTestHandler("\xFE", 'Invalid FE byte');
+        $this->invalidReplaceTestHandler("\xFF", 'Invalid FF byte');
+
+        // Min/max overlong
+        $this->invalidReplaceTestHandler("\xC0\x80", 'Overlong representation of U+0000');
+        $this->invalidReplaceTestHandler("\xE0\x80\x80", 'Overlong representation of U+0000');
+        $this->invalidReplaceTestHandler("\xF0\x80\x80\x80", 'Overlong representation of U+0000');
+        $this->invalidReplaceTestHandler("\xF8\x80\x80\x80\x80", 'Overlong representation of U+0000');
+        $this->invalidReplaceTestHandler("\xFC\x80\x80\x80\x80\x80", 'Overlong representation of U+0000');
+        $this->invalidReplaceTestHandler("\xC1\xBF", 'Overlong representation of U+007F');
+        $this->invalidReplaceTestHandler("\xE0\x9F\xBF", 'Overlong representation of U+07FF');
+        $this->invalidReplaceTestHandler("\xF0\x8F\xBF\xBF", 'Overlong representation of U+FFFF');
+    }
+
+    public function testStripLeadingBOM() {
+        $leading = new HTML5_InputStream("\xEF\xBB\xBFa");
+        $this->assertIdentical('a', $leading->char(), 'BOM should be stripped');
+    }
+
+    public function testZWNBSP() {
+        $stream = new HTML5_InputStream("a\xEF\xBB\xBF");
+        $this->assertIdentical("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain');
+    }
+
+    public function testNull() {
+        $stream = new HTML5_InputStream("\0\0\0");
+        $this->assertIdentical("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD');
+        $this->assertIdentical(3, count($stream->errors), 'Null character should be throw parse error');
+    }
+
+    public function testCRLF() {
+        $stream = new HTML5_InputStream("\r\n");
+        $this->assertIdentical("\n", $stream->remainingChars(), 'CRLF should be replaced by LF');
+    }
+
+    public function testCR() {
+        $stream = new HTML5_InputStream("\r");
+        $this->assertIdentical("\n", $stream->remainingChars(), 'CR should be replaced by LF');
+    }
+
+    public function invalidParseErrorTestHandler($input, $numErrors, $name) { // content unchanged, exactly $numErrors errors
+        $stream = new HTML5_InputStream($input);
+        $this->assertIdentical($input, $stream->remainingChars(), $name . ' (stream content)');
+        $this->assertIdentical($numErrors, count($stream->errors), $name . ' (number of errors)');
+    }
+
+    public function testInvalidParseError() {
+        // C0 controls (except U+0000 and U+000D due to different handling)
+        $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)');
+        $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)');
+        $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)');
+        $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)');
+        $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)');
+        $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)');
+        $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)');
+        $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)');
+        $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)');
+        $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)');
+        $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)');
+        $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)');
+
+        // DEL (U+007F)
+        $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F');
+
+        // C1 Controls
+        $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)');
+        $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)');
+        $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)');
+
+        // Single UTF-16 surrogates
+        $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)');
+        $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)');
+        $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)');
+        $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)');
+        $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)');
+        $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)');
+        $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)');
+
+        // Paired UTF-16 surrogates
+        $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)');
+        $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)');
+        $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)');
+        $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)');
+        $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)');
+        $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)');
+        $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)');
+        $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)');
+
+        // Characters surrounding surrogates
+        $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)');
+        $this->invalidParseErrorTestHandler("\xEE\x80\x80", 0, 'U+E000 (one codepoint above highest surrogate codepoint)');
+
+        // Permanent noncharacters
+        $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)');
+        $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)');
+    }
+}
diff --git a/test/HTML5/JSONHarness.php b/test/HTML5/JSONHarness.php
new file mode 100644
index 0000000..dd1cf66
--- /dev/null
+++ b/test/HTML5/JSONHarness.php
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Implementation specifically for JSON format files.
+ */
+SimpleTest::ignore('HTML5_JSONHarness');
+abstract class HTML5_JSONHarness extends HTML5_DataHarness
+{
+    protected $data;
+    public function __construct() {
+        parent::__construct();
+        $json = file_get_contents($this->filename);
+        $this->data = json_decode($json);
+    }
+    public function getDescription($test) { return $test->description; }
+    public function getDataTests() {
+        // could be a weird xmlViolationsTest without a "tests" member
+        if (!isset($this->data->tests)) return array();
+        return $this->data->tests;
+    }
+}
diff --git a/test/HTML5/ParserTest.php b/test/HTML5/ParserTest.php
new file mode 100644
index 0000000..43b87e9
--- /dev/null
+++ b/test/HTML5/ParserTest.php
@@ -0,0 +1,15 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+class HTML5_ParserTest extends UnitTestCase
+{
+    // parse() on a whole document is expected to give back a DOMDocument
+    public function testParse() {
+        $this->assertIsA(HTML5_Parser::parse('<html><body></body></html>'), 'DOMDocument');
+    }
+    // parseFragment() is expected to give back a DOMNodeList
+    public function testParseFragment() {
+        $this->assertIsA(HTML5_Parser::parseFragment('<b>asdf</b> foo'), 'DOMNodeList');
+    }
+}
diff --git a/test/HTML5/TestData.php b/test/HTML5/TestData.php
new file mode 100644
index 0000000..39e9e44
--- /dev/null
+++ b/test/HTML5/TestData.php
@@ -0,0 +1,167 @@
+<?php
+
+/**
+ * Interface for retrieving test files. Also represents a .dat file.
+ */
+class HTML5_TestData
+{
+    /**
+     * Retrieves a list of test filenames from a directory (always an array).
+     */
+    static public function getList($type, $glob) {
+        $dir = realpath(dirname(__FILE__) . '/../../../testdata/' . $type);
+        $matches = glob($dir . DIRECTORY_SEPARATOR . $glob);
+        // glob() can yield false instead of an empty array; callers iterate the result
+        return $matches ? $matches : array();
+    }
+    /**
+     * Declares (via eval) one "$prefix<Name> extends $base" class per matching
+     * data file in the testdata directory, with $filename preset to its path.
+     */
+    static public function generateTestCases($base, $prefix, $type, $glob) {
+        foreach (HTML5_TestData::getList($type, $glob) as $filename) {
+            $name = str_replace('-', '', basename($filename));
+            $name = ucfirst(substr($name, 0, strcspn($name, '.')));
+            if ($type === 'tree-construction') {
+                // skip XFOREIGN tests for now
+                $num = (int) substr($name, 5);
+                if ($num >= 9) continue;
+            }
+            $pfilename = var_export($filename, true);
+            $code = "class $prefix$name extends $base { public \$filename = $pfilename; }";
+            eval($code);
+        }
+    }
+    /** List of parsed tests, each an array mapping a heading name to its text. */
+    public $tests = array();
+
+    public function __construct($filename) {
+        // A "#heading" line opens a section; a repeat of the first heading opens a new test
+        $test = array();
+        $newTestHeading = $heading = null;
+        foreach (explode("\n", file_get_contents($filename)) as $line) {
+            if ($line !== '' && $line[0] === '#') {
+                $newHeading = substr($line, 1);
+                if (!$newTestHeading) {
+                    $newTestHeading = $newHeading;
+                } elseif ($newHeading === $newTestHeading) {
+                    $test[$heading] = substr($test[$heading], 0, -1);
+                    $this->tests[] = $test;
+                    $test = array();
+                }
+                $heading = $newHeading;
+                $test[$heading] = '';
+            } elseif ($heading) {
+                $test[$heading] .= "$line\n";
+            }
+        }
+        if (!empty($test)) {
+            $test[$heading] = substr($test[$heading], 0, -1);
+            $this->tests[] = $test;
+        }
+        // normalize: drop trailing newlines (by index, so no reference outlives the loop)
+        foreach ($this->tests as $i => $test) {
+            foreach ($test as $key => $value) {
+                $this->tests[$i][$key] = rtrim($value, "\n");
+            }
+        }
+    }
+
+    /**
+     * Converts a DOMDocument (or DOMNodeList) into string form as seen in test cases.
+     */
+    public static function strDom($node, $prefix = '| ') {
+        // XXX: Doesn't handle svg and math correctly
+        $ret = array();
+        $indent = 2;
+        $level = -1; // since DOMDocument doesn't get rendered
+        $skip = false;
+        $next = $node;
+        while ($next) {
+            if ($next instanceof DOMNodeList) {
+                if (!$next->length) break;
+                $next = $next->item(0);
+                $level = 0;
+            }
+            $text = false;
+            $subnodes = array();
+            switch ($next->nodeType) {
+                case XML_DOCUMENT_NODE:
+                case XML_HTML_DOCUMENT_NODE:
+                    if ($next->doctype) {
+                        $subnode = '<!DOCTYPE ';
+                        $subnode .= $next->doctype->name;
+                        if ($next->doctype->publicId || $next->doctype->systemId) {
+                            $subnode .= ' "' . $next->doctype->publicId . '"';
+                            $subnode .= ' "' . $next->doctype->systemId . '"';
+                        }
+                        $subnode .= '>';
+                        $subnodes[] = $subnode;
+                    } elseif (!empty($next->emptyDoctype)) {
+                        $subnodes = array('<!DOCTYPE >');
+                    }
+                    break;
+                case XML_TEXT_NODE:
+                    $text = '"' . $next->data . '"';
+                    break;
+                case XML_COMMENT_NODE:
+                    $text = "<!-- {$next->data} -->";
+                    break;
+                case XML_ELEMENT_NODE:
+                    $ns = '';
+                    switch ($next->namespaceURI) {
+                        case HTML5_TreeBuilder::NS_MATHML:
+                            $ns = 'math '; break;
+                        case HTML5_TreeBuilder::NS_SVG:
+                            $ns = 'svg '; break;
+                    }
+                    $text = "<{$ns}{$next->tagName}>";
+                    foreach ($next->attributes as $attr) {
+                        $ans = '';
+                        switch ($attr->namespaceURI) {
+                            case HTML5_TreeBuilder::NS_MATHML:
+                                $ans = 'math '; break;
+                            case HTML5_TreeBuilder::NS_SVG:
+                                $ans = 'svg '; break;
+                            case HTML5_TreeBuilder::NS_XLINK:
+                                $ans = 'xlink '; break;
+                            case HTML5_TreeBuilder::NS_XML:
+                                $ans = 'xml '; break;
+                            case HTML5_TreeBuilder::NS_XMLNS:
+                                $ans = 'xmlns '; break;
+                        }
+                        // XSKETCHY: needed for our horrible xlink hack
+                        $name = str_replace(':', ' ', $attr->localName);
+                        $subnodes[] = "{$ans}{$name}=\"{$attr->value}\"";
+                    }
+                    sort($subnodes);
+                    break;
+            }
+            if (!$skip) {
+                // $skip is set while climbing back up: that node was already rendered
+                if ($text) {
+                    $ret[] = $prefix . str_repeat(' ', $indent * $level) . $text;
+                }
+                foreach ($subnodes as $line) {
+                    $ret[] = $prefix . str_repeat(' ', $indent * ($level + 1)) . $line;
+                }
+            }
+            if ($next->firstChild && !$skip) {
+                $next = $next->firstChild;
+                $level++;
+                $skip = false;
+            } elseif ($next->nextSibling) {
+                $next = $next->nextSibling;
+                $skip = false;
+            } elseif ($next->parentNode) {
+                $next = $next->parentNode;
+                $level--;
+                $skip = true;
+                if ($level < 0) break;
+            } else {
+                $next = false;
+            }
+        }
+        return implode("\n", $ret);
+    }
+}
diff --git a/test/HTML5/TestDataHarness.php b/test/HTML5/TestDataHarness.php
new file mode 100644
index 0000000..0b90321
--- /dev/null
+++ b/test/HTML5/TestDataHarness.php
@@ -0,0 +1,18 @@
+<?php
+
+SimpleTest::ignore('HTML5_TestDataHarness');
+abstract class HTML5_TestDataHarness extends HTML5_DataHarness
+{
+    protected $data;
+    // Parses the file named by $this->filename once, at construction
+    public function __construct() {
+        parent::__construct();
+        $path = $this->filename;
+        $this->data = new HTML5_TestData($path);
+    }
+    // A test is described by the text stored under its 'data' key
+    public function getDescription($test) { return $test['data']; }
+    // Hands back the parsed tests untouched
+    public function getDataTests() { return $this->data->tests; }
+}
+
diff --git a/test/HTML5/TestDataTest.php b/test/HTML5/TestDataTest.php
new file mode 100644
index 0000000..de97040
--- /dev/null
+++ b/test/HTML5/TestDataTest.php
@@ -0,0 +1,31 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+class HTML5_TestDataTest extends UnitTestCase
+{
+    function testSample() { // sample.dat holds two tests; the second has only a #data section
+        $data = new HTML5_TestData(dirname(__FILE__) . '/TestDataTest/sample.dat');
+        $this->assertIdentical($data->tests, array(
+            array('data' => "Foo", 'des' => "Bar"),
+            array('data' => "Foo")
+        ));
+    }
+    function testStrDom() { // strDom() indents two spaces per tree level after the "| " prefix
+        $dom = new DOMDocument();
+        $dom->loadHTML('<!DOCTYPE html PUBLIC "http://foo" "http://bar"><html><body foo="bar" baz="1">foo<b>bar</b>asdf</body></html>');
+        $this->assertIdentical(HTML5_TestData::strDom($dom), <<<RESULT
+| <!DOCTYPE html "http://foo" "http://bar">
+| <html>
+|   <body>
+|     baz="1"
+|     foo="bar"
+|     "foo"
+|     <b>
+|       "bar"
+|     "asdf"
+RESULT
+);
+    }
+}
+
diff --git a/test/HTML5/TestDataTest/sample.dat b/test/HTML5/TestDataTest/sample.dat
new file mode 100644
index 0000000..4351e8d
--- /dev/null
+++ b/test/HTML5/TestDataTest/sample.dat
@@ -0,0 +1,7 @@
+#data
+Foo
+#des
+Bar
+
+#data
+Foo
diff --git a/test/HTML5/TestableTokenizer.php b/test/HTML5/TestableTokenizer.php
new file mode 100644
index 0000000..4f064c3
--- /dev/null
+++ b/test/HTML5/TestableTokenizer.php
@@ -0,0 +1,76 @@
+<?php
+
+class HTML5_TestableTokenizer extends HTML5_Tokenizer
+{
+    // tokens recorded by emitToken(), in emission order
+    public $outputTokens = array();
+    private $_contentModelFlag;
+    private $_lastStartFlag;
+
+    // this interface does not match HTML5_Tokenizer's. It might make
+    // more sense though
+    public function __construct($data, $contentModelFlag, $lastStartFlag = null) {
+        parent::__construct($data);
+        // both flags are only stored here; parse() applies them
+        $this->_lastStartFlag = $lastStartFlag;
+        $this->_contentModelFlag = $contentModelFlag;
+    }
+    // applies the flags captured by the constructor, then runs the parent parse
+    public function parse() {
+        $this->content_model = $this->_contentModelFlag;
+        if ($this->_lastStartFlag) {
+            $this->token = array(
+                'type' => self::STARTTAG,
+                'name' => $this->_lastStartFlag,
+            );
+        }
+        return parent::parse();
+    }
+    // --end mismatched interface
+
+    protected function emitToken($token, $checkStream = true, $dry = false) {
+        // the parent is always called with its third argument forced to true
+        parent::emitToken($token, $checkStream, true);
+
+        // tree handling code omitted
+        switch ($token['type']) {
+            case self::DOCTYPE:
+                foreach (array('name', 'public', 'system') as $field) {
+                    if (!isset($token[$field])) $token[$field] = null;
+                }
+                $this->outputTokens[] = array('DOCTYPE', $token['name'], $token['public'], $token['system'], empty($token['force-quirks']));
+                break;
+            case self::STARTTAG:
+                $attr = new stdclass();
+                foreach ($token['attr'] as $keypair) {
+                    // XXX this is IMPORTANT behavior, check if it's
+                    // in TreeBuilder
+                    $name = $keypair['name'];
+                    if (!isset($attr->$name)) $attr->$name = $keypair['value'];
+                }
+                $start = array('StartTag', $token['name'], $attr);
+                if (isset($token['self-closing'])) $start[] = true;
+                $this->outputTokens[] = $start;
+                break;
+            case self::ENDTAG:
+                $this->outputTokens[] = array('EndTag', $token['name']);
+                break;
+            case self::COMMENT:
+                $this->outputTokens[] = array('Comment', $token['data']);
+                break;
+            case self::CHARACTER:
+            case self::SPACECHARACTER:
+                // extend a directly preceding Character token, else start a new one
+                $last = count($this->outputTokens) - 1;
+                if ($last >= 0 && $this->outputTokens[$last][0] === 'Character') {
+                    $this->outputTokens[$last][1] .= $token['data'];
+                } else {
+                    $this->outputTokens[] = array('Character', $token['data']);
+                }
+                break;
+            case self::PARSEERROR:
+                $this->outputTokens[] = 'ParseError';
+                break;
+        }
+    }
+}
diff --git a/test/HTML5/TokenizerPositionTest.php b/test/HTML5/TokenizerPositionTest.php
new file mode 100644
index 0000000..534456a
--- /dev/null
+++ b/test/HTML5/TokenizerPositionTest.php
@@ -0,0 +1,164 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+class HTML5_PositionTestableTokenizer extends HTML5_TestableTokenizer
+{
+    public $outputLines = array();
+    public $outputCols = array();
+    private $characterTokens = array();
+    protected function emitToken($token, $checkStream = true, $dry = false) {
+        parent::emitToken($token, $checkStream, $dry);
+        $type = $token['type'];
+        // XXX: The tests should really include the parse errors, but I'm lazy.
+        if ($type == self::PARSEERROR) {
+            return;
+        }
+
+        if ($type == self::CHARACTER) {
+            // within a run of character tokens only the latest position is kept
+            if ($this->characterTokens) {
+                array_pop($this->outputLines);
+                array_pop($this->outputCols);
+            }
+            $this->characterTokens[] = $token;
+        }
+        $this->outputLines[] = $this->stream()->getCurrentLine();
+        $this->outputCols[] = $this->stream()->getColumnOffset();
+        if ($type !== self::CHARACTER) {
+            $this->characterTokens = array();
+        }
+    }
+}
+
+class HTML5_TokenizerTestOfPosition extends UnitTestCase
+{
+    function testBasic() {
+        $this->assertPositions(
+            "<b><i>f<p>\n<b>a</b>",
+            array(1,1,1,1, 2,2,2,2),
+            array(3,6,7,10,0,3,4,8)
+        );
+    }
+
+    function testUnicode() {
+        $this->assertPositions(
+            "\xC2\xA2<b>\xE2\x82\xACa<b>\xf4\x8a\xaf\x8d",
+            array(1,1,1,1,1),
+            array(1,4,6,9,10)
+        );
+    }
+
+    function testData() {
+        $this->assertPositions(
+            "a\na\n\xC2\xA2<b>",
+            array(3,3),
+            array(1,4)
+        );
+    }
+
+    function testMarkupDeclarationDoubleDash() {
+        $this->assertPositions(
+            '<!-- foo -->',
+            array(1),
+            array(12)
+        );
+    }
+
+    function testMarkupDeclarationDoctype() {
+        $this->assertPositions(
+            '<!DOCTYPE>',
+            array(1),
+            array(10)
+        );
+    }
+
+    function testAfterDoctypeNamePublic() {
+        $this->assertPositions(
+            '<!DOCTYPE PUBLIC "foo">',
+            array(1),
+            array(23)
+        );
+    }
+
+    function testAfterDoctypeNameSystem() {
+        $this->assertPositions(
+            '<!DOCTYPE SYSTEM "foo">',
+            array(1),
+            array(23)
+        );
+    }
+
+    function testDecEntitySansSemicolon() {
+        $this->assertPositions(
+            '&#300',
+            array(1),
+            array(5)
+        );
+    }
+
+    function testDecEntityWithSemicolon() {
+        $this->assertPositions(
+            '&#300;',
+            array(1),
+            array(6)
+        );
+    }
+
+    function testHexEntity() {
+        $this->assertPositions(
+            '&#x300;',
+            array(1),
+            array(7)
+        );
+    }
+
+    function testEmptyEntity() {
+        $this->assertPositions(
+            '&#;<b>',
+            array(1,1),
+            array(3,6)
+        );
+    }
+
+    function testNamedEntity() {
+        $this->assertPositions(
+            '&quot;foo<b>',
+            array(1,1),
+            array(9,12)
+        );
+    }
+
+    function testBadNamedEntity() {
+        $this->assertPositions(
+            '&zzz;b',
+            array(1),
+            array(6)
+        );
+    }
+
+    function testAttributeEntity() {
+        $this->assertPositions(
+            '<b foo="&amper">a',
+            array( 1, 1),
+            array(16,17)
+        );
+    }
+
+    function testBogusComment() {
+        $this->assertPositions(
+            "<!as asdfe \nasdf>d",
+            array(2,2),
+            array(5,6)
+        );
+    }
+    // Tokenizes $input and checks the line/column lists recorded by the position tokenizer
+    protected function assertPositions($input, $lines, $cols, $flag = HTML5_Tokenizer::PCDATA, $lastStartTag = null) {
+        $tokenizer = new HTML5_PositionTestableTokenizer($input, $flag, $lastStartTag);
+        $GLOBALS['TIME'] -= get_microtime();
+        $tokenizer->parse(); // the input was already handed to the constructor
+        $GLOBALS['TIME'] += get_microtime();
+        $this->assertIdentical($tokenizer->outputLines, $lines, 'Lines: %s');
+        $this->assertIdentical($tokenizer->outputCols, $cols, 'Cols: %s');
+    }
+}
diff --git a/test/HTML5/TokenizerTest.php b/test/HTML5/TokenizerTest.php
new file mode 100644
index 0000000..d00fa78
--- /dev/null
+++ b/test/HTML5/TokenizerTest.php
@@ -0,0 +1,88 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+SimpleTest::ignore('HTML5_TokenizerHarness');
+abstract class HTML5_TokenizerHarness extends HTML5_JSONHarness
+{
+    public function invoke($test) {
+        if (!isset($test->contentModelFlags)) {
+            $test->contentModelFlags = array('PCDATA');
+        }
+        if (!isset($test->ignoreErrorOrder)) {
+            $test->ignoreErrorOrder = false;
+        }
+
+        // Get expected result array (and maybe error count).
+        $expect = array();
+        $expectedErrorCount = 0; // This is only used when ignoreErrorOrder = true.
+        foreach ($test->output as $tok) {
+            // If we're ignoring error order and this is a parse error, just count.
+            if ($test->ignoreErrorOrder && $tok === 'ParseError') {
+                $expectedErrorCount++;
+            } else {
+                // Normalize character tokens from the test
+                if ($expect && $tok[0] === 'Character' && $expect[count($expect) - 1][0] === 'Character') {
+                    $expect[count($expect) - 1][1] .= $tok[1];
+                } else {
+                    $expect[] = $tok;
+                }
+            }
+        }
+
+        // Run test for each content model flag.
+        foreach ($test->contentModelFlags as $flag) {
+            $output = $this->tokenize($test, $flag);
+            $result = array();
+            $resultErrorCount = 0; // This is only used when ignoreErrorOrder = true.
+            foreach ($output as $tok) {
+                // If we're ignoring error order and this is a parse error, just count.
+                if ($test->ignoreErrorOrder && $tok === 'ParseError') {
+                    $resultErrorCount++;
+                } else {
+                    $result[] = $tok;
+                }
+            }
+            $this->assertIdentical($expect, $result,
+                'In test "'.str_replace('%', '%%', $test->description).
+                '" with content model '.$flag.': %s'
+            );
+            if ($test->ignoreErrorOrder) {
+                $this->assertIdentical($expectedErrorCount, $resultErrorCount,
+                    'Wrong error count in test "'.str_replace('%', '%%', $test->description).
+                    '" with content model '.$flag.': %s'
+                );
+            }
+            if ($expect != $result || ($test->ignoreErrorOrder && $expectedErrorCount !== $resultErrorCount)) {
+                echo "Input: "; str_dump($test->input);
+                echo "\nExpected: \n"; echo $this->tokenDump($expect);
+                echo "\nActual: \n"; echo $this->tokenDump($result);
+                echo "\n";
+            }
+        }
+    }
+    private function tokenDump($tokens) {
+        $ret = '';
+        foreach ($tokens as $i => $token) {
+            // 'ParseError' entries are plain strings; every other token is an array
+            $ret .= ($i+1).'. '.(is_array($token) ? "{$token[0]}: {$token[1]}" : $token)."\n";
+        }
+        return $ret;
+    }
+    public function tokenize($test, $flag) {
+        $flag = constant("HTML5_Tokenizer::$flag");
+        if (!isset($test->lastStartTag)) $test->lastStartTag = null;
+        $tokenizer = new HTML5_TestableTokenizer($test->input, $flag, $test->lastStartTag);
+        $GLOBALS['TIME'] -= get_microtime();
+        $tokenizer->parse();
+        $GLOBALS['TIME'] += get_microtime();
+        return $tokenizer->outputTokens;
+    }
+}
+
+// generate test suites for tokenizer
+HTML5_TestData::generateTestCases(
+ 'HTML5_TokenizerHarness',
+ 'HTML5_TokenizerTestOf',
+ 'tokenizer', '*.test'
+);
diff --git a/test/HTML5/TreeBuilderTest.php b/test/HTML5/TreeBuilderTest.php
new file mode 100644
index 0000000..708d6a0
--- /dev/null
+++ b/test/HTML5/TreeBuilderTest.php
@@ -0,0 +1,39 @@
+<?php
+
+require_once dirname(__FILE__) . '/../autorun.php';
+
+SimpleTest::ignore('HTML5_TreeBuilderHarness');
+class HTML5_TreeBuilderHarness extends HTML5_TestDataHarness
+{
+    public function assertIdentical($expect, $actual, $test = array()) {
+        $input = isset($test['data']) ? $test['data'] : '';
+        if (isset($test['document-fragment'])) {
+            $input .= "\nFragment: " . $test['document-fragment'];
+        }
+        // The message is used as a format string (see the '%s' messages in the tokenizer tests), so double any literal %
+        parent::assertIdentical($expect, $actual, str_replace('%', '%%', "Identical expectation failed\nInput:\n$input\n\nExpected:\n$expect\n\nActual:\n$actual\n"));
+    }
+    public function invoke($test) {
+        // this is totally the wrong interface to use, but for now we need testing
+        $tokenizer = new HTML5_Tokenizer($test['data']);
+        $GLOBALS['TIME'] -= get_microtime();
+        if (isset($test['document-fragment'])) {
+            $tokenizer->parseFragment($test['document-fragment']);
+        } else {
+            $tokenizer->parse();
+        }
+        $GLOBALS['TIME'] += get_microtime();
+        $this->assertIdentical(
+            $test['document'],
+            HTML5_TestData::strDom($tokenizer->save()),
+            $test
+        );
+    }
+}
+
+HTML5_TestData::generateTestCases(
+ 'HTML5_TreeBuilderHarness',
+ 'HTML5_TreeBuilderTestOf',
+ 'tree-construction', '*.dat'
+);
+