summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php 53
-rw-r--r-- src/HTML5/Parser/TreeBuildingRules.php 88
-rw-r--r-- test/HTML5/Parser/TreeBuildingRulesTest.php 65
3 files changed, 192 insertions, 14 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 1e3faad..7d40d40 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -64,6 +64,9 @@ class DOMTreeBuilder implements EventHandler {
// $this->current = $this->doc->documentElement;
$this->current = $this->doc; //->documentElement;
+
+ // Create a rules engine for tags.
+ $this->rules = new TreeBuildingRules($this->doc);
}
/**
@@ -123,15 +126,6 @@ class DOMTreeBuilder implements EventHandler {
if ($name == 'image') {
$name = 'img';
}
- elseif ($name == 'optgroup' && $this->current->tagName == 'option') {
- $this->current = $this->current->parentNode;
- }
- // TODO: MathML support
- elseif ($name == 'math') {
- }
- // TODO: SVG support.
- elseif ($name == 'svg') {
- }
// Autoclose p tags where appropriate.
@@ -149,7 +143,7 @@ class DOMTreeBuilder implements EventHandler {
$this->parseError("Unexpected head tag outside of head context.");
}
else {
- $this->isertMode = self::IM_IN_HEAD;
+ $this->insertMode = self::IM_IN_HEAD;
}
break;
case 'body':
@@ -174,11 +168,18 @@ class DOMTreeBuilder implements EventHandler {
}
}
- $this->current->appendChild($ele);
+ // Some elements have special processing rules. Handle those separately.
+ if ($this->rules->hasRules($name)) {
+ $this->current = $this->rules->evaluate($ele, $this->current);
+ }
+ // Otherwise, it's a standard element.
+ else {
+ $this->current->appendChild($ele);
- // XXX: Need to handle self-closing tags and unary tags.
- if (!Elements::isA($name, Elements::VOID_TAG)) {
- $this->current = $ele;
+ // XXX: Need to handle self-closing tags and unary tags.
+ if (!Elements::isA($name, Elements::VOID_TAG)) {
+ $this->current = $ele;
+ }
}
// Return the element mask, which the tokenizer can then use to set
@@ -333,5 +334,29 @@ class DOMTreeBuilder implements EventHandler {
}
+ /**
+ * Checks if the given tagname is an ancestor of the present candidate.
+ *
+ * If $this->current or anything above $this->current matches the given tag
+ * name, this returns TRUE.
+ */
+ protected function isAncestor($tagname) {
+ $candidate = $this->current;
+ while ($candidate->nodeType === XML_ELEMENT_NODE) {
+ if ($candidate->tagName == $tagname) {
+ return TRUE;
+ }
+ $candidate = $candidate->parentNode;
+ }
+ return FALSE;
+ }
+
+ /**
+ * Returns TRUE if the immediate parent element is of the given tagname.
+ */
+ protected function isParent($tagname) {
+ return $this->current->tagName == $tagname;
+ }
+
}
diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php
new file mode 100644
index 0000000..111da1e
--- /dev/null
+++ b/src/HTML5/Parser/TreeBuildingRules.php
@@ -0,0 +1,88 @@
+<?php
+namespace HTML5\Parser;
+
+use HTML5\Elements;
+
+/**
+ * Handles special-case rules for the DOM tree builder.
+ *
+ * Many tags have special rules that need to be accommodated on an
+ * individual basis. This class handles those rules.
+ *
+ * See section 8.1.2.4 of the spec.
+ */
+class TreeBuildingRules {
+
+ protected static $tags = array(
+ 'li' => 1,
+ 'dd' => 1,
+ 'dt' => 1,
+ 'rt' => 1,
+ 'rp' => 1,
+ );
+
+ /**
+ * Build a new rules engine.
+ *
+ * @param \DOMDocument $doc
+ * The DOM document to use for evaluation and modification.
+ */
+ public function __construct($doc) {
+ $this->doc = $doc;
+ }
+
+ /**
+ * Returns TRUE if the given tagname has special processing rules.
+ */
+ public function hasRules($tagname) {
+ return isset(self::$tags[$tagname]);
+ }
+
+ /**
+ * Evaluate the rule for the current tag name.
+ *
+ * This may modify the existing DOM.
+ *
+ * @return \DOMElement
+ * The new Current DOM element.
+ */
+ public function evaluate($new, $current) {
+
+ switch($new->tagName) {
+ case 'li':
+ return $this->handleLI($new, $current);
+ case 'dt':
+ case 'dd':
+ return $this->handleDT($new, $current);
+ case 'rt':
+ case 'rp':
+ return $this->handleRT($new, $current);
+
+ }
+
+ return $current;
+ }
+
+ protected function handleLI($ele, $current) {
+ return $this->closeIfCurrentMatches($ele, $current, array('li'));
+ }
+
+ protected function handleDT($ele, $current) {
+ return $this->closeIfCurrentMatches($ele, $current, array('dt','dd'));
+ }
+ protected function handleRT($ele, $current) {
+ return $this->closeIfCurrentMatches($ele, $current, array('rt','rp'));
+ }
+
+ protected function closeIfCurrentMatches($ele, $current, $match) {
+ $tname = $current->tagName;
+ if (in_array($current->tagName, $match)) {
+ $current->parentNode->appendChild($ele);
+ }
+ else {
+ $current->appendChild($ele);
+ }
+ return $ele;
+
+ }
+}
diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php
new file mode 100644
index 0000000..fe02893
--- /dev/null
+++ b/test/HTML5/Parser/TreeBuildingRulesTest.php
@@ -0,0 +1,65 @@
+<?php
+/**
+ * @file
+ * Test the Tree Builder's special-case rules.
+ */
+namespace HTML5\Parser;
+
+use HTML5\Elements;
+
+require_once __DIR__ . '/../TestCase.php';
+
+/**
+ * These tests are functional, not necessarily unit tests.
+ */
+class TreeBuildingRulesTest extends \HTML5\Tests\TestCase {
+
+ const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>';
+
+ /**
+ * Convenience function for parsing.
+ */
+ protected function parse($string) {
+ $treeBuilder = new DOMTreeBuilder();
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+
+ return $treeBuilder->document();
+ }
+
+ public function testHasRules() {
+ $doc = new \DOMDocument('1.0');
+ $engine = new TreeBuildingRules($doc);
+
+ $this->assertTrue($engine->hasRules('li'));
+ $this->assertFalse($engine->hasRules('imaginary'));
+ }
+
+ public function testHandleLI() {
+ $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>');
+ $doc = $this->parse($html);
+
+ $list = $doc->getElementById('a');
+
+ $this->assertEquals(2, $list->childNodes->length);
+ foreach($list->childNodes as $ele) {
+ $this->assertEquals('li', $ele->tagName);
+ }
+
+ }
+
+ public function testHandleDT() {
+ $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>');
+ $doc = $this->parse($html);
+
+ $list = $doc->getElementById('a');
+
+ $this->assertEquals(2, $list->childNodes->length);
+ $this->assertEquals('dt', $list->firstChild->tagName);
+ $this->assertEquals('dd', $list->lastChild->tagName);
+ }
+
+}