summaryrefslogtreecommitdiff
path: root/src/HTML5
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5')
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php | 53
-rw-r--r-- src/HTML5/Parser/TreeBuildingRules.php | 88
2 files changed, 127 insertions, 14 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 1e3faad..7d40d40 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -64,6 +64,9 @@ class DOMTreeBuilder implements EventHandler {
// $this->current = $this->doc->documentElement;
$this->current = $this->doc; //->documentElement;
+
+ // Create a rules engine for tags.
+ $this->rules = new TreeBuildingRules($this->doc);
}
/**
@@ -123,15 +126,6 @@ class DOMTreeBuilder implements EventHandler {
if ($name == 'image') {
$name = 'img';
}
- elseif ($name == 'optgroup' && $this->current->tagName == 'option') {
- $this->current = $this->current->parentNode;
- }
- // TODO: MathML support
- elseif ($name == 'math') {
- }
- // TODO: SVG support.
- elseif ($name == 'svg') {
- }
// Autoclose p tags where appropriate.
@@ -149,7 +143,7 @@ class DOMTreeBuilder implements EventHandler {
$this->parseError("Unexpected head tag outside of head context.");
}
else {
- $this->isertMode = self::IM_IN_HEAD;
+ $this->insertMode = self::IM_IN_HEAD;
}
break;
case 'body':
@@ -174,11 +168,18 @@ class DOMTreeBuilder implements EventHandler {
}
}
- $this->current->appendChild($ele);
+ // Some elements have special processing rules. Handle those separately.
+ if ($this->rules->hasRules($name)) {
+ $this->current = $this->rules->evaluate($ele, $this->current);
+ }
+ // Otherwise, it's a standard element.
+ else {
+ $this->current->appendChild($ele);
- // XXX: Need to handle self-closing tags and unary tags.
- if (!Elements::isA($name, Elements::VOID_TAG)) {
- $this->current = $ele;
+ // XXX: Need to handle self-closing tags and unary tags.
+ if (!Elements::isA($name, Elements::VOID_TAG)) {
+ $this->current = $ele;
+ }
}
// Return the element mask, which the tokenizer can then use to set
@@ -333,5 +334,29 @@ class DOMTreeBuilder implements EventHandler {
}
+  /**
+   * Checks whether the current node, or any element above it, has the given
+   * tag name.
+   *
+   * The walk starts at $this->current and follows parentNode upward. It stops
+   * at the first node that is not an element (for example the document) or
+   * when there is no parent left, as happens on a detached subtree.
+   *
+   * @param string $tagname
+   *   The tag name to look for.
+   *
+   * @return boolean
+   *   TRUE if $this->current or one of its element ancestors matches,
+   *   FALSE otherwise.
+   */
+  protected function isAncestor($tagname) {
+    $candidate = $this->current;
+    // The NULL check covers detached subtrees, whose top-most element has no
+    // parentNode; without it the loop condition would read a property of NULL.
+    while ($candidate !== NULL && $candidate->nodeType === XML_ELEMENT_NODE) {
+      if ($candidate->tagName === $tagname) {
+        return TRUE;
+      }
+      $candidate = $candidate->parentNode;
+    }
+    return FALSE;
+  }
+
+  /**
+   * Returns TRUE if the current insertion point is an element with the given
+   * tag name.
+   *
+   * The node inspected is $this->current, i.e. the node that would become the
+   * parent of the next inserted element.
+   *
+   * @param string $tagname
+   *   The tag name to compare against.
+   *
+   * @return boolean
+   *   TRUE if $this->current is an element named $tagname, FALSE otherwise
+   *   (including when $this->current is not an element at all).
+   */
+  protected function isParent($tagname) {
+    $node = $this->current;
+    // $this->current can be the document itself, which has no tagName
+    // property; only element nodes are compared.
+    return $node->nodeType === XML_ELEMENT_NODE && $node->tagName === $tagname;
+  }
+
}
diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php
new file mode 100644
index 0000000..111da1e
--- /dev/null
+++ b/src/HTML5/Parser/TreeBuildingRules.php
@@ -0,0 +1,88 @@
+<?php
+namespace HTML5\Parser;
+
+use HTML5\Elements;
+
+/**
+ * Handles special-case rules for the DOM tree builder.
+ *
+ * Many tags have special rules that need to be accommodated on an
+ * individual basis. This class handles those rules.
+ *
+ * See section 8.1.2.4 of the spec.
+ */
+class TreeBuildingRules {
+
+  /**
+   * Tag names that have a special insertion rule.
+   *
+   * Only the keys are used; hasRules() performs an isset() lookup on them.
+   */
+  protected static $tags = array(
+    'li' => 1,
+    'dd' => 1,
+    'dt' => 1,
+    'rt' => 1,
+    'rp' => 1,
+  );
+
+  /**
+   * The DOM document passed to the constructor.
+   *
+   * Declared explicitly so the constructor does not create a dynamic property.
+   *
+   * @var \DOMDocument
+   */
+  protected $doc;
+
+  /**
+   * Build a new rules engine.
+   *
+   * @param \DOMDocument $doc
+   *   The DOM document to use for evaluation and modification.
+   */
+  public function __construct($doc) {
+    $this->doc = $doc;
+  }
+
+  /**
+   * Returns TRUE if the given tagname has special processing rules.
+   *
+   * @param string $tagname
+   *   The tag name to look up.
+   *
+   * @return boolean
+   *   TRUE when $tagname is one of the keys of self::$tags.
+   */
+  public function hasRules($tagname) {
+    return isset(self::$tags[$tagname]);
+  }
+
+  /**
+   * Evaluate the rule for the current tag name.
+   *
+   * This may modify the existing DOM.
+   *
+   * For a tag without a rule nothing is inserted and $current is returned
+   * unchanged, so callers should check hasRules() first.
+   *
+   * @param \DOMElement $new
+   *   The element that is about to be inserted.
+   * @param \DOMNode $current
+   *   The node that is the current insertion point.
+   *
+   * @return \DOMNode
+   *   The new Current DOM element.
+   */
+  public function evaluate($new, $current) {
+
+    switch ($new->tagName) {
+      case 'li':
+        return $this->handleLI($new, $current);
+      case 'dt':
+      case 'dd':
+        return $this->handleDT($new, $current);
+      case 'rt':
+      case 'rp':
+        return $this->handleRT($new, $current);
+    }
+
+    return $current;
+  }
+
+  /**
+   * Inserts an li, making it a sibling of $current when $current is an li.
+   */
+  protected function handleLI($ele, $current) {
+    return $this->closeIfCurrentMatches($ele, $current, array('li'));
+  }
+
+  /**
+   * Inserts a dt or dd, making it a sibling of $current when $current is a
+   * dt or dd.
+   */
+  protected function handleDT($ele, $current) {
+    return $this->closeIfCurrentMatches($ele, $current, array('dt', 'dd'));
+  }
+
+  /**
+   * Inserts an rt or rp, making it a sibling of $current when $current is an
+   * rt or rp.
+   */
+  protected function handleRT($ele, $current) {
+    return $this->closeIfCurrentMatches($ele, $current, array('rt', 'rp'));
+  }
+
+  /**
+   * Inserts $ele next to or inside $current, depending on $current's tag.
+   *
+   * If $current is an element whose tag name is listed in $match, $ele is
+   * appended to $current's parent, which makes it a following sibling. In
+   * every other case $ele is appended as a child of $current.
+   *
+   * @param \DOMElement $ele
+   *   The element to insert.
+   * @param \DOMNode $current
+   *   The current insertion point.
+   * @param array $match
+   *   Tag names that $ele implicitly closes.
+   *
+   * @return \DOMElement
+   *   $ele, which becomes the new current element.
+   */
+  protected function closeIfCurrentMatches($ele, $current, $match) {
+    // Only element nodes carry a tagName, and a sibling can only be created
+    // when $current actually has a parent. A detached matching element falls
+    // back to plain child insertion instead of calling a method on NULL.
+    $closesCurrent = $current->nodeType === XML_ELEMENT_NODE
+      && $current->parentNode !== NULL
+      && in_array($current->tagName, $match, TRUE);
+
+    $target = $closesCurrent ? $current->parentNode : $current;
+    $target->appendChild($ele);
+
+    return $ele;
+  }
+}