summary refs log tree commit diff
diff options
context:
space:
mode:
author Matt Butcher <[email protected]> 2013-05-02 11:02:07 -0500
committer Matt Butcher <[email protected]> 2013-05-02 11:02:07 -0500
commit 7b243b238c29e787686cb60fe418c1cd21a6dd58 (patch)
tree 036b001cc30509936126eb557cb7e06a554d08e0
parent dd4a88998609e7efa6203b2ee776aac6d9df9b1c (diff)
Rules for LI, DT, DD, RT, and RP.
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php 53
-rw-r--r-- src/HTML5/Parser/TreeBuildingRules.php 33
-rw-r--r-- test/HTML5/Parser/TreeBuildingRulesTest.php 26
3 files changed, 92 insertions, 20 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 1e3faad..7d40d40 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -64,6 +64,9 @@ class DOMTreeBuilder implements EventHandler {
// $this->current = $this->doc->documentElement;
$this->current = $this->doc; //->documentElement;
+
+ // Create a rules engine for tags.
+ $this->rules = new TreeBuildingRules($this->doc);
}
/**
@@ -123,15 +126,6 @@ class DOMTreeBuilder implements EventHandler {
if ($name == 'image') {
$name = 'img';
}
- elseif ($name == 'optgroup' && $this->current->tagName == 'option') {
- $this->current = $this->current->parentNode;
- }
- // TODO: MathML support
- elseif ($name == 'math') {
- }
- // TODO: SVG support.
- elseif ($name == 'svg') {
- }
// Autoclose p tags where appropriate.
@@ -149,7 +143,7 @@ class DOMTreeBuilder implements EventHandler {
$this->parseError("Unexpected head tag outside of head context.");
}
else {
- $this->isertMode = self::IM_IN_HEAD;
+ $this->insertMode = self::IM_IN_HEAD;
}
break;
case 'body':
@@ -174,11 +168,18 @@ class DOMTreeBuilder implements EventHandler {
}
}
- $this->current->appendChild($ele);
+ // Some elements have special processing rules. Handle those separately.
+ if ($this->rules->hasRules($name)) {
+ $this->current = $this->rules->evaluate($ele, $this->current);
+ }
+ // Otherwise, it's a standard element.
+ else {
+ $this->current->appendChild($ele);
- // XXX: Need to handle self-closing tags and unary tags.
- if (!Elements::isA($name, Elements::VOID_TAG)) {
- $this->current = $ele;
+ // XXX: Need to handle self-closing tags and unary tags.
+ if (!Elements::isA($name, Elements::VOID_TAG)) {
+ $this->current = $ele;
+ }
}
// Return the element mask, which the tokenizer can then use to set
@@ -333,5 +334,29 @@ class DOMTreeBuilder implements EventHandler {
}
+ /**
+ * Checks if the current element, or any element above it, has the given tag name.
+ *
+ * If $this->current or anything above $this->current matches the given tag
+ * name, this returns TRUE.
+ */
+ protected function isAncestor($tagname) {
+ $candidate = $this->current;
+ while ($candidate->nodeType === XML_ELEMENT_NODE) {
+ if ($candidate->tagName == $tagname) {
+ return TRUE;
+ }
+ $candidate = $candidate->parentNode;
+ }
+ return FALSE;
+ }
+
+ /**
+ * Returns TRUE if $this->current (the node new elements are normally appended to) has the given tagname.
+ */
+ protected function isParent($tagname) {
+ return $this->current->tagName == $tagname;
+ }
+
}
diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php
index 33d46b1..111da1e 100644
--- a/src/HTML5/Parser/TreeBuildingRules.php
+++ b/src/HTML5/Parser/TreeBuildingRules.php
@@ -15,6 +15,10 @@ class TreeBuildingRules {
protected static $tags = array(
'li' => 1,
+ 'dd' => 1,
+ 'dt' => 1,
+ 'rt' => 1,
+ 'rp' => 1,
);
/**
@@ -47,6 +51,12 @@ class TreeBuildingRules {
switch($new->tagName) {
case 'li':
return $this->handleLI($new, $current);
+ case 'dt':
+ case 'dd':
+ return $this->handleDT($new, $current);
+ case 'rt':
+ case 'rp':
+ return $this->handleRT($new, $current);
}
@@ -54,14 +64,25 @@ class TreeBuildingRules {
}
protected function handleLI($ele, $current) {
- if ($current->tagName == 'li') {
+ return $this->closeIfCurrentMatches($ele, $current, array('li'));
+ }
+
+ protected function handleDT($ele, $current) {
+ return $this->closeIfCurrentMatches($ele, $current, array('dt','dd'));
+ }
+ protected function handleRT($ele, $current) {
+ return $this->closeIfCurrentMatches($ele, $current, array('rt','rp'));
+ }
+
+ protected function closeIfCurrentMatches($ele, $current, $match) {
+ $tname = $current->tagName;
+ if (in_array($current->tagName, $match)) {
$current->parentNode->appendChild($ele);
- return $ele;
}
- // XXX FINISH
-
- $current->appendChild($ele);
- return $current;
+ else {
+ $current->appendChild($ele);
+ }
+ return $ele;
}
}
diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php
index 3ca3cbb..fe02893 100644
--- a/test/HTML5/Parser/TreeBuildingRulesTest.php
+++ b/test/HTML5/Parser/TreeBuildingRulesTest.php
@@ -14,6 +14,8 @@ require_once __DIR__ . '/../TestCase.php';
*/
class TreeBuildingRulesTest extends \HTML5\Tests\TestCase {
+ const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>';
+
/**
* Convenience function for parsing.
*/
@@ -36,4 +38,28 @@ class TreeBuildingRulesTest extends \HTML5\Tests\TestCase {
$this->assertFalse($engine->hasRules('imaginary'));
}
+ public function testHandleLI() {
+ $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>');
+ $doc = $this->parse($html);
+
+ $list = $doc->getElementById('a');
+
+ $this->assertEquals(2, $list->childNodes->length);
+ foreach($list->childNodes as $ele) {
+ $this->assertEquals('li', $ele->tagName);
+ }
+
+ }
+
+ public function testHandleDT() {
+ $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>');
+ $doc = $this->parse($html);
+
+ $list = $doc->getElementById('a');
+
+ $this->assertEquals(2, $list->childNodes->length);
+ $this->assertEquals('dt', $list->firstChild->tagName);
+ $this->assertEquals('dd', $list->lastChild->tagName);
+ }
+
}