From 7b243b238c29e787686cb60fe418c1cd21a6dd58 Mon Sep 17 00:00:00 2001 From: Matt Butcher Date: Thu, 2 May 2013 11:02:07 -0500 Subject: Rules for LI, DT, DD, RT, and RP. --- src/HTML5/Parser/DOMTreeBuilder.php | 53 +++++++++++++++++++++-------- src/HTML5/Parser/TreeBuildingRules.php | 33 ++++++++++++++---- test/HTML5/Parser/TreeBuildingRulesTest.php | 26 ++++++++++++++ 3 files changed, 92 insertions(+), 20 deletions(-) diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index 1e3faad..7d40d40 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -64,6 +64,9 @@ class DOMTreeBuilder implements EventHandler { // $this->current = $this->doc->documentElement; $this->current = $this->doc; //->documentElement; + + // Create a rules engine for tags. + $this->rules = new TreeBuildingRules($this->doc); } /** @@ -123,15 +126,6 @@ class DOMTreeBuilder implements EventHandler { if ($name == 'image') { $name = 'img'; } - elseif ($name == 'optgroup' && $this->current->tagName == 'option') { - $this->current = $this->current->parentNode; - } - // TODO: MathML support - elseif ($name == 'math') { - } - // TODO: SVG support. - elseif ($name == 'svg') { - } // Autoclose p tags where appropriate. @@ -149,7 +143,7 @@ class DOMTreeBuilder implements EventHandler { $this->parseError("Unexpected head tag outside of head context."); } else { - $this->isertMode = self::IM_IN_HEAD; + $this->insertMode = self::IM_IN_HEAD; } break; case 'body': @@ -174,11 +168,18 @@ class DOMTreeBuilder implements EventHandler { } } - $this->current->appendChild($ele); + // Some elements have special processing rules. Handle those separately. + if ($this->rules->hasRules($name)) { + $this->current = $this->rules->evaluate($ele, $this->current); + } + // Otherwise, it's a standard element. + else { + $this->current->appendChild($ele); - // XXX: Need to handle self-closing tags and unary tags. - if (!Elements::isA($name, Elements::VOID_TAG)) { - $this->current = $ele; + // XXX: Need to handle self-closing tags and unary tags. + if (!Elements::isA($name, Elements::VOID_TAG)) { + $this->current = $ele; + } } // Return the element mask, which the tokenizer can then use to set @@ -333,5 +334,29 @@ class DOMTreeBuilder implements EventHandler { } + /** + * Checks if the given tagname is an ancestor of the present candidate. + * + * If $this->current or anything above $this->current matches the given tag + * name, this returns TRUE. + */ + protected function isAncestor($tagname) { + $candidate = $this->current; + while ($candidate->nodeType === XML_ELEMENT_NODE) { + if ($candidate->tagName == $tagname) { + return TRUE; + } + $candidate = $candidate->parentNode; + } + return FALSE; + } + + /** + * Returns TRUE if the immediate parent element is of the given tagname. + */ + protected function isParent($tagname) { + return $this->current->tagName == $tagname; + } + } diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php index 33d46b1..111da1e 100644 --- a/src/HTML5/Parser/TreeBuildingRules.php +++ b/src/HTML5/Parser/TreeBuildingRules.php @@ -15,6 +15,10 @@ class TreeBuildingRules { protected static $tags = array( 'li' => 1, + 'dd' => 1, + 'dt' => 1, + 'rt' => 1, + 'rp' => 1, ); /** @@ -47,6 +51,12 @@ class TreeBuildingRules { switch($new->tagName) { case 'li': return $this->handleLI($new, $current); + case 'dt': + case 'dd': + return $this->handleDT($new, $current); + case 'rt': + case 'rp': + return $this->handleRT($new, $current); } @@ -54,14 +64,25 @@ class TreeBuildingRules { } protected function handleLI($ele, $current) { - if ($current->tagName == 'li') { + return $this->closeIfCurrentMatches($ele, $current, array('li')); + } + + protected function handleDT($ele, $current) { + return $this->closeIfCurrentMatches($ele, $current, array('dt','dd')); + } + protected function handleRT($ele, $current) { + return $this->closeIfCurrentMatches($ele, $current, array('rt','rp')); + } + + protected function closeIfCurrentMatches($ele, $current, $match) { + $tname = $current->tagName; + if (in_array($current->tagName, $match)) { $current->parentNode->appendChild($ele); - return $ele; } - // XXX FINISH - - $current->appendChild($ele); - return $current; + else { + $current->appendChild($ele); + } + return $ele; } } diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php index 3ca3cbb..fe02893 100644 --- a/test/HTML5/Parser/TreeBuildingRulesTest.php +++ b/test/HTML5/Parser/TreeBuildingRulesTest.php @@ -14,6 +14,8 @@ require_once __DIR__ . '/../TestCase.php'; */ class TreeBuildingRulesTest extends \HTML5\Tests\TestCase { + const HTML_STUB = 'test%s'; + /** * Convenience function for parsing. */ @@ -36,4 +38,28 @@ class TreeBuildingRulesTest extends \HTML5\Tests\TestCase { $this->assertFalse($engine->hasRules('imaginary')); } + public function testHandleLI() { + $html = sprintf(self::HTML_STUB, ''); + $doc = $this->parse($html); + + $list = $doc->getElementById('a'); + + $this->assertEquals(2, $list->childNodes->length); + foreach($list->childNodes as $ele) { + $this->assertEquals('li', $ele->tagName); + } + + } + + public function testHandleDT() { + $html = sprintf(self::HTML_STUB, '
Hello
Hi
'); + $doc = $this->parse($html); + + $list = $doc->getElementById('a'); + + $this->assertEquals(2, $list->childNodes->length); + $this->assertEquals('dt', $list->firstChild->tagName); + $this->assertEquals('dd', $list->lastChild->tagName); + } + } -- cgit v1.2.3