diff options
author | Matt Butcher <[email protected]> | 2013-04-03 21:34:26 -0500 |
---|---|---|
committer | Matt Butcher <[email protected]> | 2013-04-03 21:34:26 -0500 |
commit | 55a48088ff54d4251fe8e5133deefbe81b991073 (patch) | |
tree | c209841b251798cdacdcea2bd22f7a7598d8dad1 /src/HTML5 | |
parent | 326f71c3747145b5618788f429559cbac453f114 (diff) |
Traverser now does basic HTML5.
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Traverser.php | 175 |
1 files changed, 174 insertions, 1 deletions
diff --git a/src/HTML5/Traverser.php b/src/HTML5/Traverser.php index f0771f0..9737bf6 100644 --- a/src/HTML5/Traverser.php +++ b/src/HTML5/Traverser.php @@ -7,16 +7,47 @@ namespace HTML5; * This is a concrete traverser designed to convert a DOM tree into an * HTML5 document. It is not intended to be a generic DOMTreeWalker * implementation. + * + * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments */ class Traverser { + static $block_elements = array( + 'html' => 1, + 'body' => 1, + 'head' => 1, + 'p' => 1, + 'div' => 1, + 'h1' => 1, + 'h2' => 1, + 'h3' => 1, + 'h4' => 1, + 'h5' => 1, + 'h6' => 1, + 'title' => 1, + 'script' => 1, + 'link' => 1, + 'meta' => 1, + 'section' => 1, + 'article' => 1, + 'table' => 1, + 'tbody' => 1, + 'tr' => 1, + 'th' => 1, + 'td' => 1, + //'form' => 1, + ); + protected $dom; protected $out; + protected $pretty = TRUE; + + const DOCTYPE = '<!DOCTYPE html>'; /** * Create a traverser. * - * @param DOMNode $dom + * @param DOMNode|DOMNodeList $dom * The document or node to traverse. * @param resource $out * A stream that allows writing. The traverser will output into this @@ -29,8 +60,150 @@ class Traverser { /** * Tell the traverser to walk the DOM. + * + * @return resource $out + * Returns the output stream. */ public function walk() { + // If DOMDocument, start with the DOCTYPE and travers. + if ($this->dom instanceof \DOMDocument) { + $this->doctype(); + $this->document($this->dom); + } + // If NodeList, loop + elseif ($this->dom instanceof \DOMNodeList) { + // Loop through the list + } + // Else assume this is a DOMNode-like datastructure. + else { + $this->node($this->dom); + } + + return $this->out; + } + + protected function doctype() { + $this->wr(self::DOCTYPE); + $this->nl(); + } + + protected function document($node) { + $this->node($node->documentElement); + $this->nl(); + } + + protected function node($node) { + switch ($node->nodeType) { + case XML_ELEMENT_NODE: + $this->element($node); + break; + case XML_TEXT_NODE: + $this->wr($node->wholeText); + break; + case XML_CDATA_SECTION_NODE: + $this->cdata($node); + break; + // FIXME: It appears that the parser doesn't do PI's. + case XML_PI_NODE: + $this->processorInstruction($ele); + break; + case XML_COMMENT_NODE: + $this->comment($node); + break; + // Currently we don't support embedding DTDs. + default: + print '<!-- Skipped -->'; + break; + } + } + + protected function element($ele) { + $name = $ele->tagName; + $block = $this->pretty && $this->isBlock($name); + + // TODO: Really need to fix the spacing. + // Add a newline for a block element. + if ($block) $this->nl(); + + $this->openTag($ele); + + // Handle children. + if ($ele->hasChildNodes()) { + $this->children($ele->childNodes); + } + + // If not unary, add a closing tag. + if (!$this->isUnary($name)) { + $this->closeTag($ele); + if ($block) $this->nl(); + } + } + + protected function cdata($ele) { + $this->wr('<![CDATA[')->wr($ele->wholeText)->wr(']]>'); + } + + protected function comment($ele) { + $this->wr('<!--')->wr($ele->data)->wr('-->'); + } + + protected function processorInstruction($ele) { + $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr(' ?>'); + } + + protected function children($nl) { + foreach ($nl as $node) { + $this->node($node); + } + } + + protected function openTag($ele) { + $this->wr('<')->wr($ele->tagName); + $this->attrs($ele); + $this->wr('>'); + } + + protected function attrs($ele) { + if (!$ele->hasAttributes()) { + return $this; + } + + // TODO: Currently, this always writes name="value", and does not do + // value-less attributes. + $map = $ele->attributes; + $len = $map->length; + for ($i = 0; $i < $len; ++$i) { + $node = $map->item($i); + $this->wr(' ')->wr($node->name)->wr('="')->wr($node->value)->wr('"'); + } + } + + protected function closeTag($ele) { + $this->wr('</')->wr($ele->tagName)->wr('>'); + } + + protected function wr($text) { + fwrite($this->out, $text); + return $this; + } + + protected function nl() { + fwrite($this->out, PHP_EOL); + return $this; + } + + /** + * Is an unary tag. + */ + protected function isUnary($name) { + return FALSE; + } + + /** + * Is block element. + */ + protected function isBlock($name) { + return isset(self::$block_elements[$name]); } } |