diff options
Diffstat (limited to 'src/HTML5/Serializer/OutputRules.php')
-rw-r--r-- | src/HTML5/Serializer/OutputRules.php | 203 |
1 files changed, 203 insertions, 0 deletions
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php new file mode 100644 index 0000000..5780d61 --- /dev/null +++ b/src/HTML5/Serializer/OutputRules.php @@ -0,0 +1,203 @@ +<?php +namespace HTML5\Serializer; + +use \HTML5\Elements; + +class OutputRules { + + protected $traverser; + protected $encode = FALSE; + protected $out; + + const DOCTYPE = '<!DOCTYPE html>'; + + public function __construct($traverser, $output, $options = array()) { + $this->traverser = $traverser; + + if (isset($options['encode'])) { + $this->encode = $options['encode']; + } + + $this->out = $output; + + } + + public function document($dom) { + $this->doctype(); + $this->traverser->node($dom->documentElement); + $this->nl(); + } + + protected function doctype() { + $this->wr(self::DOCTYPE); + $this->nl(); + } + + public function element($ele) { + $name = $ele->tagName; + + // Per spec: + // If the element has a declared namespace in the HTML, MathML or + // SVG namespaces, we use the lname instead of the tagName. + if ($this->traverser->isLocalElement($ele)) { + $name = $ele->localName; + } + + $this->openTag($ele); + + // Handle children. + if ($ele->hasChildNodes()) { + $this->traverser->children($ele->childNodes); + } + + // If not unary, add a closing tag. + if (!Elements::isA($name, Elements::VOID_TAG)) { + $this->closeTag($ele); + } + } + + /** + * Write a text node. + * + * @param \DOMText $ele + * The text node to write. + */ + public function text($ele) { + if (isset($ele->parentNode) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) { + $this->wr($ele->wholeText); + return; + } + + // FIXME: This probably needs some flags set. + $this->wr($this->enc($ele->wholeText)); + + } + + public function cdata($ele) { + $this->wr('<![CDATA[')->wr($ele->wholeText)->wr(']]>'); + } + + public function comment($ele) { + $this->wr('<!--')->wr($ele->data)->wr('-->'); + } + + public function processorInstruction($ele) { + $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr(' ?>'); + } + + /** + * Write the opening tag. + * + * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the + * qualified name (8.3). + * + * @param \DOMNode $ele + * The element being written. + */ + protected function openTag($ele) { + // FIXME: Needs support for SVG, MathML, and namespaced XML. + $this->wr('<')->wr($ele->tagName); + $this->attrs($ele); + $this->wr('>'); + } + + protected function attrs($ele) { + // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. + if (!$ele->hasAttributes()) { + return $this; + } + + // TODO: Currently, this always writes name="value", and does not do + // value-less attributes. + $map = $ele->attributes; + $len = $map->length; + for ($i = 0; $i < $len; ++$i) { + $node = $map->item($i); + $val = $this->enc($node->value); + + // XXX: The spec says that we need to ensure that anything in + // the XML, XMLNS, or XLink NS's should use the canonical + // prefix. It seems that DOM does this for us already, but there + // may be exceptions. + $this->wr(' ')->wr($node->name)->wr('="')->wr($val)->wr('"'); + } + } + + /** + * Write the closing tag. + * + * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the + * qualified name (8.3). + * + * @param \DOMNode $ele + * The element being written. + */ + protected function closeTag($ele) { + // FIXME: Needs support for SVG, MathML, and namespaced XML. + $this->wr('</')->wr($ele->tagName)->wr('>'); + } + + /** + * Write to the output. + * + * @param string $text + * The string to put into the output. + * + * @return HTML5\Serializer\Traverser + * $this so it can be used in chaining. + */ + protected function wr($text) { + fwrite($this->out, $text); + return $this; + } + + /** + * Write a new line character. + * + * @return HTML5\Serializer\Traverser + * $this so it can be used in chaining. + */ + protected function nl() { + fwrite($this->out, PHP_EOL); + return $this; + } + + /** + * Encode text. + * + * True encoding will turn all named character references into their entities. + * This includes such characters as +.# and many other common ones. By default + * encoding here will just escape &'<>". + * + * Note, PHP 5.4+ has better html5 encoding. + * + * @todo Use the Entities class in php 5.3 to have html5 entities. + * + * @param string $text + * text to encode. + * + * @return string + * The encoded text. + */ + protected function enc($text) { + $flags = ENT_QUOTES; + + // Escape rather than encode all entities. + if (!$this->encode) { + return htmlspecialchars($text, $flags, 'UTF-8'); + } + + // If we are in PHP 5.4+ we can use the native html5 entity functionality. + if (defined('ENT_HTML5')) { + $flags = ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES; + $ret = htmlentities($text, $flags, 'UTF-8', FALSE); + } + // If a version earlier than 5.4 html5 entities are not entirely handled. + // This manually handles them. + else { + $ret = strtr($text, \HTML5\Serializer\HTML5Entities::$map); + } + return $ret; + } + +}
\ No newline at end of file |