'; public function __construct($output, $options = array()) { if (isset($options['encode_entities'])) { $this->encode = $options['encode_entities']; } $this->outputMode = static::IM_IN_HTML; $this->out = $output; } public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) { $this->traverser = $traverser; return $this; } public function document($dom) { $this->doctype(); $this->traverser->node($dom->documentElement); $this->nl(); } protected function doctype() { $this->wr(static::DOCTYPE); $this->nl(); } public function element($ele) { $name = $ele->tagName; // Per spec: // If the element has a declared namespace in the HTML, MathML or // SVG namespaces, we use the lname instead of the tagName. if ($this->traverser->isLocalElement($ele)) { $name = $ele->localName; } // If we are in SVG or MathML there is special handling. // Using if/elseif instead of switch because it's faster in PHP. if ($name == 'svg') { $this->outputMode = static::IM_IN_SVG; $name = Elements::normalizeSvgElement($name); } elseif ($name == 'math') { $this->outputMode = static::IM_IN_MATHML; } $this->openTag($ele); // Handle children. if ($ele->hasChildNodes()) { $this->traverser->children($ele->childNodes); } // Close out the SVG or MathML special handling. if ($name == 'svg' || $name == 'math') { $this->outputMode = static::IM_IN_HTML; } // If not unary, add a closing tag. if (!Elements::isA($name, Elements::VOID_TAG)) { $this->closeTag($ele); } } /** * Write a text node. * * @param \DOMText $ele * The text node to write. */ public function text($ele) { if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) { $this->wr($ele->data); return; } // FIXME: This probably needs some flags set. $this->wr($this->enc($ele->data)); } public function cdata($ele) { // This encodes CDATA. $this->wr($ele->ownerDocument->saveXML($ele)); } public function comment($ele) { // These produce identical output. //$this->wr(''); $this->wr($ele->ownerDocument->saveXML($ele)); } public function processorInstruction($ele) { $this->wr('wr($ele->target)->wr(' ')->wr($ele->data)->wr('?>'); } /** * Write the opening tag. * * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the * qualified name (8.3). * * @param \DOMNode $ele * The element being written. */ protected function openTag($ele) { $this->wr('<')->wr($ele->tagName); $this->attrs($ele); if ($this->outputMode == static::IM_IN_HTML) { $this->wr('>'); } // If we are not in html mode we are in SVG, MathML, or XML embedded content. else { if ($ele->hasChildNodes()) { $this->wr('>'); } // If there are no children this is self closing. else { $this->wr(' />'); } } } protected function attrs($ele) { // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. if (!$ele->hasAttributes()) { return $this; } // TODO: Currently, this always writes name="value", and does not do // value-less attributes. $map = $ele->attributes; $len = $map->length; for ($i = 0; $i < $len; ++$i) { $node = $map->item($i); $val = $this->enc($node->value, TRUE); // XXX: The spec says that we need to ensure that anything in // the XML, XMLNS, or XLink NS's should use the canonical // prefix. It seems that DOM does this for us already, but there // may be exceptions. $name = $node->name; // Special handling for attributes in SVG and MathML. // Using if/elseif instead of switch because it's faster in PHP. if ($this->outputMode == static::IM_IN_SVG) { $name = Elements::normalizeSvgAttribute($name); } elseif ($this->outputMode == static::IM_IN_MATHML) { $name = Elements::normalizeMathMlAttribute($name); } $this->wr(' ')->wr($name); if (isset($val) && $val !== '') { $this->wr('="')->wr($val)->wr('"'); } } } /** * Write the closing tag. * * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the * qualified name (8.3). * * @param \DOMNode $ele * The element being written. */ protected function closeTag($ele) { if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) { $this->wr('wr($ele->tagName)->wr('>'); } } /** * Write to the output. * * @param string $text * The string to put into the output. * * @return Masterminds\HTML5\Serializer\Traverser * $this so it can be used in chaining. */ protected function wr($text) { fwrite($this->out, $text); return $this; } /** * Write a new line character. * * @return Masterminds\HTML5\Serializer\Traverser * $this so it can be used in chaining. */ protected function nl() { fwrite($this->out, PHP_EOL); return $this; } /** * Encode text. * * When encode is set to FALSE, the default value, the text passed in is * escaped per section 8.3 of the html5 spec. For details on how text is * escaped see the escape() method. * * When encoding is set to true the text is converted to named character * references where appropriate. Section 8.1.4 Character references of the * html5 spec refers to using named character references. This is useful for * characters that can't otherwise legally be used in the text. * * The named character references are listed in section 8.5. * * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references * * True encoding will turn all named character references into their entities. * This includes such characters as +.# and many other common ones. By default * encoding here will just escape &'<>". * * Note, PHP 5.4+ has better html5 encoding. * * @todo Use the Entities class in php 5.3 to have html5 entities. * * @param string $text * text to encode. * @param boolean $attribute * True if we are encoding an attrubute, false otherwise * * @return string * The encoded text. */ protected function enc($text, $attribute = FALSE) { // Escape the text rather than convert to named character references. if (!$this->encode) { return $this->escape($text, $attribute); } // If we are in PHP 5.4+ we can use the native html5 entity functionality to // convert the named character references. if (defined('ENT_HTML5')) { return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE); } // If a version earlier than 5.4 html5 entities are not entirely handled. // This manually handles them. else { return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map); } } /** * Escape test. * * According to the html5 spec section 8.3 Serializing HTML fragments, text * within tags that are not style, script, xmp, iframe, noembed, and noframes * need to be properly escaped. * * The & should be converted to &, no breaking space unicode characters * converted to  , when in attribute mode the " should be converted to * ", and when not in attribute mode the < and > should be converted to * < and >. * * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString * * @param string $text * text to escape. * @param boolean $attribute * True if we are escaping an attrubute, false otherwise */ protected function escape($text, $attribute = FALSE) { // Not using htmlspecialchars because, while it does escaping, it doesn't // match the requirements of section 8.5. For example, it doesn't handle // non-breaking spaces. if ($attribute) { $replace = array('"'=>'"', '&'=>'&', "\xc2\xa0"=>' '); } else { $replace = array('<'=>'<', '>'=>'>', '&'=>'&', "\xc2\xa0"=>' '); } return strtr($text, $replace); } }