diff options
Diffstat (limited to 'src/HTML5/Serializer/OutputRules.php')
-rw-r--r-- | src/HTML5/Serializer/OutputRules.php | 549 |
1 files changed, 286 insertions, 263 deletions
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php index 2425958..168c65c 100644 --- a/src/HTML5/Serializer/OutputRules.php +++ b/src/HTML5/Serializer/OutputRules.php @@ -13,302 +13,325 @@ use Masterminds\HTML5\Elements; /** * Generate the output html5 based on element rules. */ -class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface { +class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface +{ - const IM_IN_HTML = 1; - const IM_IN_SVG = 2; - const IM_IN_MATHML = 3; + const IM_IN_HTML = 1; - protected $traverser; - protected $encode = FALSE; - protected $out; - protected $outputMode; + const IM_IN_SVG = 2; - const DOCTYPE = '<!DOCTYPE html>'; + const IM_IN_MATHML = 3; - public function __construct($output, $options = array()) { + protected $traverser; - if (isset($options['encode_entities'])) { - $this->encode = $options['encode_entities']; - } + protected $encode = FALSE; - $this->outputMode = static::IM_IN_HTML; - $this->out = $output; - } + protected $out; - public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) { - $this->traverser = $traverser; + protected $outputMode; - return $this; - } + const DOCTYPE = '<!DOCTYPE html>'; - public function document($dom) { - $this->doctype(); - $this->traverser->node($dom->documentElement); - $this->nl(); - } + public function __construct($output, $options = array()) + { + if (isset($options['encode_entities'])) { + $this->encode = $options['encode_entities']; + } - protected function doctype() { - $this->wr(static::DOCTYPE); - $this->nl(); - } + $this->outputMode = static::IM_IN_HTML; + $this->out = $output; + } - public function element($ele) { - $name = $ele->tagName; + public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) + { + $this->traverser = $traverser; - // Per spec: - // If the element has a declared namespace in the HTML, MathML or - // SVG namespaces, we use the lname instead of the tagName. - if ($this->traverser->isLocalElement($ele)) { - $name = $ele->localName; + return $this; } - // If we are in SVG or MathML there is special handling. - // Using if/elseif instead of switch because it's faster in PHP. - if ($name == 'svg') { - $this->outputMode = static::IM_IN_SVG; - $name = Elements::normalizeSvgElement($name); + public function document($dom) + { + $this->doctype(); + $this->traverser->node($dom->documentElement); + $this->nl(); } - elseif ($name == 'math') { - $this->outputMode = static::IM_IN_MATHML; - } - - $this->openTag($ele); - // Handle children. - if ($ele->hasChildNodes()) { - $this->traverser->children($ele->childNodes); + protected function doctype() + { + $this->wr(static::DOCTYPE); + $this->nl(); } - // Close out the SVG or MathML special handling. - if ($name == 'svg' || $name == 'math') { - $this->outputMode = static::IM_IN_HTML; + public function element($ele) + { + $name = $ele->tagName; + + // Per spec: + // If the element has a declared namespace in the HTML, MathML or + // SVG namespaces, we use the lname instead of the tagName. + if ($this->traverser->isLocalElement($ele)) { + $name = $ele->localName; + } + + // If we are in SVG or MathML there is special handling. + // Using if/elseif instead of switch because it's faster in PHP. + if ($name == 'svg') { + $this->outputMode = static::IM_IN_SVG; + $name = Elements::normalizeSvgElement($name); + } elseif ($name == 'math') { + $this->outputMode = static::IM_IN_MATHML; + } + + $this->openTag($ele); + + // Handle children. + if ($ele->hasChildNodes()) { + $this->traverser->children($ele->childNodes); + } + + // Close out the SVG or MathML special handling. + if ($name == 'svg' || $name == 'math') { + $this->outputMode = static::IM_IN_HTML; + } + + // If not unary, add a closing tag. + if (! Elements::isA($name, Elements::VOID_TAG)) { + $this->closeTag($ele); + } } - // If not unary, add a closing tag. - if (!Elements::isA($name, Elements::VOID_TAG)) { - $this->closeTag($ele); - } - } - - /** - * Write a text node. - * - * @param \DOMText $ele - * The text node to write. - */ - public function text($ele) { - if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) { - $this->wr($ele->data); - return; + /** + * Write a text node. + * + * @param \DOMText $ele + * The text node to write. + */ + public function text($ele) + { + if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) { + $this->wr($ele->data); + + return; + } + + // FIXME: This probably needs some flags set. + $this->wr($this->enc($ele->data)); } - // FIXME: This probably needs some flags set. - $this->wr($this->enc($ele->data)); - - } - - public function cdata($ele) { - // This encodes CDATA. - $this->wr($ele->ownerDocument->saveXML($ele)); - } - - public function comment($ele) { - // These produce identical output. - //$this->wr('<!--')->wr($ele->data)->wr('-->'); - $this->wr($ele->ownerDocument->saveXML($ele)); - } - - public function processorInstruction($ele) { - $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr('?>'); - } - - /** - * Write the opening tag. - * - * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the - * qualified name (8.3). - * - * @param \DOMNode $ele - * The element being written. - */ - protected function openTag($ele) { - $this->wr('<')->wr($ele->tagName); - $this->attrs($ele); - - if ($this->outputMode == static::IM_IN_HTML) { - $this->wr('>'); - } - // If we are not in html mode we are in SVG, MathML, or XML embedded content. - else { - if ($ele->hasChildNodes()) { - $this->wr('>'); - } - // If there are no children this is self closing. - else { - $this->wr(' />'); - } + public function cdata($ele) + { + // This encodes CDATA. + $this->wr($ele->ownerDocument->saveXML($ele)); } - } - protected function attrs($ele) { - // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. - if (!$ele->hasAttributes()) { - return $this; + public function comment($ele) + { + // These produce identical output. + // $this->wr('<!--')->wr($ele->data)->wr('-->'); + $this->wr($ele->ownerDocument->saveXML($ele)); } - // TODO: Currently, this always writes name="value", and does not do - // value-less attributes. - $map = $ele->attributes; - $len = $map->length; - for ($i = 0; $i < $len; ++$i) { - $node = $map->item($i); - $val = $this->enc($node->value, TRUE); - - // XXX: The spec says that we need to ensure that anything in - // the XML, XMLNS, or XLink NS's should use the canonical - // prefix. It seems that DOM does this for us already, but there - // may be exceptions. - $name = $node->name; - - // Special handling for attributes in SVG and MathML. - // Using if/elseif instead of switch because it's faster in PHP. - if ($this->outputMode == static::IM_IN_SVG) { - $name = Elements::normalizeSvgAttribute($name); - } - elseif ($this->outputMode == static::IM_IN_MATHML) { - $name = Elements::normalizeMathMlAttribute($name); - } - - $this->wr(' ')->wr($name); - if (isset($val) && $val !== '') { - $this->wr('="')->wr($val)->wr('"'); - } + public function processorInstruction($ele) + { + $this->wr('<?') + ->wr($ele->target) + ->wr(' ') + ->wr($ele->data) + ->wr('?>'); } - } - - /** - * Write the closing tag. - * - * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the - * qualified name (8.3). - * - * @param \DOMNode $ele - * The element being written. - */ - protected function closeTag($ele) { - if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) { - $this->wr('</')->wr($ele->tagName)->wr('>'); + + /** + * Write the opening tag. + * + * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the + * qualified name (8.3). + * + * @param \DOMNode $ele + * The element being written. + */ + protected function openTag($ele) + { + $this->wr('<')->wr($ele->tagName); + $this->attrs($ele); + + if ($this->outputMode == static::IM_IN_HTML) { + $this->wr('>'); + } // If we are not in html mode we are in SVG, MathML, or XML embedded content. + else { + if ($ele->hasChildNodes()) { + $this->wr('>'); + } // If there are no children this is self closing. + else { + $this->wr(' />'); + } + } } - } - - /** - * Write to the output. - * - * @param string $text - * The string to put into the output. - * - * @return Masterminds\HTML5\Serializer\Traverser - * $this so it can be used in chaining. - */ - protected function wr($text) { - fwrite($this->out, $text); - return $this; - } - - /** - * Write a new line character. - * - * @return Masterminds\HTML5\Serializer\Traverser - * $this so it can be used in chaining. - */ - protected function nl() { - fwrite($this->out, PHP_EOL); - return $this; - } - - /** - * Encode text. - * - * When encode is set to FALSE, the default value, the text passed in is - * escaped per section 8.3 of the html5 spec. For details on how text is - * escaped see the escape() method. - * - * When encoding is set to true the text is converted to named character - * references where appropriate. Section 8.1.4 Character references of the - * html5 spec refers to using named character references. This is useful for - * characters that can't otherwise legally be used in the text. - * - * The named character references are listed in section 8.5. - * - * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references - * - * True encoding will turn all named character references into their entities. - * This includes such characters as +.# and many other common ones. By default - * encoding here will just escape &'<>". - * - * Note, PHP 5.4+ has better html5 encoding. - * - * @todo Use the Entities class in php 5.3 to have html5 entities. - * - * @param string $text - * text to encode. - * @param boolean $attribute - * True if we are encoding an attrubute, false otherwise - * - * @return string - * The encoded text. - */ - protected function enc($text, $attribute = FALSE) { - - // Escape the text rather than convert to named character references. - if (!$this->encode) { - return $this->escape($text, $attribute); + + protected function attrs($ele) + { + // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. + if (! $ele->hasAttributes()) { + return $this; + } + + // TODO: Currently, this always writes name="value", and does not do + // value-less attributes. + $map = $ele->attributes; + $len = $map->length; + for ($i = 0; $i < $len; ++ $i) { + $node = $map->item($i); + $val = $this->enc($node->value, TRUE); + + // XXX: The spec says that we need to ensure that anything in + // the XML, XMLNS, or XLink NS's should use the canonical + // prefix. It seems that DOM does this for us already, but there + // may be exceptions. + $name = $node->name; + + // Special handling for attributes in SVG and MathML. + // Using if/elseif instead of switch because it's faster in PHP. + if ($this->outputMode == static::IM_IN_SVG) { + $name = Elements::normalizeSvgAttribute($name); + } elseif ($this->outputMode == static::IM_IN_MATHML) { + $name = Elements::normalizeMathMlAttribute($name); + } + + $this->wr(' ')->wr($name); + if (isset($val) && $val !== '') { + $this->wr('="')->wr($val)->wr('"'); + } + } } - // If we are in PHP 5.4+ we can use the native html5 entity functionality to - // convert the named character references. - if (defined('ENT_HTML5')) { - return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE); + /** + * Write the closing tag. + * + * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the + * qualified name (8.3). + * + * @param \DOMNode $ele + * The element being written. + */ + protected function closeTag($ele) + { + if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) { + $this->wr('</')->wr($ele->tagName)->wr('>'); + } } - // If a version earlier than 5.4 html5 entities are not entirely handled. - // This manually handles them. - else { - return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map); + + /** + * Write to the output. + * + * @param string $text + * The string to put into the output. + * + * @return Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining. + */ + protected function wr($text) + { + fwrite($this->out, $text); + + return $this; } - } - - /** - * Escape test. - * - * According to the html5 spec section 8.3 Serializing HTML fragments, text - * within tags that are not style, script, xmp, iframe, noembed, and noframes - * need to be properly escaped. - * - * The & should be converted to &, no breaking space unicode characters - * converted to , when in attribute mode the " should be converted to - * ", and when not in attribute mode the < and > should be converted to - * < and >. - * - * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString - * - * @param string $text - * text to escape. - * @param boolean $attribute - * True if we are escaping an attrubute, false otherwise - */ - protected function escape($text, $attribute = FALSE) { - - // Not using htmlspecialchars because, while it does escaping, it doesn't - // match the requirements of section 8.5. For example, it doesn't handle - // non-breaking spaces. - if ($attribute) { - $replace = array('"'=>'"', '&'=>'&', "\xc2\xa0"=>' '); + + /** + * Write a new line character. + * + * @return Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining. + */ + protected function nl() + { + fwrite($this->out, PHP_EOL); + + return $this; } - else { - $replace = array('<'=>'<', '>'=>'>', '&'=>'&', "\xc2\xa0"=>' '); + + /** + * Encode text. + * + * When encode is set to FALSE, the default value, the text passed in is + * escaped per section 8.3 of the html5 spec. For details on how text is + * escaped see the escape() method. + * + * When encoding is set to true the text is converted to named character + * references where appropriate. Section 8.1.4 Character references of the + * html5 spec refers to using named character references. This is useful for + * characters that can't otherwise legally be used in the text. + * + * The named character references are listed in section 8.5. + * + * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references True encoding will turn all named character references into their entities. + * This includes such characters as +.# and many other common ones. By default + * encoding here will just escape &'<>". + * + * Note, PHP 5.4+ has better html5 encoding. + * + * @todo Use the Entities class in php 5.3 to have html5 entities. + * + * @param string $text + * text to encode. + * @param boolean $attribute + * True if we are encoding an attrubute, false otherwise + * + * @return string The encoded text. + */ + protected function enc($text, $attribute = FALSE) + { + // Escape the text rather than convert to named character references. + if (! $this->encode) { + return $this->escape($text, $attribute); + } + + // If we are in PHP 5.4+ we can use the native html5 entity functionality to + // convert the named character references. + if (defined('ENT_HTML5')) { + return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE); + } // If a version earlier than 5.4 html5 entities are not entirely handled. + // This manually handles them. + else { + return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map); + } } - return strtr($text, $replace); - } + /** + * Escape test. + * + * According to the html5 spec section 8.3 Serializing HTML fragments, text + * within tags that are not style, script, xmp, iframe, noembed, and noframes + * need to be properly escaped. + * + * The & should be converted to &, no breaking space unicode characters + * converted to , when in attribute mode the " should be converted to + * ", and when not in attribute mode the < and > should be converted to + * < and >. + * + * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString + * + * @param string $text + * text to escape. + * @param boolean $attribute + * True if we are escaping an attrubute, false otherwise + */ + protected function escape($text, $attribute = FALSE) + { + // Not using htmlspecialchars because, while it does escaping, it doesn't + // match the requirements of section 8.5. For example, it doesn't handle + // non-breaking spaces. + if ($attribute) { + $replace = array( + '"' => '"', + '&' => '&', + "\xc2\xa0" => ' ' + ); + } else { + $replace = array( + '<' => '<', + '>' => '>', + '&' => '&', + "\xc2\xa0" => ' ' + ); + } + + return strtr($text, $replace); + } } |