summary | refs | log | tree | commit | diff
path: root/src/HTML5/Serializer/OutputRules.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5/Serializer/OutputRules.php')
-rw-r--r-- src/HTML5/Serializer/OutputRules.php 549
1 files changed, 286 insertions, 263 deletions
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php
index 2425958..168c65c 100644
--- a/src/HTML5/Serializer/OutputRules.php
+++ b/src/HTML5/Serializer/OutputRules.php
@@ -13,302 +13,325 @@ use Masterminds\HTML5\Elements;
/**
* Generate the output html5 based on element rules.
*/
-class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface {
+class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
+{
- const IM_IN_HTML = 1;
- const IM_IN_SVG = 2;
- const IM_IN_MATHML = 3;
+ const IM_IN_HTML = 1;
- protected $traverser;
- protected $encode = FALSE;
- protected $out;
- protected $outputMode;
+ const IM_IN_SVG = 2;
- const DOCTYPE = '<!DOCTYPE html>';
+ const IM_IN_MATHML = 3;
- public function __construct($output, $options = array()) {
+ protected $traverser;
- if (isset($options['encode_entities'])) {
- $this->encode = $options['encode_entities'];
- }
+ protected $encode = FALSE;
- $this->outputMode = static::IM_IN_HTML;
- $this->out = $output;
- }
+ protected $out;
- public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) {
- $this->traverser = $traverser;
+ protected $outputMode;
- return $this;
- }
+ const DOCTYPE = '<!DOCTYPE html>';
- public function document($dom) {
- $this->doctype();
- $this->traverser->node($dom->documentElement);
- $this->nl();
- }
+ public function __construct($output, $options = array())
+ {
+ if (isset($options['encode_entities'])) {
+ $this->encode = $options['encode_entities'];
+ }
- protected function doctype() {
- $this->wr(static::DOCTYPE);
- $this->nl();
- }
+ $this->outputMode = static::IM_IN_HTML;
+ $this->out = $output;
+ }
- public function element($ele) {
- $name = $ele->tagName;
+ public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
+ {
+ $this->traverser = $traverser;
- // Per spec:
- // If the element has a declared namespace in the HTML, MathML or
- // SVG namespaces, we use the lname instead of the tagName.
- if ($this->traverser->isLocalElement($ele)) {
- $name = $ele->localName;
+ return $this;
}
- // If we are in SVG or MathML there is special handling.
- // Using if/elseif instead of switch because it's faster in PHP.
- if ($name == 'svg') {
- $this->outputMode = static::IM_IN_SVG;
- $name = Elements::normalizeSvgElement($name);
+ public function document($dom)
+ {
+ $this->doctype();
+ $this->traverser->node($dom->documentElement);
+ $this->nl();
}
- elseif ($name == 'math') {
- $this->outputMode = static::IM_IN_MATHML;
- }
-
- $this->openTag($ele);
- // Handle children.
- if ($ele->hasChildNodes()) {
- $this->traverser->children($ele->childNodes);
+ protected function doctype()
+ {
+ $this->wr(static::DOCTYPE);
+ $this->nl();
}
- // Close out the SVG or MathML special handling.
- if ($name == 'svg' || $name == 'math') {
- $this->outputMode = static::IM_IN_HTML;
+ public function element($ele)
+ {
+ $name = $ele->tagName;
+
+ // Per spec:
+ // If the element has a declared namespace in the HTML, MathML or
+ // SVG namespaces, we use the lname instead of the tagName.
+ if ($this->traverser->isLocalElement($ele)) {
+ $name = $ele->localName;
+ }
+
+ // If we are in SVG or MathML there is special handling.
+ // Using if/elseif instead of switch because it's faster in PHP.
+ if ($name == 'svg') {
+ $this->outputMode = static::IM_IN_SVG;
+ $name = Elements::normalizeSvgElement($name);
+ } elseif ($name == 'math') {
+ $this->outputMode = static::IM_IN_MATHML;
+ }
+
+ $this->openTag($ele);
+
+ // Handle children.
+ if ($ele->hasChildNodes()) {
+ $this->traverser->children($ele->childNodes);
+ }
+
+ // Close out the SVG or MathML special handling.
+ if ($name == 'svg' || $name == 'math') {
+ $this->outputMode = static::IM_IN_HTML;
+ }
+
+ // If not unary, add a closing tag.
+ if (! Elements::isA($name, Elements::VOID_TAG)) {
+ $this->closeTag($ele);
+ }
}
- // If not unary, add a closing tag.
- if (!Elements::isA($name, Elements::VOID_TAG)) {
- $this->closeTag($ele);
- }
- }
-
- /**
- * Write a text node.
- *
- * @param \DOMText $ele
- * The text node to write.
- */
- public function text($ele) {
- if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) {
- $this->wr($ele->data);
- return;
+ /**
+ * Write a text node.
+ *
+ * @param \DOMText $ele
+ * The text node to write.
+ */
+ public function text($ele)
+ {
+ if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) {
+ $this->wr($ele->data);
+
+ return;
+ }
+
+ // FIXME: This probably needs some flags set.
+ $this->wr($this->enc($ele->data));
}
- // FIXME: This probably needs some flags set.
- $this->wr($this->enc($ele->data));
-
- }
-
- public function cdata($ele) {
- // This encodes CDATA.
- $this->wr($ele->ownerDocument->saveXML($ele));
- }
-
- public function comment($ele) {
- // These produce identical output.
- //$this->wr('<!--')->wr($ele->data)->wr('-->');
- $this->wr($ele->ownerDocument->saveXML($ele));
- }
-
- public function processorInstruction($ele) {
- $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr('?>');
- }
-
- /**
- * Write the opening tag.
- *
- * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
- * qualified name (8.3).
- *
- * @param \DOMNode $ele
- * The element being written.
- */
- protected function openTag($ele) {
- $this->wr('<')->wr($ele->tagName);
- $this->attrs($ele);
-
- if ($this->outputMode == static::IM_IN_HTML) {
- $this->wr('>');
- }
- // If we are not in html mode we are in SVG, MathML, or XML embedded content.
- else {
- if ($ele->hasChildNodes()) {
- $this->wr('>');
- }
- // If there are no children this is self closing.
- else {
- $this->wr(' />');
- }
+ public function cdata($ele)
+ {
+ // This encodes CDATA.
+ $this->wr($ele->ownerDocument->saveXML($ele));
}
- }
- protected function attrs($ele) {
- // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
- if (!$ele->hasAttributes()) {
- return $this;
+ public function comment($ele)
+ {
+ // These produce identical output.
+ // $this->wr('<!--')->wr($ele->data)->wr('-->');
+ $this->wr($ele->ownerDocument->saveXML($ele));
}
- // TODO: Currently, this always writes name="value", and does not do
- // value-less attributes.
- $map = $ele->attributes;
- $len = $map->length;
- for ($i = 0; $i < $len; ++$i) {
- $node = $map->item($i);
- $val = $this->enc($node->value, TRUE);
-
- // XXX: The spec says that we need to ensure that anything in
- // the XML, XMLNS, or XLink NS's should use the canonical
- // prefix. It seems that DOM does this for us already, but there
- // may be exceptions.
- $name = $node->name;
-
- // Special handling for attributes in SVG and MathML.
- // Using if/elseif instead of switch because it's faster in PHP.
- if ($this->outputMode == static::IM_IN_SVG) {
- $name = Elements::normalizeSvgAttribute($name);
- }
- elseif ($this->outputMode == static::IM_IN_MATHML) {
- $name = Elements::normalizeMathMlAttribute($name);
- }
-
- $this->wr(' ')->wr($name);
- if (isset($val) && $val !== '') {
- $this->wr('="')->wr($val)->wr('"');
- }
+ public function processorInstruction($ele)
+ {
+ $this->wr('<?')
+ ->wr($ele->target)
+ ->wr(' ')
+ ->wr($ele->data)
+ ->wr('?>');
}
- }
-
- /**
- * Write the closing tag.
- *
- * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
- * qualified name (8.3).
- *
- * @param \DOMNode $ele
- * The element being written.
- */
- protected function closeTag($ele) {
- if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
- $this->wr('</')->wr($ele->tagName)->wr('>');
+
+ /**
+ * Write the opening tag.
+ *
+ * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
+ * qualified name (8.3).
+ *
+ * @param \DOMNode $ele
+ * The element being written.
+ */
+ protected function openTag($ele)
+ {
+ $this->wr('<')->wr($ele->tagName);
+ $this->attrs($ele);
+
+ if ($this->outputMode == static::IM_IN_HTML) {
+ $this->wr('>');
+ } // If we are not in html mode we are in SVG, MathML, or XML embedded content.
+ else {
+ if ($ele->hasChildNodes()) {
+ $this->wr('>');
+ } // If there are no children this is self closing.
+ else {
+ $this->wr(' />');
+ }
+ }
}
- }
-
- /**
- * Write to the output.
- *
- * @param string $text
- * The string to put into the output.
- *
- * @return Masterminds\HTML5\Serializer\Traverser
- * $this so it can be used in chaining.
- */
- protected function wr($text) {
- fwrite($this->out, $text);
- return $this;
- }
-
- /**
- * Write a new line character.
- *
- * @return Masterminds\HTML5\Serializer\Traverser
- * $this so it can be used in chaining.
- */
- protected function nl() {
- fwrite($this->out, PHP_EOL);
- return $this;
- }
-
- /**
- * Encode text.
- *
- * When encode is set to FALSE, the default value, the text passed in is
- * escaped per section 8.3 of the html5 spec. For details on how text is
- * escaped see the escape() method.
- *
- * When encoding is set to true the text is converted to named character
- * references where appropriate. Section 8.1.4 Character references of the
- * html5 spec refers to using named character references. This is useful for
- * characters that can't otherwise legally be used in the text.
- *
- * The named character references are listed in section 8.5.
- *
- * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
- *
- * True encoding will turn all named character references into their entities.
- * This includes such characters as +.# and many other common ones. By default
- * encoding here will just escape &'<>".
- *
- * Note, PHP 5.4+ has better html5 encoding.
- *
- * @todo Use the Entities class in php 5.3 to have html5 entities.
- *
- * @param string $text
- * text to encode.
- * @param boolean $attribute
- * True if we are encoding an attrubute, false otherwise
- *
- * @return string
- * The encoded text.
- */
- protected function enc($text, $attribute = FALSE) {
-
- // Escape the text rather than convert to named character references.
- if (!$this->encode) {
- return $this->escape($text, $attribute);
+
+ protected function attrs($ele)
+ {
+ // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
+ if (! $ele->hasAttributes()) {
+ return $this;
+ }
+
+ // TODO: Currently, this always writes name="value", and does not do
+ // value-less attributes.
+ $map = $ele->attributes;
+ $len = $map->length;
+ for ($i = 0; $i < $len; ++ $i) {
+ $node = $map->item($i);
+ $val = $this->enc($node->value, TRUE);
+
+ // XXX: The spec says that we need to ensure that anything in
+ // the XML, XMLNS, or XLink NS's should use the canonical
+ // prefix. It seems that DOM does this for us already, but there
+ // may be exceptions.
+ $name = $node->name;
+
+ // Special handling for attributes in SVG and MathML.
+ // Using if/elseif instead of switch because it's faster in PHP.
+ if ($this->outputMode == static::IM_IN_SVG) {
+ $name = Elements::normalizeSvgAttribute($name);
+ } elseif ($this->outputMode == static::IM_IN_MATHML) {
+ $name = Elements::normalizeMathMlAttribute($name);
+ }
+
+ $this->wr(' ')->wr($name);
+ if (isset($val) && $val !== '') {
+ $this->wr('="')->wr($val)->wr('"');
+ }
+ }
}
- // If we are in PHP 5.4+ we can use the native html5 entity functionality to
- // convert the named character references.
- if (defined('ENT_HTML5')) {
- return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE);
+ /**
+ * Write the closing tag.
+ *
+ * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
+ * qualified name (8.3).
+ *
+ * @param \DOMNode $ele
+ * The element being written.
+ */
+ protected function closeTag($ele)
+ {
+ if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
+ $this->wr('</')->wr($ele->tagName)->wr('>');
+ }
}
- // If a version earlier than 5.4 html5 entities are not entirely handled.
- // This manually handles them.
- else {
- return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
+
+ /**
+ * Write to the output.
+ *
+ * @param string $text
+ * The string to put into the output.
+ *
+ * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
+ */
+ protected function wr($text)
+ {
+ fwrite($this->out, $text);
+
+ return $this;
}
- }
-
- /**
- * Escape test.
- *
- * According to the html5 spec section 8.3 Serializing HTML fragments, text
- * within tags that are not style, script, xmp, iframe, noembed, and noframes
- * need to be properly escaped.
- *
- * The & should be converted to &amp;, no breaking space unicode characters
- * converted to &nbsp;, when in attribute mode the " should be converted to
- * &quot;, and when not in attribute mode the < and > should be converted to
- * &lt; and &gt;.
- *
- * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
- *
- * @param string $text
- * text to escape.
- * @param boolean $attribute
- * True if we are escaping an attrubute, false otherwise
- */
- protected function escape($text, $attribute = FALSE) {
-
- // Not using htmlspecialchars because, while it does escaping, it doesn't
- // match the requirements of section 8.5. For example, it doesn't handle
- // non-breaking spaces.
- if ($attribute) {
- $replace = array('"'=>'&quot;', '&'=>'&amp;', "\xc2\xa0"=>'&nbsp;');
+
+ /**
+ * Write a new line character.
+ *
+ * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
+ */
+ protected function nl()
+ {
+ fwrite($this->out, PHP_EOL);
+
+ return $this;
}
- else {
- $replace = array('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', "\xc2\xa0"=>'&nbsp;');
+
+ /**
+ * Encode text.
+ *
+ * When encode is set to FALSE, the default value, the text passed in is
+ * escaped per section 8.3 of the html5 spec. For details on how text is
+ * escaped see the escape() method.
+ *
+ * When encoding is set to true the text is converted to named character
+ * references where appropriate. Section 8.1.4 Character references of the
+ * html5 spec refers to using named character references. This is useful for
+ * characters that can't otherwise legally be used in the text.
+ *
+ * The named character references are listed in section 8.5.
+ *
+ * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
+ *
+ * True encoding will turn all named character references into their entities. This includes such characters as +.# and many other common ones. By default encoding here will just escape &'<>".
+ *
+ * Note, PHP 5.4+ has better html5 encoding.
+ *
+ * @todo Use the Entities class in php 5.3 to have html5 entities.
+ *
+ * @param string $text
+ * text to encode.
+ * @param boolean $attribute
+ * True if we are encoding an attribute, false otherwise
+ *
+ * @return string The encoded text.
+ */
+ protected function enc($text, $attribute = FALSE)
+ {
+ // Escape the text rather than convert to named character references.
+ if (! $this->encode) {
+ return $this->escape($text, $attribute);
+ }
+
+ // If we are in PHP 5.4+ we can use the native html5 entity functionality to
+ // convert the named character references.
+ if (defined('ENT_HTML5')) {
+ return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE);
+ } // If a version earlier than 5.4 html5 entities are not entirely handled.
+ // This manually handles them.
+ else {
+ return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
+ }
}
- return strtr($text, $replace);
- }
+ /**
+ * Escape text.
+ *
+ * According to the html5 spec section 8.3 Serializing HTML fragments, text
+ * within tags that are not style, script, xmp, iframe, noembed, and noframes
+ * need to be properly escaped.
+ *
+ * The & should be converted to &amp;, non-breaking space unicode characters
+ * converted to &nbsp;, when in attribute mode the " should be converted to
+ * &quot;, and when not in attribute mode the < and > should be converted to
+ * &lt; and &gt;.
+ *
+ * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
+ *
+ * @param string $text
+ * text to escape.
+ * @param boolean $attribute
+ * True if we are escaping an attribute, false otherwise
+ */
+ protected function escape($text, $attribute = FALSE)
+ {
+ // Not using htmlspecialchars because, while it does escaping, it doesn't
+ // match the requirements of section 8.5. For example, it doesn't handle
+ // non-breaking spaces.
+ if ($attribute) {
+ $replace = array(
+ '"' => '&quot;',
+ '&' => '&amp;',
+ "\xc2\xa0" => '&nbsp;'
+ );
+ } else {
+ $replace = array(
+ '<' => '&lt;',
+ '>' => '&gt;',
+ '&' => '&amp;',
+ "\xc2\xa0" => '&nbsp;'
+ );
+ }
+
+ return strtr($text, $replace);
+ }
}