From 0226e0ca0dc70f9a0310b3eef045ee1c1e0ca3ac Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 13 Dec 2022 20:00:46 +0300 Subject: split into a separate repo --- .../html5/src/HTML5/Serializer/OutputRules.php | 553 +++++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php (limited to 'vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php') diff --git a/vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php b/vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php new file mode 100644 index 0000000..ec467f2 --- /dev/null +++ b/vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php @@ -0,0 +1,553 @@ +'http://www.w3.org/1999/xhtml', + 'attrNamespace'=>'http://www.w3.org/1999/xhtml', + + 'nodeName'=>'img', 'nodeName'=>array('img', 'a'), + 'attrName'=>'alt', 'attrName'=>array('title', 'alt'), + ), + */ + array( + 'nodeNamespace' => 'http://www.w3.org/1999/xhtml', + 'attrName' => array('href', + 'hreflang', + 'http-equiv', + 'icon', + 'id', + 'keytype', + 'kind', + 'label', + 'lang', + 'language', + 'list', + 'maxlength', + 'media', + 'method', + 'name', + 'placeholder', + 'rel', + 'rows', + 'rowspan', + 'sandbox', + 'spellcheck', + 'scope', + 'seamless', + 'shape', + 'size', + 'sizes', + 'span', + 'src', + 'srcdoc', + 'srclang', + 'srcset', + 'start', + 'step', + 'style', + 'summary', + 'tabindex', + 'target', + 'title', + 'type', + 'value', + 'width', + 'border', + 'charset', + 'cite', + 'class', + 'code', + 'codebase', + 'color', + 'cols', + 'colspan', + 'content', + 'coords', + 'data', + 'datetime', + 'default', + 'dir', + 'dirname', + 'enctype', + 'for', + 'form', + 'formaction', + 'headers', + 'height', + 'accept', + 'accept-charset', + 'accesskey', + 'action', + 'align', + 'alt', + 'bgcolor', + ), + ), + array( + 'nodeNamespace' => 'http://www.w3.org/1999/xhtml', + 'xpath' => 'starts-with(local-name(), \'data-\')', + ), + ); + + const DOCTYPE = ''; + + 
/**
 * Set up the output rules.
 *
 * @param resource $output  Stream every write goes to; it is handed to fwrite().
 * @param array    $options Recognised key: 'encode_entities'. When present its
 *                          value replaces the $encode flag consulted by enc().
 */
public function __construct($output, $options = array())
{
    if (isset($options['encode_entities'])) {
        $this->encode = $options['encode_entities'];
    }

    $this->outputMode = static::IM_IN_HTML;
    $this->out = $output;
    // enc() picks its implementation based on whether ENT_HTML5 is defined.
    $this->hasHTML5 = defined('ENT_HTML5');
}

/**
 * Append a rule to the list consulted by nonBooleanAttribute().
 *
 * @param array $rule Rule array; see nonBooleanAttribute() for the keys read.
 */
public function addRule(array $rule)
{
    $this->nonBooleanAttributes[] = $rule;
}

/**
 * Attach the traverser used to walk child nodes.
 *
 * @param Traverser $traverser The traverser to delegate to.
 *
 * @return $this
 */
public function setTraverser(Traverser $traverser)
{
    $this->traverser = $traverser;

    return $this;
}

/**
 * Drop the reference to the traverser.
 *
 * @return $this
 */
public function unsetTraverser()
{
    $this->traverser = null;

    return $this;
}

/**
 * Write a whole document: the doctype, then every top-level child node.
 *
 * Children (and the trailing newline) are only written when the document
 * has a document element.
 *
 * @param \DOMDocument $dom The document to write.
 */
public function document($dom)
{
    $this->doctype();
    if ($dom->documentElement) {
        foreach ($dom->childNodes as $node) {
            $this->traverser->node($node);
        }
        $this->nl();
    }
}

/**
 * Write the DOCTYPE constant followed by a newline.
 */
protected function doctype()
{
    $this->wr(static::DOCTYPE);
    $this->nl();
}

/**
 * Write an element: opening tag, children, and (unless void) closing tag.
 *
 * @param \DOMElement $ele The element to write.
 */
public function element($ele)
{
    // Per spec: an element with a declared namespace in the HTML, MathML or
    // SVG namespaces is written with its local name instead of the tagName.
    $name = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;

    // Entering SVG or MathML switches the output mode, which changes how
    // openTag(), attrs() and closeTag() behave.
    if ('svg' === $name) {
        $this->outputMode = static::IM_IN_SVG;
        $name = Elements::normalizeSvgElement($name);
    } elseif ('math' === $name) {
        $this->outputMode = static::IM_IN_MATHML;
    }

    $this->openTag($ele);

    if (Elements::isA($name, Elements::TEXT_RAW)) {
        // Raw-text elements: character data is written without encoding.
        foreach ($ele->childNodes as $child) {
            if ($child instanceof \DOMCharacterData) {
                $this->wr($child->data);
            } elseif ($child instanceof \DOMElement) {
                $this->element($child);
            }
        }
    } else {
        if ($ele->hasChildNodes()) {
            $this->traverser->children($ele->childNodes);
        }

        // Leaving the SVG or MathML root: go back to HTML output mode.
        if ('svg' === $name || 'math' === $name) {
            $this->outputMode = static::IM_IN_HTML;
        }
    }

    // If not unary, add a closing tag.
    if (!Elements::isA($name, Elements::VOID_TAG)) {
        $this->closeTag($ele);
    }
}

/**
 * Write a text node.
 *
 * Text whose parent element is flagged TEXT_RAW is written verbatim; all
 * other text goes through enc().
 *
 * @param \DOMText $ele The text node to write.
 */
public function text($ele)
{
    if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
        $this->wr($ele->data);

        return;
    }

    // FIXME: This probably needs some flags set.
    $this->wr($this->enc($ele->data));
}

/**
 * Write a CDATA section using the owner document's XML serialisation.
 *
 * @param \DOMNode $ele The CDATA node to write.
 */
public function cdata($ele)
{
    // This encodes CDATA.
    $this->wr($ele->ownerDocument->saveXML($ele));
}

/**
 * Write a comment node using the owner document's XML serialisation.
 *
 * @param \DOMNode $ele The comment node to write.
 */
public function comment($ele)
{
    // saveXML() produces the same output as writing the comment delimiters
    // around $ele->data by hand.
    $this->wr($ele->ownerDocument->saveXML($ele));
}

/**
 * Write a processing instruction as "<?target data?>".
 *
 * @param \DOMProcessingInstruction $ele The processing instruction to write.
 */
public function processorInstruction($ele)
{
    $this->wr('<?')
        ->wr($ele->target)
        ->wr(' ')
        ->wr($ele->data)
        ->wr('?>');
}

/**
 * Write the namespace attributes.
 *
 * Only namespace declarations that differ from those of the parent element
 * and are not listed in $implicitNamespaces are written.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function namespaceAttrs($ele)
{
    $xp = $this->getXPath($ele);

    foreach ($xp->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
        if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
            continue;
        }

        // The URI lands inside a double-quoted attribute value, so it gets
        // the same minimal attribute escaping as any other value.
        $this->wr(' ')
            ->wr($nsNode->nodeName)
            ->wr('="')
            ->wr($this->escape($nsNode->nodeValue, true))
            ->wr('"');
    }
}

/**
 * Write the opening tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * @param \DOMNode $ele The element being written.
 */
protected function openTag($ele)
{
    $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

    $this->attrs($ele);
    $this->namespaceAttrs($ele);

    // Outside HTML mode (SVG, MathML or embedded XML) an element without
    // children is written as self-closing.
    if ($this->outputMode === static::IM_IN_HTML || $ele->hasChildNodes()) {
        $this->wr('>');
    } else {
        $this->wr(' />');
    }
}

/**
 * Write the attributes of an element.
 *
 * An attribute is written as name="value" when its encoded value is
 * non-empty or when nonBooleanAttribute() matches it; otherwise only the
 * name is written.
 *
 * @param \DOMNode $ele The element whose attributes are written.
 *
 * @return $this
 */
protected function attrs($ele)
{
    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
    if (!$ele->hasAttributes()) {
        return $this;
    }

    $map = $ele->attributes;
    $len = $map->length;
    for ($i = 0; $i < $len; ++$i) {
        $node = $map->item($i);
        $val = $this->enc($node->value, true);

        // XXX: The spec says that we need to ensure that anything in
        // the XML, XMLNS, or XLink NS's should use the canonical
        // prefix. It seems that DOM does this for us already, but there
        // may be exceptions.
        $name = $node->nodeName;

        // Special handling for attributes in SVG and MathML.
        if ($this->outputMode === static::IM_IN_SVG) {
            $name = Elements::normalizeSvgAttribute($name);
        } elseif ($this->outputMode === static::IM_IN_MATHML) {
            $name = Elements::normalizeMathMlAttribute($name);
        }

        $this->wr(' ')->wr($name);

        if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) {
            $this->wr('="')->wr($val)->wr('"');
        }
    }

    return $this;
}

/**
 * Tell whether an attribute matches one of the $nonBooleanAttributes rules.
 *
 * Rule keys read here: 'nodeNamespace', 'attrNamespace' (and its legacy
 * spelling 'attNamespace'), 'nodeName' and 'attrName' (a string or an array
 * of strings), 'xpath' (expression evaluated with the attribute as context)
 * and 'prefixes' (prefix => namespace map registered before evaluating
 * 'xpath'). A rule matches when every key it sets is satisfied.
 *
 * @param \DOMAttr $attr The attribute to test.
 *
 * @return bool True as soon as one rule matches, false otherwise.
 */
protected function nonBooleanAttribute(\DOMAttr $attr)
{
    $ele = $attr->ownerElement;
    foreach ($this->nonBooleanAttributes as $rule) {
        if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
            continue;
        }

        // The example rule next to $nonBooleanAttributes spells this key
        // 'attrNamespace'; 'attNamespace' is the spelling this method
        // originally read, so both are honoured.
        foreach (array('attrNamespace', 'attNamespace') as $nsKey) {
            if (isset($rule[$nsKey]) && $rule[$nsKey] !== $attr->namespaceURI) {
                continue 2;
            }
        }

        // The array cast lets a single name and a list of names share one check.
        if (isset($rule['nodeName']) && !in_array($ele->localName, (array) $rule['nodeName'], true)) {
            continue;
        }
        if (isset($rule['attrName']) && !in_array($attr->localName, (array) $rule['attrName'], true)) {
            continue;
        }

        if (isset($rule['xpath'])) {
            $xp = $this->getXPath($attr);
            if (isset($rule['prefixes'])) {
                foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                    $xp->registerNamespace($nsPrefix, $ns);
                }
            }
            if (!$xp->evaluate($rule['xpath'], $attr)) {
                continue;
            }
        }

        return true;
    }

    return false;
}

/**
 * Return a DOMXPath bound to the document that owns the given node.
 *
 * The instance is cached and rebuilt whenever the cached one belongs to a
 * different document, so queries never run against a stale document.
 *
 * @param \DOMNode $node Any node of the document being written.
 *
 * @return \DOMXPath
 */
private function getXPath(\DOMNode $node)
{
    if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
        $this->xpath = new \DOMXPath($node->ownerDocument);
    }

    return $this->xpath;
}

/**
 * Write the closing tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * Outside HTML mode nothing is written for an element without children,
 * because openTag() already wrote it as self-closing.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function closeTag($ele)
{
    if ($this->outputMode === static::IM_IN_HTML || $ele->hasChildNodes()) {
        $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
    }
}

/**
 * Write to the output.
 *
 * @param string $text The string to put into the output
 *
 * @return $this
 */
protected function wr($text)
{
    fwrite($this->out, $text);

    return $this;
}

/**
 * Write a new line character.
 *
 * @return $this
 */
protected function nl()
{
    fwrite($this->out, PHP_EOL);

    return $this;
}

/**
 * Encode text.
 *
 * When the $encode flag is false the text is only escaped, as described on
 * escape() (html5 spec section 8.3).
 *
 * When the flag is true the text is converted to named character references
 * where appropriate (html5 spec section 8.1.4; the references themselves are
 * listed in section 8.5). This turns many common characters, such as + . #,
 * into entities.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
 *
 * @todo Use the Entities class in php 5.3 to have html5 entities.
 *
 * @param string $text      Text to encode.
 * @param bool   $attribute True if we are encoding an attribute, false otherwise.
 *
 * @return string The encoded text.
 */
protected function enc($text, $attribute = false)
{
    // Escape the text rather than convert to named character references.
    if (!$this->encode) {
        return $this->escape($text, $attribute);
    }

    // With ENT_HTML5 available (PHP 5.4+) the native html5 entity support
    // converts the named character references.
    if ($this->hasHTML5) {
        return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
    }

    // Earlier versions do not handle html5 entities entirely; map them manually.
    return strtr($text, HTML5Entities::$map);
}

/**
 * Escape text.
 *
 * According to the html5 spec section 8.3 Serializing HTML fragments, text
 * within tags that are not style, script, xmp, iframe, noembed, and noframes
 * need to be properly escaped.
 *
 * The & is converted to &amp; and the no-break space (U+00A0) to &nbsp;.
 * In attribute mode the " is additionally converted to &quot;; outside
 * attribute mode the < and > are converted to &lt; and &gt;.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
 *
 * @param string $text      Text to escape.
 * @param bool   $attribute True if we are escaping an attribute, false otherwise.
 *
 * @return string The escaped text.
 */
protected function escape($text, $attribute = false)
{
    // Not using htmlspecialchars because, while it does escaping, it doesn't
    // match the requirements of section 8.5. For example, it doesn't handle
    // non-breaking spaces.
    if ($attribute) {
        $replace = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    } else {
        $replace = array(
            '<' => '&lt;',
            '>' => '&gt;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    }

    // strtr() with an array never re-scans its own replacements, so the
    // ampersands it inserts are not escaped a second time.
    return strtr($text, $replace);
}
}
-- cgit v1.2.3