summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAsmir Mustafic <[email protected]>2014-06-11 10:34:56 +0200
committerAsmir Mustafic <[email protected]>2014-06-11 10:34:56 +0200
commite49c45913b9cff66e3ca25a17e98604e18388e1e (patch)
treebe24de17532fd4f0bbef1ac5d57766de1f8eb2de
parentd6940c3cc8eebd72627d42a0c0b3139f951716f4 (diff)
PSR-2 code style
-rw-r--r--src/HTML5.php410
-rw-r--r--src/HTML5/Elements.php1189
-rw-r--r--src/HTML5/Entities.php4458
-rw-r--r--src/HTML5/Exception.php3
-rw-r--r--src/HTML5/InstructionProcessor.php46
-rw-r--r--src/HTML5/Parser/CharacterReference.php85
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php850
-rw-r--r--src/HTML5/Parser/EventHandler.php201
-rw-r--r--src/HTML5/Parser/FileInputStream.php45
-rw-r--r--src/HTML5/Parser/InputStream.php139
-rw-r--r--src/HTML5/Parser/ParseError.php3
-rw-r--r--src/HTML5/Parser/Scanner.php411
-rw-r--r--src/HTML5/Parser/StringInputStream.php526
-rw-r--r--src/HTML5/Parser/Tokenizer.php1907
-rw-r--r--src/HTML5/Parser/TreeBuildingRules.php208
-rw-r--r--src/HTML5/Parser/UTF8Utils.php262
-rw-r--r--src/HTML5/Serializer/HTML5Entities.php3031
-rw-r--r--src/HTML5/Serializer/OutputRules.php549
-rw-r--r--src/HTML5/Serializer/RulesInterface.php163
-rw-r--r--src/HTML5/Serializer/Traverser.php244
-rw-r--r--test/HTML5/ElementsTest.php841
-rw-r--r--test/HTML5/Html5Test.php541
-rw-r--r--test/HTML5/Parser/CharacterReferenceTest.php56
-rw-r--r--test/HTML5/Parser/DOMTreeBuilderTest.php569
-rw-r--r--test/HTML5/Parser/EventStack.php156
-rw-r--r--test/HTML5/Parser/EventStackError.php3
-rw-r--r--test/HTML5/Parser/FileInputStreamTest.php322
-rw-r--r--test/HTML5/Parser/InstructionProcessorMock.php12
-rw-r--r--test/HTML5/Parser/ScannerTest.php231
-rw-r--r--test/HTML5/Parser/StringInputStreamTest.php654
-rw-r--r--test/HTML5/Parser/TokenizerTest.php1475
-rw-r--r--test/HTML5/Parser/TreeBuildingRulesTest.php97
-rw-r--r--test/HTML5/Serializer/OutputRulesTest.php604
-rw-r--r--test/HTML5/Serializer/TraverserTest.php122
-rw-r--r--test/HTML5/TestCase.php31
35 files changed, 10627 insertions, 9817 deletions
diff --git a/src/HTML5.php b/src/HTML5.php
index 121ef54..6d6d4e4 100644
--- a/src/HTML5.php
+++ b/src/HTML5.php
@@ -1,5 +1,4 @@
<?php
-
namespace Masterminds;
use Masterminds\HTML5\Parser\FileInputStream;
@@ -19,206 +18,229 @@ use Masterminds\HTML5\Serializer\Traverser;
*/
class HTML5
{
- /**
- * Global options for the parser and serializer.
- * @var array
- */
- private $options = array(
- // If the serializer should encode all entities.
- 'encode_entities' => FALSE
- );
-
- private $errors = array();
-
- public function __construct(array $options = array()) {
- $this->options = array_merge($this->options, $options);
- }
- /**
- * Get the default options.
- *
- * @return array
- * The default options.
- */
- public function getOptions() {
- return $this->options;
- }
- /**
- * Load and parse an HTML file.
- *
- * This will apply the HTML5 parser, which is tolerant of many
- * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
- * 3. Note that in these cases, not all of the old data will be
- * preserved. For example, XHTML's XML declaration will be removed.
- *
- * The rules governing parsing are set out in the HTML 5 spec.
- *
- * @param string $file
- * The path to the file to parse. If this is a resource, it is
- * assumed to be an open stream whose pointer is set to the first
- * byte of input.
- * @return \DOMDocument
- * A DOM document. This object type is defined by the libxml
- * library, and should have been included with your version of PHP.
- */
- public function load($file) {
- // Handle the case where file is a resource.
- if (is_resource($file)) {
- // FIXME: We need a StreamInputStream class.
- return $this->loadHTML(stream_get_contents($file));
+
+ /**
+ * Global options for the parser and serializer.
+ *
+ * @var array
+ */
+ private $options = array(
+ // If the serializer should encode all entities.
+ 'encode_entities' => FALSE
+ );
+
+ private $errors = array();
+
+ public function __construct(array $options = array())
+ {
+ $this->options = array_merge($this->options, $options);
+ }
+
+ /**
+ * Get the default options.
+ *
+ * @return array The default options.
+ */
+ public function getOptions()
+ {
+ return $this->options;
+ }
+
+ /**
+ * Load and parse an HTML file.
+ *
+ * This will apply the HTML5 parser, which is tolerant of many
+ * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
+ * 3. Note that in these cases, not all of the old data will be
+ * preserved. For example, XHTML's XML declaration will be removed.
+ *
+ * The rules governing parsing are set out in the HTML 5 spec.
+ *
+ * @param string $file
+ * The path to the file to parse. If this is a resource, it is
+ * assumed to be an open stream whose pointer is set to the first
+ * byte of input.
+ * @return \DOMDocument A DOM document. This object type is defined by the libxml
+ * library, and should have been included with your version of PHP.
+ */
+ public function load($file)
+ {
+ // Handle the case where file is a resource.
+ if (is_resource($file)) {
+ // FIXME: We need a StreamInputStream class.
+ return $this->loadHTML(stream_get_contents($file));
+ }
+
+ $input = new FileInputStream($file);
+
+ return $this->parse($input);
+ }
+
+ /**
+ * Parse a HTML Document from a string.
+ *
+ * Take a string of HTML 5 (or earlier) and parse it into a
+ * DOMDocument.
+ *
+ * @param string $string
+ * A html5 document as a string.
+ * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
+ * almost all distributions of PHP.
+ */
+ public function loadHTML($string)
+ {
+ $input = new StringInputStream($string);
+
+ return $this->parse($input);
+ }
+
+ /**
+ * Convenience function to load an HTML file.
+ *
+ * This is here to provide backwards compatibility with the
+ * PHP DOM implementation. It simply calls load().
+ *
+ * @param string $file
+ * The path to the file to parse. If this is a resource, it is
+ * assumed to be an open stream whose pointer is set to the first
+ * byte of input.
+ *
+ * @return \DOMDocument A DOM document. This object type is defined by the libxml
+ * library, and should have been included with your version of PHP.
+ */
+ public function loadHTMLFile($file)
+ {
+ return $this->load($file);
}
- $input = new FileInputStream($file);
- return $this->parse($input);
- }
- /**
- * Parse a HTML Document from a string.
- *
- * Take a string of HTML 5 (or earlier) and parse it into a
- * DOMDocument.
- *
- * @param string $string
- * A html5 document as a string.
- * @return \DOMDocument
- * A DOM document. DOM is part of libxml, which is included with
- * almost all distributions of PHP.
- */
- public function loadHTML($string) {
- $input = new StringInputStream($string);
- return $this->parse($input);
- }
- /**
- * Convenience function to load an HTML file.
- *
- * This is here to provide backwards compatibility with the
- * PHP DOM implementation. It simply calls load().
- *
- * @param string $file
- * The path to the file to parse. If this is a resource, it is
- * assumed to be an open stream whose pointer is set to the first
- * byte of input.
- *
- * @return \DOMDocument
- * A DOM document. This object type is defined by the libxml
- * library, and should have been included with your version of PHP.
- */
- public function loadHTMLFile($file) {
- return $this->load($file);
- }
- /**
- * Parse a HTML fragment from a string.
- *
- * @param string $string
- * The html5 fragment as a string.
- *
- * @return \DOMDocumentFragment
- * A DOM fragment. The DOM is part of libxml, which is included with
- * almost all distributions of PHP.
- */
- public function loadHTMLFragment($string) {
- $input = new StringInputStream($string);
- return $this->parseFragment($input);
- }
- /**
- * Return all errors encountered during the parsing phase
- * @return array
- */
- public function getErrors() {
- return $this->errors;
- }
- /**
- * Return true if some errors were encountered during the parsing phase
- * @return bool
- */
- public function hasErrors() {
- return count($this->errors)>0;
- }
-
- /**
- * Parse an input stream.
- *
- * Lower-level loading function. This requires an input stream instead
- * of a string, file, or resource.
- */
- public function parse(\Masterminds\HTML5\Parser\InputStream $input) {
- $this->errors = array();
- $events = new DOMTreeBuilder();
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $events);
-
- $parser->parse();
-
- $document = $events->document();
-
- if($document){
- $this->errors = $document->errors;
+ /**
+ * Parse a HTML fragment from a string.
+ *
+ * @param string $string
+ * The html5 fragment as a string.
+ *
+ * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
+ * almost all distributions of PHP.
+ */
+ public function loadHTMLFragment($string)
+ {
+ $input = new StringInputStream($string);
+
+ return $this->parseFragment($input);
}
- return $document;
- }
- /**
- * Parse an input stream where the stream is a fragment.
- *
- * Lower-level loading function. This requires an input stream instead
- * of a string, file, or resource.
- */
- public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input) {
- $events = new DOMTreeBuilder(TRUE);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $events);
-
- $parser->parse();
-
- return $events->fragment();
- }
- /**
- * Save a DOM into a given file as HTML5.
- *
- * @param mixed $dom
- * The DOM to be serialized.
- * @param string $file
- * The filename to be written.
- * @param array $options
- * Configuration options when serializing the DOM. These include:
- * - encode_entities: Text written to the output is escaped by default and not all
- * entities are encoded. If this is set to TRUE all entities will be encoded.
- * Defaults to FALSE.
- */
- public function save($dom, $file, $options = array()) {
- $close = TRUE;
- if (is_resource($file)) {
- $stream = $file;
- $close = FALSE;
+ /**
+ * Return all errors encountered during the parsing phase
+ *
+ * @return array
+ */
+ public function getErrors()
+ {
+ return $this->errors;
}
- else {
- $stream = fopen($file, 'w');
+
+ /**
+ * Return true if some errors were encountered during the parsing phase
+ *
+ * @return bool
+ */
+ public function hasErrors()
+ {
+ return count($this->errors) > 0;
+ }
+
+ /**
+ * Parse an input stream.
+ *
+ * Lower-level loading function. This requires an input stream instead
+ * of a string, file, or resource.
+ */
+ public function parse(\Masterminds\HTML5\Parser\InputStream $input)
+ {
+ $this->errors = array();
+ $events = new DOMTreeBuilder();
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $events);
+
+ $parser->parse();
+
+ $document = $events->document();
+
+ if ($document) {
+ $this->errors = $document->errors;
+ }
+
+ return $document;
+ }
+
+ /**
+ * Parse an input stream where the stream is a fragment.
+ *
+ * Lower-level loading function. This requires an input stream instead
+ * of a string, file, or resource.
+ */
+ public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input)
+ {
+ $events = new DOMTreeBuilder(TRUE);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $events);
+
+ $parser->parse();
+
+ return $events->fragment();
+ }
+
+ /**
+ * Save a DOM into a given file as HTML5.
+ *
+ * @param mixed $dom
+ * The DOM to be serialized.
+ * @param string $file
+ * The filename to be written.
+ * @param array $options
+ * Configuration options when serializing the DOM. These include:
+ * - encode_entities: Text written to the output is escaped by default and not all
+ * entities are encoded. If this is set to TRUE all entities will be encoded.
+ * Defaults to FALSE.
+ */
+ public function save($dom, $file, $options = array())
+ {
+ $close = TRUE;
+ if (is_resource($file)) {
+ $stream = $file;
+ $close = FALSE;
+ } else {
+ $stream = fopen($file, 'w');
+ }
+ $options = array_merge($this->getOptions(), $options);
+ $rules = new OutputRules($stream, $options);
+ $trav = new Traverser($dom, $stream, $rules, $options);
+
+ $trav->walk();
+
+ if ($close) {
+ fclose($stream);
+ }
}
- $options = array_merge($this->getOptions(), $options);
- $rules = new OutputRules($stream, $options);
- $trav = new Traverser($dom, $stream, $rules, $options);
- $trav->walk();
+ /**
+ * Convert a DOM into an HTML5 string.
+ *
+ * @param mixed $dom
+ * The DOM to be serialized.
+ * @param array $options
+ * Configuration options when serializing the DOM. These include:
+ * - encode_entities: Text written to the output is escaped by default and not all
+ * entities are encoded. If this is set to TRUE all entities will be encoded.
+ * Defaults to FALSE.
+ *
+ * @return string An HTML5 document generated from the DOM.
+ */
+ public function saveHTML($dom, $options = array())
+ {
+ $stream = fopen('php://temp', 'w');
+ $this->save($dom, $stream, array_merge($this->getOptions(), $options));
- if ($close) {
- fclose($stream);
+ return stream_get_contents($stream, - 1, 0);
}
- }
- /**
- * Convert a DOM into an HTML5 string.
- *
- * @param mixed $dom
- * The DOM to be serialized.
- * @param array $options
- * Configuration options when serializing the DOM. These include:
- * - encode_entities: Text written to the output is escaped by default and not all
- * entities are encoded. If this is set to TRUE all entities will be encoded.
- * Defaults to FALSE.
- *
- * @return string
- * An HTML5 document generated from the DOM.
- */
- public function saveHTML($dom, $options = array()) {
- $stream = fopen('php://temp', 'w');
- $this->save($dom, $stream, array_merge($this->getOptions(), $options));
- return stream_get_contents($stream, - 1, 0);
- }
}
diff --git a/src/HTML5/Elements.php b/src/HTML5/Elements.php
index 31e9e4c..2b9ba63 100644
--- a/src/HTML5/Elements.php
+++ b/src/HTML5/Elements.php
@@ -6,609 +6,624 @@ namespace Masterminds\HTML5;
/**
* This class provides general information about HTML5 elements,
- * including syntactic and semantic issues. Parsers and serializers can
+ * including syntactic and semantic issues.
+ * Parsers and serializers can
* use this class as a reference point for information about the rules
* of various HTML5 elements.
*
* @todo consider using a bitmask table lookup. There is enough overlap in
- * naming that this could significantly shrink the size and maybe make it
- * faster. See the Go teams implementation at https://code.google.com/p/go/source/browse/html/atom.
+ * naming that this could significantly shrink the size and maybe make it
+ * faster. See the Go teams implementation at https://code.google.com/p/go/source/browse/html/atom.
*/
-class Elements {
-
- /** Indicates an element is described in the specification. */
- const KNOWN_ELEMENT = 1;
-
- // From section 8.1.2: "script", "style"
- // From 8.2.5.4.7 ("in body" insertion mode): "noembed", "noscript"
- // From 8.4 "style", "xmp", "iframe", "noembed", "noframes"
- /** Indicates the contained text should be processed as raw text. */
- const TEXT_RAW = 2;
-
- // From section 8.1.2: "textarea", "title"
- /** Indicates the contained text should be processed as RCDATA. */
- const TEXT_RCDATA = 4;
-
- /** Indicates the tag cannot have content. */
- const VOID_TAG = 8;
-
- // "address", "article", "aside", "blockquote", "center", "details", "dialog", "dir", "div", "dl",
- // "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu",
- // "nav", "ol", "p", "section", "summary", "ul"
- // "h1", "h2", "h3", "h4", "h5", "h6"
- // "pre", "listing"
- // "form"
- // "plaintext"
- /**
- * Indicates that if a previous event is for a P tag, that element
- * should be considered closed.
- */
- const AUTOCLOSE_P = 16;
-
- /** Indicates that the text inside is plaintext (pre). */
- const TEXT_PLAINTEXT = 32;
-
- // See https://developer.mozilla.org/en-US/docs/HTML/Block-level_elements
- /** Indicates that the tag is a block. */
- const BLOCK_TAG = 64;
-
-
- /**
- * The HTML5 elements as defined in http://dev.w3.org/html5/markup/elements.html.
- * @var array
- */
- public static $html5 = array(
- "a" => 1,
- "abbr" => 1,
- "address" => 89, // NORMAL | VOID_TAG | AUTOCLOSE_P | BLOCK_TAG
- "area" => 9, // NORMAL | VOID_TAG
- "article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "audio" => 65, // NORMAL | BLOCK_TAG
- "b" => 1,
- "base" => 9, // NORMAL | VOID_TAG
- "bdi" => 1,
- "bdo" => 1,
- "blockquote" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "body" => 1,
- "br" => 9, // NORMAL | VOID_TAG
- "button" => 1,
- "canvas" => 65, // NORMAL | BLOCK_TAG
- "caption" => 1,
- "cite" => 1,
- "code" => 1,
- "col" => 9, // NORMAL | VOID_TAG
- "colgroup" => 1,
- "command" => 9, // NORMAL | VOID_TAG
- //"data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). See https://developer.mozilla.org/en-US/docs/HTML/Element/data
- "datalist" => 1,
- "dd" => 65, // NORMAL | BLOCK_TAG
- "del" => 1,
- "details" => 17, // NORMAL | AUTOCLOSE_P,
- "dfn" => 1,
- "dialog" => 17, // NORMAL | AUTOCLOSE_P,
- "div" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "dl" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "dt" => 1,
- "em" => 1,
- "embed" => 9, // NORMAL | VOID_TAG
- "fieldset" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "figcaption" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "figure" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "footer" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "form" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h1" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h2" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h3" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h4" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h5" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h6" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "head" => 1,
- "header" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "hgroup" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "hr" => 73, // NORMAL | VOID_TAG | BLOCK_TAG
- "html" => 1,
- "i" => 1,
- "iframe" => 3, // NORMAL | TEXT_RAW
- "img" => 9, // NORMAL | VOID_TAG
- "input" => 9, // NORMAL | VOID_TAG
- "kbd" => 1,
- "ins" => 1,
- "keygen" => 9, // NORMAL | VOID_TAG
- "label" => 1,
- "legend" => 1,
- "li" => 1,
- "link" => 9, // NORMAL | VOID_TAG
- "map" => 1,
- "mark" => 1,
- "menu" => 17, // NORMAL | AUTOCLOSE_P,
- "meta" => 9, // NORMAL | VOID_TAG
- "meter" => 1,
- "nav" => 17, // NORMAL | AUTOCLOSE_P,
- "noscript" => 67, // NORMAL | TEXT_RAW | BLOCK_TAG
- "object" => 1,
- "ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "optgroup" => 1,
- "option" => 1,
- "output" => 65, // NORMAL | BLOCK_TAG
- "p" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "param" => 9, // NORMAL | VOID_TAG
- "pre" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "progress" => 1,
- "q" => 1,
- "rp" => 1,
- "rt" => 1,
- "ruby" => 1,
- "s" => 1,
- "samp" => 1,
- "script" => 3, // NORMAL | TEXT_RAW
- "section" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "select" => 1,
- "small" => 1,
- "source" => 9, // NORMAL | VOID_TAG
- "span" => 1,
- "strong" => 1,
- "style" => 3, // NORMAL | TEXT_RAW
- "sub" => 1,
- "summary" => 17, // NORMAL | AUTOCLOSE_P,
- "sup" => 1,
- "table" => 65, // NORMAL | BLOCK_TAG
- "tbody" => 1,
- "td" => 1,
- "textarea" => 5, // NORMAL | TEXT_RCDATA
- "tfoot" => 65, // NORMAL | BLOCK_TAG
- "th" => 1,
- "thead" => 1,
- "time" => 1,
- "title" => 5, // NORMAL | TEXT_RCDATA
- "tr" => 1,
- "track" => 9, // NORMAL | VOID_TAG
- "u" => 1,
- "ul" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "var" => 1,
- "video" => 65, // NORMAL | BLOCK_TAG
- "wbr" => 9, // NORMAL | VOID_TAG
-
- // Legacy?
- 'basefont' => 8, // VOID_TAG
- 'bgsound' => 8, // VOID_TAG
- 'noframes' => 2, // RAW_TEXT
- 'frame' => 9, // NORMAL | VOID_TAG
- 'frameset' => 1,
- 'center' => 16, 'dir' => 16, 'listing' => 16, // AUTOCLOSE_P
- 'plaintext' => 48, // AUTOCLOSE_P | TEXT_PLAINTEXT
- 'applet' => 0,
- 'marquee' => 0,
- 'isindex' => 8, // VOID_TAG
- 'xmp' => 20, // AUTOCLOSE_P | VOID_TAG | RAW_TEXT
- 'noembed' => 2, // RAW_TEXT
- );
-
- /**
- * The MathML elements. See http://www.w3.org/wiki/MathML/Elements.
- *
- * In our case we are only concerned with presentation MathML and not content
- * MathML. There is a nice list of this subset at https://developer.mozilla.org/en-US/docs/MathML/Element.
- *
- * @var array
- */
- public static $mathml = array(
- "maction" => 1,
- "maligngroup" => 1,
- "malignmark" => 1,
- "math" => 1,
- "menclose" => 1,
- "merror" => 1,
- "mfenced" => 1,
- "mfrac" => 1,
- "mglyph" => 1,
- "mi" => 1,
- "mlabeledtr" => 1,
- "mlongdiv" => 1,
- "mmultiscripts" => 1,
- "mn" => 1,
- "mo" => 1,
- "mover" => 1,
- "mpadded" => 1,
- "mphantom" => 1,
- "mroot" => 1,
- "mrow" => 1,
- "ms" => 1,
- "mscarries" => 1,
- "mscarry" => 1,
- "msgroup" => 1,
- "msline" => 1,
- "mspace" => 1,
- "msqrt" => 1,
- "msrow" => 1,
- "mstack" => 1,
- "mstyle" => 1,
- "msub" => 1,
- "msup" => 1,
- "msubsup" => 1,
- "mtable" => 1,
- "mtd" => 1,
- "mtext" => 1,
- "mtr" => 1,
- "munder" => 1,
- "munderover" => 1,
- );
-
- /**
- * The svg elements.
- *
- * The Mozilla documentation has a good list at https://developer.mozilla.org/en-US/docs/SVG/Element.
- * The w3c list appears to be lacking in some areas like filter effect elements.
- * That list can be found at http://www.w3.org/wiki/SVG/Elements.
- *
- * Note, FireFox appears to do a better job rendering filter effects than chrome.
- * While they are in the spec I'm not sure how widely implemented they are.
- *
- * @var array
- */
- public static $svg = array(
- "a" => 1,
- "altGlyph" => 1,
- "altGlyphDef" => 1,
- "altGlyphItem" => 1,
- "animate" => 1,
- "animateColor" => 1,
- "animateMotion" => 1,
- "animateTransform" => 1,
- "circle" => 1,
- "clipPath" => 1,
- "color-profile" => 1,
- "cursor" => 1,
- "defs" => 1,
- "desc" => 1,
- "ellipse" => 1,
- "feBlend" => 1,
- "feColorMatrix" => 1,
- "feComponentTransfer" => 1,
- "feComposite" => 1,
- "feConvolveMatrix" => 1,
- "feDiffuseLighting" => 1,
- "feDisplacementMap" => 1,
- "feDistantLight" => 1,
- "feFlood" => 1,
- "feFuncA" => 1,
- "feFuncB" => 1,
- "feFuncG" => 1,
- "feFuncR" => 1,
- "feGaussianBlur" => 1,
- "feImage" => 1,
- "feMerge" => 1,
- "feMergeNode" => 1,
- "feMorphology" => 1,
- "feOffset" => 1,
- "fePointLight" => 1,
- "feSpecularLighting" => 1,
- "feSpotLight" => 1,
- "feTile" => 1,
- "feTurbulence" => 1,
- "filter" => 1,
- "font" => 1,
- "font-face" => 1,
- "font-face-format" => 1,
- "font-face-name" => 1,
- "font-face-src" => 1,
- "font-face-uri" => 1,
- "foreignObject" => 1,
- "g" => 1,
- "glyph" => 1,
- "glyphRef" => 1,
- "hkern" => 1,
- "image" => 1,
- "line" => 1,
- "linearGradient" => 1,
- "marker" => 1,
- "mask" => 1,
- "metadata" => 1,
- "missing-glyph" => 1,
- "mpath" => 1,
- "path" => 1,
- "pattern" => 1,
- "polygon" => 1,
- "polyline" => 1,
- "radialGradient" => 1,
- "rect" => 1,
- "script" => 3, // NORMAL | RAW_TEXT
- "set" => 1,
- "stop" => 1,
- "style" => 3, // NORMAL | RAW_TEXT
- "svg" => 1,
- "switch" => 1,
- "symbol" => 1,
- "text" => 1,
- "textPath" => 1,
- "title" => 1,
- "tref" => 1,
- "tspan" => 1,
- "use" => 1,
- "view" => 1,
- "vkern" => 1,
- );
-
- /**
- * Some attributes in SVG are case sensitive.
- *
- * This map contains key/value pairs with the key as the lowercase attribute
- * name and the value with the correct casing.
- */
- public static $svgCaseSensitiveAttributeMap = array(
- 'attributename' => 'attributeName',
- 'attributetype' => 'attributeType',
- 'basefrequency' => 'baseFrequency',
- 'baseprofile' => 'baseProfile',
- 'calcmode' => 'calcMode',
- 'clippathunits' => 'clipPathUnits',
- 'contentscripttype' => 'contentScriptType',
- 'contentstyletype' => 'contentStyleType',
- 'diffuseconstant' => 'diffuseConstant',
- 'edgemode' => 'edgeMode',
- 'externalresourcesrequired' => 'externalResourcesRequired',
- 'filterres' => 'filterRes',
- 'filterunits' => 'filterUnits',
- 'glyphref' => 'glyphRef',
- 'gradienttransform' => 'gradientTransform',
- 'gradientunits' => 'gradientUnits',
- 'kernelmatrix' => 'kernelMatrix',
- 'kernelunitlength' => 'kernelUnitLength',
- 'keypoints' => 'keyPoints',
- 'keysplines' => 'keySplines',
- 'keytimes' => 'keyTimes',
- 'lengthadjust' => 'lengthAdjust',
- 'limitingconeangle' => 'limitingConeAngle',
- 'markerheight' => 'markerHeight',
- 'markerunits' => 'markerUnits',
- 'markerwidth' => 'markerWidth',
- 'maskcontentunits' => 'maskContentUnits',
- 'maskunits' => 'maskUnits',
- 'numoctaves' => 'numOctaves',
- 'pathlength' => 'pathLength',
- 'patterncontentunits' => 'patternContentUnits',
- 'patterntransform' => 'patternTransform',
- 'patternunits' => 'patternUnits',
- 'pointsatx' => 'pointsAtX',
- 'pointsaty' => 'pointsAtY',
- 'pointsatz' => 'pointsAtZ',
- 'preservealpha' => 'preserveAlpha',
- 'preserveaspectratio' => 'preserveAspectRatio',
- 'primitiveunits' => 'primitiveUnits',
- 'refx' => 'refX',
- 'refy' => 'refY',
- 'repeatcount' => 'repeatCount',
- 'repeatdur' => 'repeatDur',
- 'requiredextensions' => 'requiredExtensions',
- 'requiredfeatures' => 'requiredFeatures',
- 'specularconstant' => 'specularConstant',
- 'specularexponent' => 'specularExponent',
- 'spreadmethod' => 'spreadMethod',
- 'startoffset' => 'startOffset',
- 'stddeviation' => 'stdDeviation',
- 'stitchtiles' => 'stitchTiles',
- 'surfacescale' => 'surfaceScale',
- 'systemlanguage' => 'systemLanguage',
- 'tablevalues' => 'tableValues',
- 'targetx' => 'targetX',
- 'targety' => 'targetY',
- 'textlength' => 'textLength',
- 'viewbox' => 'viewBox',
- 'viewtarget' => 'viewTarget',
- 'xchannelselector' => 'xChannelSelector',
- 'ychannelselector' => 'yChannelSelector',
- 'zoomandpan' => 'zoomAndPan',
- );
-
- /**
- * Some SVG elements are case sensitive. This map contains these.
- *
- * The map contains key/value store of the name is lowercase as the keys and
- * the correct casing as the value.
- */
- public static $svgCaseSensitiveElementMap = array(
- 'altglyph' => 'altGlyph',
- 'altglyphdef' => 'altGlyphDef',
- 'altglyphitem' => 'altGlyphItem',
- 'animatecolor' => 'animateColor',
- 'animatemotion' => 'animateMotion',
- 'animatetransform' => 'animateTransform',
- 'clippath' => 'clipPath',
- 'feblend' => 'feBlend',
- 'fecolormatrix' => 'feColorMatrix',
- 'fecomponenttransfer' => 'feComponentTransfer',
- 'fecomposite' => 'feComposite',
- 'feconvolvematrix' => 'feConvolveMatrix',
- 'fediffuselighting' => 'feDiffuseLighting',
- 'fedisplacementmap' => 'feDisplacementMap',
- 'fedistantlight' => 'feDistantLight',
- 'feflood' => 'feFlood',
- 'fefunca' => 'feFuncA',
- 'fefuncb' => 'feFuncB',
- 'fefuncg' => 'feFuncG',
- 'fefuncr' => 'feFuncR',
- 'fegaussianblur' => 'feGaussianBlur',
- 'feimage' => 'feImage',
- 'femerge' => 'feMerge',
- 'femergenode' => 'feMergeNode',
- 'femorphology' => 'feMorphology',
- 'feoffset' => 'feOffset',
- 'fepointlight' => 'fePointLight',
- 'fespecularlighting' => 'feSpecularLighting',
- 'fespotlight' => 'feSpotLight',
- 'fetile' => 'feTile',
- 'feturbulence' => 'feTurbulence',
- 'foreignobject' => 'foreignObject',
- 'glyphref' => 'glyphRef',
- 'lineargradient' => 'linearGradient',
- 'radialgradient' => 'radialGradient',
- 'textpath' => 'textPath',
- );
-
- /**
- * Check whether the given element meets the given criterion.
- *
- * Example:
- *
- * Elements::isA('script', Elements::TEXT_RAW); // Returns true.
- *
- * Elements::isA('script', Elements::TEXT_RCDATA); // Returns false.
- *
- * @param string $name
- * The element name.
- * @param int $mask
- * One of the constants on this class.
- * @return boolean
- * TRUE if the element matches the mask, FALSE otherwise.
- */
- public static function isA($name, $mask) {
- if (!static::isElement($name)) {
- return FALSE;
+class Elements
+{
+
+ /**
+ * Indicates an element is described in the specification.
+ */
+ const KNOWN_ELEMENT = 1;
+
+ // From section 8.1.2: "script", "style"
+ // From 8.2.5.4.7 ("in body" insertion mode): "noembed", "noscript"
+ // From 8.4 "style", "xmp", "iframe", "noembed", "noframes"
+ /**
+ * Indicates the contained text should be processed as raw text.
+ */
+ const TEXT_RAW = 2;
+
+ // From section 8.1.2: "textarea", "title"
+ /**
+ * Indicates the contained text should be processed as RCDATA.
+ */
+ const TEXT_RCDATA = 4;
+
+ /**
+ * Indicates the tag cannot have content.
+ */
+ const VOID_TAG = 8;
+
+ // "address", "article", "aside", "blockquote", "center", "details", "dialog", "dir", "div", "dl",
+ // "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu",
+ // "nav", "ol", "p", "section", "summary", "ul"
+ // "h1", "h2", "h3", "h4", "h5", "h6"
+ // "pre", "listing"
+ // "form"
+ // "plaintext"
+ /**
+ * Indicates that if a previous event is for a P tag, that element
+ * should be considered closed.
+ */
+ const AUTOCLOSE_P = 16;
+
+ /**
+ * Indicates that the text inside is plaintext (pre).
+ */
+ const TEXT_PLAINTEXT = 32;
+
+ // See https://developer.mozilla.org/en-US/docs/HTML/Block-level_elements
+ /**
+ * Indicates that the tag is a block.
+ */
+ const BLOCK_TAG = 64;
+
+ /**
+ * The HTML5 elements as defined in http://dev.w3.org/html5/markup/elements.html.
+ *
+ * @var array
+ */
+ public static $html5 = array(
+ "a" => 1,
+ "abbr" => 1,
+ "address" => 89, // NORMAL | VOID_TAG | AUTOCLOSE_P | BLOCK_TAG
+ "area" => 9, // NORMAL | VOID_TAG
+ "article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "audio" => 65, // NORMAL | BLOCK_TAG
+ "b" => 1,
+ "base" => 9, // NORMAL | VOID_TAG
+ "bdi" => 1,
+ "bdo" => 1,
+ "blockquote" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "body" => 1,
+ "br" => 9, // NORMAL | VOID_TAG
+ "button" => 1,
+ "canvas" => 65, // NORMAL | BLOCK_TAG
+ "caption" => 1,
+ "cite" => 1,
+ "code" => 1,
+ "col" => 9, // NORMAL | VOID_TAG
+ "colgroup" => 1,
+ "command" => 9, // NORMAL | VOID_TAG
+ // "data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). See https://developer.mozilla.org/en-US/docs/HTML/Element/data
+ "datalist" => 1,
+ "dd" => 65, // NORMAL | BLOCK_TAG
+ "del" => 1,
+ "details" => 17, // NORMAL | AUTOCLOSE_P,
+ "dfn" => 1,
+ "dialog" => 17, // NORMAL | AUTOCLOSE_P,
+ "div" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "dl" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "dt" => 1,
+ "em" => 1,
+ "embed" => 9, // NORMAL | VOID_TAG
+ "fieldset" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "figcaption" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "figure" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "footer" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "form" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "h1" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "h2" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "h3" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "h4" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "h5" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "h6" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "head" => 1,
+ "header" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "hgroup" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "hr" => 73, // NORMAL | VOID_TAG | BLOCK_TAG
+ "html" => 1,
+ "i" => 1,
+ "iframe" => 3, // NORMAL | TEXT_RAW
+ "img" => 9, // NORMAL | VOID_TAG
+ "input" => 9, // NORMAL | VOID_TAG
+ "kbd" => 1,
+ "ins" => 1,
+ "keygen" => 9, // NORMAL | VOID_TAG
+ "label" => 1,
+ "legend" => 1,
+ "li" => 1,
+ "link" => 9, // NORMAL | VOID_TAG
+ "map" => 1,
+ "mark" => 1,
+ "menu" => 17, // NORMAL | AUTOCLOSE_P,
+ "meta" => 9, // NORMAL | VOID_TAG
+ "meter" => 1,
+ "nav" => 17, // NORMAL | AUTOCLOSE_P,
+ "noscript" => 67, // NORMAL | TEXT_RAW | BLOCK_TAG
+ "object" => 1,
+ "ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "optgroup" => 1,
+ "option" => 1,
+ "output" => 65, // NORMAL | BLOCK_TAG
+ "p" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "param" => 9, // NORMAL | VOID_TAG
+ "pre" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "progress" => 1,
+ "q" => 1,
+ "rp" => 1,
+ "rt" => 1,
+ "ruby" => 1,
+ "s" => 1,
+ "samp" => 1,
+ "script" => 3, // NORMAL | TEXT_RAW
+ "section" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "select" => 1,
+ "small" => 1,
+ "source" => 9, // NORMAL | VOID_TAG
+ "span" => 1,
+ "strong" => 1,
+ "style" => 3, // NORMAL | TEXT_RAW
+ "sub" => 1,
+ "summary" => 17, // NORMAL | AUTOCLOSE_P,
+ "sup" => 1,
+ "table" => 65, // NORMAL | BLOCK_TAG
+ "tbody" => 1,
+ "td" => 1,
+ "textarea" => 5, // NORMAL | TEXT_RCDATA
+ "tfoot" => 65, // NORMAL | BLOCK_TAG
+ "th" => 1,
+ "thead" => 1,
+ "time" => 1,
+ "title" => 5, // NORMAL | TEXT_RCDATA
+ "tr" => 1,
+ "track" => 9, // NORMAL | VOID_TAG
+ "u" => 1,
+ "ul" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ "var" => 1,
+ "video" => 65, // NORMAL | BLOCK_TAG
+ "wbr" => 9, // NORMAL | VOID_TAG
+
+ // Legacy?
+ 'basefont' => 8, // VOID_TAG
+ 'bgsound' => 8, // VOID_TAG
+ 'noframes' => 2, // RAW_TEXT
+ 'frame' => 9, // NORMAL | VOID_TAG
+ 'frameset' => 1,
+ 'center' => 16,
+ 'dir' => 16,
+ 'listing' => 16, // AUTOCLOSE_P
+ 'plaintext' => 48, // AUTOCLOSE_P | TEXT_PLAINTEXT
+ 'applet' => 0,
+ 'marquee' => 0,
+ 'isindex' => 8, // VOID_TAG
+ 'xmp' => 20, // AUTOCLOSE_P | TEXT_RCDATA (16 | 4); NOTE(review): confirm xmp should not be RAW_TEXT (2) instead
+ 'noembed' => 2 // RAW_TEXT
+ );
+
+ /**
+ * The MathML elements.
+ * See http://www.w3.org/wiki/MathML/Elements.
+ *
+ * In our case we are only concerned with presentation MathML and not content
+ * MathML. There is a nice list of this subset at https://developer.mozilla.org/en-US/docs/MathML/Element.
+ *
+ * @var array
+ */
+ public static $mathml = array(
+ "maction" => 1,
+ "maligngroup" => 1,
+ "malignmark" => 1,
+ "math" => 1,
+ "menclose" => 1,
+ "merror" => 1,
+ "mfenced" => 1,
+ "mfrac" => 1,
+ "mglyph" => 1,
+ "mi" => 1,
+ "mlabeledtr" => 1,
+ "mlongdiv" => 1,
+ "mmultiscripts" => 1,
+ "mn" => 1,
+ "mo" => 1,
+ "mover" => 1,
+ "mpadded" => 1,
+ "mphantom" => 1,
+ "mroot" => 1,
+ "mrow" => 1,
+ "ms" => 1,
+ "mscarries" => 1,
+ "mscarry" => 1,
+ "msgroup" => 1,
+ "msline" => 1,
+ "mspace" => 1,
+ "msqrt" => 1,
+ "msrow" => 1,
+ "mstack" => 1,
+ "mstyle" => 1,
+ "msub" => 1,
+ "msup" => 1,
+ "msubsup" => 1,
+ "mtable" => 1,
+ "mtd" => 1,
+ "mtext" => 1,
+ "mtr" => 1,
+ "munder" => 1,
+ "munderover" => 1
+ );
+
+ /**
+ * The svg elements.
+ *
+ * The Mozilla documentation has a good list at https://developer.mozilla.org/en-US/docs/SVG/Element.
+ * The w3c list appears to be lacking in some areas like filter effect elements.
+ * That list can be found at http://www.w3.org/wiki/SVG/Elements.
+ *
+ * Note, Firefox appears to do a better job rendering filter effects than Chrome.
+ * While they are in the spec I'm not sure how widely implemented they are.
+ *
+ * @var array
+ */
+ public static $svg = array(
+ "a" => 1,
+ "altGlyph" => 1,
+ "altGlyphDef" => 1,
+ "altGlyphItem" => 1,
+ "animate" => 1,
+ "animateColor" => 1,
+ "animateMotion" => 1,
+ "animateTransform" => 1,
+ "circle" => 1,
+ "clipPath" => 1,
+ "color-profile" => 1,
+ "cursor" => 1,
+ "defs" => 1,
+ "desc" => 1,
+ "ellipse" => 1,
+ "feBlend" => 1,
+ "feColorMatrix" => 1,
+ "feComponentTransfer" => 1,
+ "feComposite" => 1,
+ "feConvolveMatrix" => 1,
+ "feDiffuseLighting" => 1,
+ "feDisplacementMap" => 1,
+ "feDistantLight" => 1,
+ "feFlood" => 1,
+ "feFuncA" => 1,
+ "feFuncB" => 1,
+ "feFuncG" => 1,
+ "feFuncR" => 1,
+ "feGaussianBlur" => 1,
+ "feImage" => 1,
+ "feMerge" => 1,
+ "feMergeNode" => 1,
+ "feMorphology" => 1,
+ "feOffset" => 1,
+ "fePointLight" => 1,
+ "feSpecularLighting" => 1,
+ "feSpotLight" => 1,
+ "feTile" => 1,
+ "feTurbulence" => 1,
+ "filter" => 1,
+ "font" => 1,
+ "font-face" => 1,
+ "font-face-format" => 1,
+ "font-face-name" => 1,
+ "font-face-src" => 1,
+ "font-face-uri" => 1,
+ "foreignObject" => 1,
+ "g" => 1,
+ "glyph" => 1,
+ "glyphRef" => 1,
+ "hkern" => 1,
+ "image" => 1,
+ "line" => 1,
+ "linearGradient" => 1,
+ "marker" => 1,
+ "mask" => 1,
+ "metadata" => 1,
+ "missing-glyph" => 1,
+ "mpath" => 1,
+ "path" => 1,
+ "pattern" => 1,
+ "polygon" => 1,
+ "polyline" => 1,
+ "radialGradient" => 1,
+ "rect" => 1,
+ "script" => 3, // NORMAL | RAW_TEXT
+ "set" => 1,
+ "stop" => 1,
+ "style" => 3, // NORMAL | RAW_TEXT
+ "svg" => 1,
+ "switch" => 1,
+ "symbol" => 1,
+ "text" => 1,
+ "textPath" => 1,
+ "title" => 1,
+ "tref" => 1,
+ "tspan" => 1,
+ "use" => 1,
+ "view" => 1,
+ "vkern" => 1
+ );
+
+ /**
+ * Some attributes in SVG are case sensitive.
+ *
+ * This map contains key/value pairs with the key as the lowercase attribute
+ * name and the value with the correct casing.
+ */
+ public static $svgCaseSensitiveAttributeMap = array(
+ 'attributename' => 'attributeName',
+ 'attributetype' => 'attributeType',
+ 'basefrequency' => 'baseFrequency',
+ 'baseprofile' => 'baseProfile',
+ 'calcmode' => 'calcMode',
+ 'clippathunits' => 'clipPathUnits',
+ 'contentscripttype' => 'contentScriptType',
+ 'contentstyletype' => 'contentStyleType',
+ 'diffuseconstant' => 'diffuseConstant',
+ 'edgemode' => 'edgeMode',
+ 'externalresourcesrequired' => 'externalResourcesRequired',
+ 'filterres' => 'filterRes',
+ 'filterunits' => 'filterUnits',
+ 'glyphref' => 'glyphRef',
+ 'gradienttransform' => 'gradientTransform',
+ 'gradientunits' => 'gradientUnits',
+ 'kernelmatrix' => 'kernelMatrix',
+ 'kernelunitlength' => 'kernelUnitLength',
+ 'keypoints' => 'keyPoints',
+ 'keysplines' => 'keySplines',
+ 'keytimes' => 'keyTimes',
+ 'lengthadjust' => 'lengthAdjust',
+ 'limitingconeangle' => 'limitingConeAngle',
+ 'markerheight' => 'markerHeight',
+ 'markerunits' => 'markerUnits',
+ 'markerwidth' => 'markerWidth',
+ 'maskcontentunits' => 'maskContentUnits',
+ 'maskunits' => 'maskUnits',
+ 'numoctaves' => 'numOctaves',
+ 'pathlength' => 'pathLength',
+ 'patterncontentunits' => 'patternContentUnits',
+ 'patterntransform' => 'patternTransform',
+ 'patternunits' => 'patternUnits',
+ 'pointsatx' => 'pointsAtX',
+ 'pointsaty' => 'pointsAtY',
+ 'pointsatz' => 'pointsAtZ',
+ 'preservealpha' => 'preserveAlpha',
+ 'preserveaspectratio' => 'preserveAspectRatio',
+ 'primitiveunits' => 'primitiveUnits',
+ 'refx' => 'refX',
+ 'refy' => 'refY',
+ 'repeatcount' => 'repeatCount',
+ 'repeatdur' => 'repeatDur',
+ 'requiredextensions' => 'requiredExtensions',
+ 'requiredfeatures' => 'requiredFeatures',
+ 'specularconstant' => 'specularConstant',
+ 'specularexponent' => 'specularExponent',
+ 'spreadmethod' => 'spreadMethod',
+ 'startoffset' => 'startOffset',
+ 'stddeviation' => 'stdDeviation',
+ 'stitchtiles' => 'stitchTiles',
+ 'surfacescale' => 'surfaceScale',
+ 'systemlanguage' => 'systemLanguage',
+ 'tablevalues' => 'tableValues',
+ 'targetx' => 'targetX',
+ 'targety' => 'targetY',
+ 'textlength' => 'textLength',
+ 'viewbox' => 'viewBox',
+ 'viewtarget' => 'viewTarget',
+ 'xchannelselector' => 'xChannelSelector',
+ 'ychannelselector' => 'yChannelSelector',
+ 'zoomandpan' => 'zoomAndPan'
+ );
+
+ /**
+ * Some SVG elements are case sensitive.
+ * This map contains these.
+ *
+ * The map is a key/value store with the lowercase element name as the key and
+ * the correctly cased name as the value.
+ */
+ public static $svgCaseSensitiveElementMap = array(
+ 'altglyph' => 'altGlyph',
+ 'altglyphdef' => 'altGlyphDef',
+ 'altglyphitem' => 'altGlyphItem',
+ 'animatecolor' => 'animateColor',
+ 'animatemotion' => 'animateMotion',
+ 'animatetransform' => 'animateTransform',
+ 'clippath' => 'clipPath',
+ 'feblend' => 'feBlend',
+ 'fecolormatrix' => 'feColorMatrix',
+ 'fecomponenttransfer' => 'feComponentTransfer',
+ 'fecomposite' => 'feComposite',
+ 'feconvolvematrix' => 'feConvolveMatrix',
+ 'fediffuselighting' => 'feDiffuseLighting',
+ 'fedisplacementmap' => 'feDisplacementMap',
+ 'fedistantlight' => 'feDistantLight',
+ 'feflood' => 'feFlood',
+ 'fefunca' => 'feFuncA',
+ 'fefuncb' => 'feFuncB',
+ 'fefuncg' => 'feFuncG',
+ 'fefuncr' => 'feFuncR',
+ 'fegaussianblur' => 'feGaussianBlur',
+ 'feimage' => 'feImage',
+ 'femerge' => 'feMerge',
+ 'femergenode' => 'feMergeNode',
+ 'femorphology' => 'feMorphology',
+ 'feoffset' => 'feOffset',
+ 'fepointlight' => 'fePointLight',
+ 'fespecularlighting' => 'feSpecularLighting',
+ 'fespotlight' => 'feSpotLight',
+ 'fetile' => 'feTile',
+ 'feturbulence' => 'feTurbulence',
+ 'foreignobject' => 'foreignObject',
+ 'glyphref' => 'glyphRef',
+ 'lineargradient' => 'linearGradient',
+ 'radialgradient' => 'radialGradient',
+ 'textpath' => 'textPath'
+ );
+
+ /**
+ * Check whether the given element meets the given criterion.
+ *
+ * Example:
+ *
+ * Elements::isA('script', Elements::TEXT_RAW); // Returns true.
+ *
+ * Elements::isA('script', Elements::TEXT_RCDATA); // Returns false.
+ *
+ * @param string $name
+ * The element name.
+ * @param int $mask
+ * One of the constants on this class.
+ * @return boolean TRUE if the element matches the mask, FALSE otherwise.
+ */
+ public static function isA($name, $mask)
+ {
+ if (! static::isElement($name)) {
+ return FALSE;
+ }
+
+ return (static::element($name) & $mask) == $mask;
}
- return (static::element($name) & $mask) == $mask;
- }
-
- /**
- * Test if an element is a valid html5 element.
- *
- * @param string $name
- * The name of the element.
- *
- * @return bool
- * True if a html5 element and false otherwise.
- */
- public static function isHtml5Element($name) {
-
- // html5 element names are case insensetitive. Forcing lowercase for the check.
- // Do we need this check or will all data passed here already be lowercase?
- return isset(static::$html5[strtolower($name)]);
- }
-
- /**
- * Test if an element name is a valid MathML presentation element.
- *
- * @param string $name
- * The name of the element.
- *
- * @return bool
- * True if a MathML name and false otherwise.
- */
- public static function isMathMLElement($name) {
-
- // MathML is case-sensetitive unlike html5 elements.
- return isset(static::$mathml[$name]);
- }
-
- /**
- * Test if an element is a valid SVG element.
- *
- * @param string $name
- * The name of the element.
- *
- * @return boolean
- * True if a SVG element and false otherise.
- */
- public static function isSvgElement($name) {
-
- // SVG is case-sensetitive unlike html5 elements.
- return isset(static::$svg[$name]);
- }
-
- /**
- * Is an element name valid in an html5 document.
- *
- * This includes html5 elements along with other allowed embedded content
- * such as svg and mathml.
- *
- * @param string $name
- * The name of the element.
- *
- * @return bool
- * True if valid and false otherwise.
- */
- public static function isElement($name) {
- return static::isHtml5Element($name) || static::isMathMLElement($name) || static::isSvgElement($name);
- }
-
- /**
- * Get the element mask for the given element name.
- *
- * @param string $name
- * The name of the element.
- *
- * @return int
- * The element mask.
- */
- public static function element($name) {
- if (isset(static::$html5[$name])) {
- return static::$html5[$name];
- }
- if (isset(static::$svg[$name])) {
- return static::$svg[$name];
- }
- if (isset(static::$mathml[$name])) {
- return static::$mathml[$name];
+ /**
+ * Test if an element is a valid html5 element.
+ *
+ * @param string $name
+ * The name of the element.
+ *
+ * @return bool True if a html5 element and false otherwise.
+ */
+ public static function isHtml5Element($name)
+ {
+ // html5 element names are case insensitive. Forcing lowercase for the check.
+ // Do we need this check or will all data passed here already be lowercase?
+ return isset(static::$html5[strtolower($name)]);
}
- return FALSE;
- }
-
- /**
- * Normalize a SVG element name to its proper case and form.
- *
- * @param string $name
- * The name of the element.
- *
- * @return string
- * The normalized form of the element name.
- */
- public static function normalizeSvgElement($name) {
- $name = strtolower($name);
- if (isset(static::$svgCaseSensitiveElementMap[$name])) {
- $name = static::$svgCaseSensitiveElementMap[$name];
+ /**
+ * Test if an element name is a valid MathML presentation element.
+ *
+ * @param string $name
+ * The name of the element.
+ *
+ * @return bool True if a MathML name and false otherwise.
+ */
+ public static function isMathMLElement($name)
+ {
+ // MathML is case-sensitive unlike html5 elements.
+ return isset(static::$mathml[$name]);
}
- return $name;
- }
+ /**
+ * Test if an element is a valid SVG element.
+ *
+ * @param string $name
+ * The name of the element.
+ *
+ * @return boolean True if a SVG element and false otherwise.
+ */
+ public static function isSvgElement($name)
+ {
+ // SVG is case-sensitive unlike html5 elements.
+ return isset(static::$svg[$name]);
+ }
- /**
- * Normalize a SVG attribute name to its proper case and form.
- *
- * @param string $name
- * The name of the attribute.
- *
- * @return string
- * The normalized form of the attribute name.
- */
- public static function normalizeSvgAttribute($name) {
- $name = strtolower($name);
- if (isset(static::$svgCaseSensitiveAttributeMap[$name])) {
- $name = static::$svgCaseSensitiveAttributeMap[$name];
+ /**
+ * Is an element name valid in an html5 document.
+ *
+ * This includes html5 elements along with other allowed embedded content
+ * such as svg and mathml.
+ *
+ * @param string $name
+ * The name of the element.
+ *
+ * @return bool True if valid and false otherwise.
+ */
+ public static function isElement($name)
+ {
+ return static::isHtml5Element($name) || static::isMathMLElement($name) || static::isSvgElement($name);
}
- return $name;
- }
+ /**
+ * Get the element mask for the given element name.
+ *
+ * @param string $name
+ * The name of the element.
+ *
+ * @return int|bool The element mask, or FALSE if the name is not a known element.
+ */
+ public static function element($name)
+ {
+ if (isset(static::$html5[$name])) {
+ return static::$html5[$name];
+ }
+ if (isset(static::$svg[$name])) {
+ return static::$svg[$name];
+ }
+ if (isset(static::$mathml[$name])) {
+ return static::$mathml[$name];
+ }
+
+ return FALSE;
+ }
- /**
- * Normalize a MathML attribute name to its proper case and form.
- *
- * Note, all MathML element names are lowercase.
- *
- * @param string $name
- * The name of the attribute.
- *
- * @return string
- * The normalized form of the attribute name.
- */
- public static function normalizeMathMlAttribute($name) {
- $name = strtolower($name);
+ /**
+ * Normalize a SVG element name to its proper case and form.
+ *
+ * @param string $name
+ * The name of the element.
+ *
+ * @return string The normalized form of the element name.
+ */
+ public static function normalizeSvgElement($name)
+ {
+ $name = strtolower($name);
+ if (isset(static::$svgCaseSensitiveElementMap[$name])) {
+ $name = static::$svgCaseSensitiveElementMap[$name];
+ }
+
+ return $name;
+ }
- // Only one attribute has a mixed case form for MathML.
- if ($name == 'definitionurl') {
- $name = 'definitionURL';
+ /**
+ * Normalize a SVG attribute name to its proper case and form.
+ *
+ * @param string $name
+ * The name of the attribute.
+ *
+ * @return string The normalized form of the attribute name.
+ */
+ public static function normalizeSvgAttribute($name)
+ {
+ $name = strtolower($name);
+ if (isset(static::$svgCaseSensitiveAttributeMap[$name])) {
+ $name = static::$svgCaseSensitiveAttributeMap[$name];
+ }
+
+ return $name;
}
- return $name;
- }
+ /**
+ * Normalize a MathML attribute name to its proper case and form.
+ *
+ * Note, all MathML element names are lowercase.
+ *
+ * @param string $name
+ * The name of the attribute.
+ *
+ * @return string The normalized form of the attribute name.
+ */
+ public static function normalizeMathMlAttribute($name)
+ {
+ $name = strtolower($name);
+
+ // Only one attribute has a mixed case form for MathML.
+ if ($name == 'definitionurl') {
+ $name = 'definitionURL';
+ }
+
+ return $name;
+ }
}
diff --git a/src/HTML5/Entities.php b/src/HTML5/Entities.php
index 2174f0d..2e605d6 100644
--- a/src/HTML5/Entities.php
+++ b/src/HTML5/Entities.php
@@ -1,2230 +1,2236 @@
<?php
namespace Masterminds\HTML5;
-/** Entity lookup tables. This class is automatically generated. */
-class Entities {
- public static $byName = array (
- 'Aacute' => 'Á',
- 'Aacut' => 'Á',
- 'aacute' => 'á',
- 'aacut' => 'á',
- 'Abreve' => 'Ă',
- 'abreve' => 'ă',
- 'ac' => '∾',
- 'acd' => '∿',
- 'acE' => '∾̳',
- 'Acirc' => 'Â',
- 'Acir' => 'Â',
- 'acirc' => 'â',
- 'acir' => 'â',
- 'acute' => '´',
- 'acut' => '´',
- 'Acy' => 'А',
- 'acy' => 'а',
- 'AElig' => 'Æ',
- 'AEli' => 'Æ',
- 'aelig' => 'æ',
- 'aeli' => 'æ',
- 'af' => '⁡',
- 'Afr' => '𝔄',
- 'afr' => '𝔞',
- 'Agrave' => 'À',
- 'Agrav' => 'À',
- 'agrave' => 'à',
- 'agrav' => 'à',
- 'alefsym' => 'ℵ',
- 'aleph' => 'ℵ',
- 'Alpha' => 'Α',
- 'alpha' => 'α',
- 'Amacr' => 'Ā',
- 'amacr' => 'ā',
- 'amalg' => '⨿',
- 'AMP' => '&',
- 'AM' => '&',
- 'amp' => '&',
- 'am' => '&',
- 'And' => '⩓',
- 'and' => '∧',
- 'andand' => '⩕',
- 'andd' => '⩜',
- 'andslope' => '⩘',
- 'andv' => '⩚',
- 'ang' => '∠',
- 'ange' => '⦤',
- 'angle' => '∠',
- 'angmsd' => '∡',
- 'angmsdaa' => '⦨',
- 'angmsdab' => '⦩',
- 'angmsdac' => '⦪',
- 'angmsdad' => '⦫',
- 'angmsdae' => '⦬',
- 'angmsdaf' => '⦭',
- 'angmsdag' => '⦮',
- 'angmsdah' => '⦯',
- 'angrt' => '∟',
- 'angrtvb' => '⊾',
- 'angrtvbd' => '⦝',
- 'angsph' => '∢',
- 'angst' => 'Å',
- 'angzarr' => '⍼',
- 'Aogon' => 'Ą',
- 'aogon' => 'ą',
- 'Aopf' => '𝔸',
- 'aopf' => '𝕒',
- 'ap' => '≈',
- 'apacir' => '⩯',
- 'apE' => '⩰',
- 'ape' => '≊',
- 'apid' => '≋',
- 'apos' => '\'',
- 'ApplyFunction' => '⁡',
- 'approx' => '≈',
- 'approxeq' => '≊',
- 'Aring' => 'Å',
- 'Arin' => 'Å',
- 'aring' => 'å',
- 'arin' => 'å',
- 'Ascr' => '𝒜',
- 'ascr' => '𝒶',
- 'Assign' => '≔',
- 'ast' => '*',
- 'asymp' => '≈',
- 'asympeq' => '≍',
- 'Atilde' => 'Ã',
- 'Atild' => 'Ã',
- 'atilde' => 'ã',
- 'atild' => 'ã',
- 'Auml' => 'Ä',
- 'Aum' => 'Ä',
- 'auml' => 'ä',
- 'aum' => 'ä',
- 'awconint' => '∳',
- 'awint' => '⨑',
- 'backcong' => '≌',
- 'backepsilon' => '϶',
- 'backprime' => '‵',
- 'backsim' => '∽',
- 'backsimeq' => '⋍',
- 'Backslash' => '∖',
- 'Barv' => '⫧',
- 'barvee' => '⊽',
- 'Barwed' => '⌆',
- 'barwed' => '⌅',
- 'barwedge' => '⌅',
- 'bbrk' => '⎵',
- 'bbrktbrk' => '⎶',
- 'bcong' => '≌',
- 'Bcy' => 'Б',
- 'bcy' => 'б',
- 'bdquo' => '„',
- 'becaus' => '∵',
- 'Because' => '∵',
- 'because' => '∵',
- 'bemptyv' => '⦰',
- 'bepsi' => '϶',
- 'bernou' => 'ℬ',
- 'Bernoullis' => 'ℬ',
- 'Beta' => 'Β',
- 'beta' => 'β',
- 'beth' => 'ℶ',
- 'between' => '≬',
- 'Bfr' => '𝔅',
- 'bfr' => '𝔟',
- 'bigcap' => '⋂',
- 'bigcirc' => '◯',
- 'bigcup' => '⋃',
- 'bigodot' => '⨀',
- 'bigoplus' => '⨁',
- 'bigotimes' => '⨂',
- 'bigsqcup' => '⨆',
- 'bigstar' => '★',
- 'bigtriangledown' => '▽',
- 'bigtriangleup' => '△',
- 'biguplus' => '⨄',
- 'bigvee' => '⋁',
- 'bigwedge' => '⋀',
- 'bkarow' => '⤍',
- 'blacklozenge' => '⧫',
- 'blacksquare' => '▪',
- 'blacktriangle' => '▴',
- 'blacktriangledown' => '▾',
- 'blacktriangleleft' => '◂',
- 'blacktriangleright' => '▸',
- 'blank' => '␣',
- 'blk12' => '▒',
- 'blk14' => '░',
- 'blk34' => '▓',
- 'block' => '█',
- 'bne' => '=⃥',
- 'bnequiv' => '≡⃥',
- 'bNot' => '⫭',
- 'bnot' => '⌐',
- 'Bopf' => '𝔹',
- 'bopf' => '𝕓',
- 'bot' => '⊥',
- 'bottom' => '⊥',
- 'bowtie' => '⋈',
- 'boxbox' => '⧉',
- 'boxDL' => '╗',
- 'boxDl' => '╖',
- 'boxdL' => '╕',
- 'boxdl' => '┐',
- 'boxDR' => '╔',
- 'boxDr' => '╓',
- 'boxdR' => '╒',
- 'boxdr' => '┌',
- 'boxH' => '═',
- 'boxh' => '─',
- 'boxHD' => '╦',
- 'boxHd' => '╤',
- 'boxhD' => '╥',
- 'boxhd' => '┬',
- 'boxHU' => '╩',
- 'boxHu' => '╧',
- 'boxhU' => '╨',
- 'boxhu' => '┴',
- 'boxminus' => '⊟',
- 'boxplus' => '⊞',
- 'boxtimes' => '⊠',
- 'boxUL' => '╝',
- 'boxUl' => '╜',
- 'boxuL' => '╛',
- 'boxul' => '┘',
- 'boxUR' => '╚',
- 'boxUr' => '╙',
- 'boxuR' => '╘',
- 'boxur' => '└',
- 'boxV' => '║',
- 'boxv' => '│',
- 'boxVH' => '╬',
- 'boxVh' => '╫',
- 'boxvH' => '╪',
- 'boxvh' => '┼',
- 'boxVL' => '╣',
- 'boxVl' => '╢',
- 'boxvL' => '╡',
- 'boxvl' => '┤',
- 'boxVR' => '╠',
- 'boxVr' => '╟',
- 'boxvR' => '╞',
- 'boxvr' => '├',
- 'bprime' => '‵',
- 'Breve' => '˘',
- 'breve' => '˘',
- 'brvbar' => '¦',
- 'brvba' => '¦',
- 'Bscr' => 'ℬ',
- 'bscr' => '𝒷',
- 'bsemi' => '⁏',
- 'bsim' => '∽',
- 'bsime' => '⋍',
- 'bsol' => '\\',
- 'bsolb' => '⧅',
- 'bsolhsub' => '⟈',
- 'bull' => '•',
- 'bullet' => '•',
- 'bump' => '≎',
- 'bumpE' => '⪮',
- 'bumpe' => '≏',
- 'Bumpeq' => '≎',
- 'bumpeq' => '≏',
- 'Cacute' => 'Ć',
- 'cacute' => 'ć',
- 'Cap' => '⋒',
- 'cap' => '∩',
- 'capand' => '⩄',
- 'capbrcup' => '⩉',
- 'capcap' => '⩋',
- 'capcup' => '⩇',
- 'capdot' => '⩀',
- 'CapitalDifferentialD' => 'ⅅ',
- 'caps' => '∩︀',
- 'caret' => '⁁',
- 'caron' => 'ˇ',
- 'Cayleys' => 'ℭ',
- 'ccaps' => '⩍',
- 'Ccaron' => 'Č',
- 'ccaron' => 'č',
- 'Ccedil' => 'Ç',
- 'Ccedi' => 'Ç',
- 'ccedil' => 'ç',
- 'ccedi' => 'ç',
- 'Ccirc' => 'Ĉ',
- 'ccirc' => 'ĉ',
- 'Cconint' => '∰',
- 'ccups' => '⩌',
- 'ccupssm' => '⩐',
- 'Cdot' => 'Ċ',
- 'cdot' => 'ċ',
- 'cedil' => '¸',
- 'cedi' => '¸',
- 'Cedilla' => '¸',
- 'cemptyv' => '⦲',
- 'cent' => '¢',
- 'cen' => '¢',
- 'CenterDot' => '·',
- 'centerdot' => '·',
- 'Cfr' => 'ℭ',
- 'cfr' => '𝔠',
- 'CHcy' => 'Ч',
- 'chcy' => 'ч',
- 'check' => '✓',
- 'checkmark' => '✓',
- 'Chi' => 'Χ',
- 'chi' => 'χ',
- 'cir' => '○',
- 'circ' => 'ˆ',
- 'circeq' => '≗',
- 'circlearrowleft' => '↺',
- 'circlearrowright' => '↻',
- 'circledast' => '⊛',
- 'circledcirc' => '⊚',
- 'circleddash' => '⊝',
- 'CircleDot' => '⊙',
- 'circledR' => '®',
- 'circledS' => 'Ⓢ',
- 'CircleMinus' => '⊖',
- 'CirclePlus' => '⊕',
- 'CircleTimes' => '⊗',
- 'cirE' => '⧃',
- 'cire' => '≗',
- 'cirfnint' => '⨐',
- 'cirmid' => '⫯',
- 'cirscir' => '⧂',
- 'ClockwiseContourIntegral' => '∲',
- 'CloseCurlyDoubleQuote' => '”',
- 'CloseCurlyQuote' => '’',
- 'clubs' => '♣',
- 'clubsuit' => '♣',
- 'Colon' => '∷',
- 'colon' => ':',
- 'Colone' => '⩴',
- 'colone' => '≔',
- 'coloneq' => '≔',
- 'comma' => ',',
- 'commat' => '@',
- 'comp' => '∁',
- 'compfn' => '∘',
- 'complement' => '∁',
- 'complexes' => 'ℂ',
- 'cong' => '≅',
- 'congdot' => '⩭',
- 'Congruent' => '≡',
- 'Conint' => '∯',
- 'conint' => '∮',
- 'ContourIntegral' => '∮',
- 'Copf' => 'ℂ',
- 'copf' => '𝕔',
- 'coprod' => '∐',
- 'Coproduct' => '∐',
- 'COPY' => '©',
- 'COP' => '©',
- 'copy' => '©',
- 'cop' => '©',
- 'copysr' => '℗',
- 'CounterClockwiseContourIntegral' => '∳',
- 'crarr' => '↵',
- 'Cross' => '⨯',
- 'cross' => '✗',
- 'Cscr' => '𝒞',
- 'cscr' => '𝒸',
- 'csub' => '⫏',
- 'csube' => '⫑',
- 'csup' => '⫐',
- 'csupe' => '⫒',
- 'ctdot' => '⋯',
- 'cudarrl' => '⤸',
- 'cudarrr' => '⤵',
- 'cuepr' => '⋞',
- 'cuesc' => '⋟',
- 'cularr' => '↶',
- 'cularrp' => '⤽',
- 'Cup' => '⋓',
- 'cup' => '∪',
- 'cupbrcap' => '⩈',
- 'CupCap' => '≍',
- 'cupcap' => '⩆',
- 'cupcup' => '⩊',
- 'cupdot' => '⊍',
- 'cupor' => '⩅',
- 'cups' => '∪︀',
- 'curarr' => '↷',
- 'curarrm' => '⤼',
- 'curlyeqprec' => '⋞',
- 'curlyeqsucc' => '⋟',
- 'curlyvee' => '⋎',
- 'curlywedge' => '⋏',
- 'curren' => '¤',
- 'curre' => '¤',
- 'curvearrowleft' => '↶',
- 'curvearrowright' => '↷',
- 'cuvee' => '⋎',
- 'cuwed' => '⋏',
- 'cwconint' => '∲',
- 'cwint' => '∱',
- 'cylcty' => '⌭',
- 'Dagger' => '‡',
- 'dagger' => '†',
- 'daleth' => 'ℸ',
- 'Darr' => '↡',
- 'dArr' => '⇓',
- 'darr' => '↓',
- 'dash' => '‐',
- 'Dashv' => '⫤',
- 'dashv' => '⊣',
- 'dbkarow' => '⤏',
- 'dblac' => '˝',
- 'Dcaron' => 'Ď',
- 'dcaron' => 'ď',
- 'Dcy' => 'Д',
- 'dcy' => 'д',
- 'DD' => 'ⅅ',
- 'dd' => 'ⅆ',
- 'ddagger' => '‡',
- 'ddarr' => '⇊',
- 'DDotrahd' => '⤑',
- 'ddotseq' => '⩷',
- 'deg' => '°',
- 'de' => '°',
- 'Del' => '∇',
- 'Delta' => 'Δ',
- 'delta' => 'δ',
- 'demptyv' => '⦱',
- 'dfisht' => '⥿',
- 'Dfr' => '𝔇',
- 'dfr' => '𝔡',
- 'dHar' => '⥥',
- 'dharl' => '⇃',
- 'dharr' => '⇂',
- 'DiacriticalAcute' => '´',
- 'DiacriticalDot' => '˙',
- 'DiacriticalDoubleAcute' => '˝',
- 'DiacriticalGrave' => '`',
- 'DiacriticalTilde' => '˜',
- 'diam' => '⋄',
- 'Diamond' => '⋄',
- 'diamond' => '⋄',
- 'diamondsuit' => '♦',
- 'diams' => '♦',
- 'die' => '¨',
- 'DifferentialD' => 'ⅆ',
- 'digamma' => 'ϝ',
- 'disin' => '⋲',
- 'div' => '÷',
- 'divide' => '÷',
- 'divid' => '÷',
- 'divideontimes' => '⋇',
- 'divonx' => '⋇',
- 'DJcy' => 'Ђ',
- 'djcy' => 'ђ',
- 'dlcorn' => '⌞',
- 'dlcrop' => '⌍',
- 'dollar' => '$',
- 'Dopf' => '𝔻',
- 'dopf' => '𝕕',
- 'Dot' => '¨',
- 'dot' => '˙',
- 'DotDot' => '⃜',
- 'doteq' => '≐',
- 'doteqdot' => '≑',
- 'DotEqual' => '≐',
- 'dotminus' => '∸',
- 'dotplus' => '∔',
- 'dotsquare' => '⊡',
- 'doublebarwedge' => '⌆',
- 'DoubleContourIntegral' => '∯',
- 'DoubleDot' => '¨',
- 'DoubleDownArrow' => '⇓',
- 'DoubleLeftArrow' => '⇐',
- 'DoubleLeftRightArrow' => '⇔',
- 'DoubleLeftTee' => '⫤',
- 'DoubleLongLeftArrow' => '⟸',
- 'DoubleLongLeftRightArrow' => '⟺',
- 'DoubleLongRightArrow' => '⟹',
- 'DoubleRightArrow' => '⇒',
- 'DoubleRightTee' => '⊨',
- 'DoubleUpArrow' => '⇑',
- 'DoubleUpDownArrow' => '⇕',
- 'DoubleVerticalBar' => '∥',
- 'DownArrow' => '↓',
- 'Downarrow' => '⇓',
- 'downarrow' => '↓',
- 'DownArrowBar' => '⤓',
- 'DownArrowUpArrow' => '⇵',
- 'DownBreve' => '̑',
- 'downdownarrows' => '⇊',
- 'downharpoonleft' => '⇃',
- 'downharpoonright' => '⇂',
- 'DownLeftRightVector' => '⥐',
- 'DownLeftTeeVector' => '⥞',
- 'DownLeftVector' => '↽',
- 'DownLeftVectorBar' => '⥖',
- 'DownRightTeeVector' => '⥟',
- 'DownRightVector' => '⇁',
- 'DownRightVectorBar' => '⥗',
- 'DownTee' => '⊤',
- 'DownTeeArrow' => '↧',
- 'drbkarow' => '⤐',
- 'drcorn' => '⌟',
- 'drcrop' => '⌌',
- 'Dscr' => '𝒟',
- 'dscr' => '𝒹',
- 'DScy' => 'Ѕ',
- 'dscy' => 'ѕ',
- 'dsol' => '⧶',
- 'Dstrok' => 'Đ',
- 'dstrok' => 'đ',
- 'dtdot' => '⋱',
- 'dtri' => '▿',
- 'dtrif' => '▾',
- 'duarr' => '⇵',
- 'duhar' => '⥯',
- 'dwangle' => '⦦',
- 'DZcy' => 'Џ',
- 'dzcy' => 'џ',
- 'dzigrarr' => '⟿',
- 'Eacute' => 'É',
- 'Eacut' => 'É',
- 'eacute' => 'é',
- 'eacut' => 'é',
- 'easter' => '⩮',
- 'Ecaron' => 'Ě',
- 'ecaron' => 'ě',
- 'ecir' => 'ê',
- 'Ecirc' => 'Ê',
- 'Ecir' => 'Ê',
- 'ecirc' => 'ê',
- 'ecolon' => '≕',
- 'Ecy' => 'Э',
- 'ecy' => 'э',
- 'eDDot' => '⩷',
- 'Edot' => 'Ė',
- 'eDot' => '≑',
- 'edot' => 'ė',
- 'ee' => 'ⅇ',
- 'efDot' => '≒',
- 'Efr' => '𝔈',
- 'efr' => '𝔢',
- 'eg' => '⪚',
- 'Egrave' => 'È',
- 'Egrav' => 'È',
- 'egrave' => 'è',
- 'egrav' => 'è',
- 'egs' => '⪖',
- 'egsdot' => '⪘',
- 'el' => '⪙',
- 'Element' => '∈',
- 'elinters' => '⏧',
- 'ell' => 'ℓ',
- 'els' => '⪕',
- 'elsdot' => '⪗',
- 'Emacr' => 'Ē',
- 'emacr' => 'ē',
- 'empty' => '∅',
- 'emptyset' => '∅',
- 'EmptySmallSquare' => '◻',
- 'emptyv' => '∅',
- 'EmptyVerySmallSquare' => '▫',
- 'emsp' => ' ',
- 'emsp13' => ' ',
- 'emsp14' => ' ',
- 'ENG' => 'Ŋ',
- 'eng' => 'ŋ',
- 'ensp' => ' ',
- 'Eogon' => 'Ę',
- 'eogon' => 'ę',
- 'Eopf' => '𝔼',
- 'eopf' => '𝕖',
- 'epar' => '⋕',
- 'eparsl' => '⧣',
- 'eplus' => '⩱',
- 'epsi' => 'ε',
- 'Epsilon' => 'Ε',
- 'epsilon' => 'ε',
- 'epsiv' => 'ϵ',
- 'eqcirc' => '≖',
- 'eqcolon' => '≕',
- 'eqsim' => '≂',
- 'eqslantgtr' => '⪖',
- 'eqslantless' => '⪕',
- 'Equal' => '⩵',
- 'equals' => '=',
- 'EqualTilde' => '≂',
- 'equest' => '≟',
- 'Equilibrium' => '⇌',
- 'equiv' => '≡',
- 'equivDD' => '⩸',
- 'eqvparsl' => '⧥',
- 'erarr' => '⥱',
- 'erDot' => '≓',
- 'Escr' => 'ℰ',
- 'escr' => 'ℯ',
- 'esdot' => '≐',
- 'Esim' => '⩳',
- 'esim' => '≂',
- 'Eta' => 'Η',
- 'eta' => 'η',
- 'ETH' => 'Ð',
- 'ET' => 'Ð',
- 'eth' => 'ð',
- 'et' => 'ð',
- 'Euml' => 'Ë',
- 'Eum' => 'Ë',
- 'euml' => 'ë',
- 'eum' => 'ë',
- 'euro' => '€',
- 'excl' => '!',
- 'exist' => '∃',
- 'Exists' => '∃',
- 'expectation' => 'ℰ',
- 'ExponentialE' => 'ⅇ',
- 'exponentiale' => 'ⅇ',
- 'fallingdotseq' => '≒',
- 'Fcy' => 'Ф',
- 'fcy' => 'ф',
- 'female' => '♀',
- 'ffilig' => 'ffi',
- 'fflig' => 'ff',
- 'ffllig' => 'ffl',
- 'Ffr' => '𝔉',
- 'ffr' => '𝔣',
- 'filig' => 'fi',
- 'FilledSmallSquare' => '◼',
- 'FilledVerySmallSquare' => '▪',
- 'fjlig' => 'fj',
- 'flat' => '♭',
- 'fllig' => 'fl',
- 'fltns' => '▱',
- 'fnof' => 'ƒ',
- 'Fopf' => '𝔽',
- 'fopf' => '𝕗',
- 'ForAll' => '∀',
- 'forall' => '∀',
- 'fork' => '⋔',
- 'forkv' => '⫙',
- 'Fouriertrf' => 'ℱ',
- 'fpartint' => '⨍',
- 'frac12' => '½',
- 'frac1' => '¼',
- 'frac13' => '⅓',
- 'frac14' => '¼',
- 'frac15' => '⅕',
- 'frac16' => '⅙',
- 'frac18' => '⅛',
- 'frac23' => '⅔',
- 'frac25' => '⅖',
- 'frac34' => '¾',
- 'frac3' => '¾',
- 'frac35' => '⅗',
- 'frac38' => '⅜',
- 'frac45' => '⅘',
- 'frac56' => '⅚',
- 'frac58' => '⅝',
- 'frac78' => '⅞',
- 'frasl' => '⁄',
- 'frown' => '⌢',
- 'Fscr' => 'ℱ',
- 'fscr' => '𝒻',
- 'gacute' => 'ǵ',
- 'Gamma' => 'Γ',
- 'gamma' => 'γ',
- 'Gammad' => 'Ϝ',
- 'gammad' => 'ϝ',
- 'gap' => '⪆',
- 'Gbreve' => 'Ğ',
- 'gbreve' => 'ğ',
- 'Gcedil' => 'Ģ',
- 'Gcirc' => 'Ĝ',
- 'gcirc' => 'ĝ',
- 'Gcy' => 'Г',
- 'gcy' => 'г',
- 'Gdot' => 'Ġ',
- 'gdot' => 'ġ',
- 'gE' => '≧',
- 'ge' => '≥',
- 'gEl' => '⪌',
- 'gel' => '⋛',
- 'geq' => '≥',
- 'geqq' => '≧',
- 'geqslant' => '⩾',
- 'ges' => '⩾',
- 'gescc' => '⪩',
- 'gesdot' => '⪀',
- 'gesdoto' => '⪂',
- 'gesdotol' => '⪄',
- 'gesl' => '⋛︀',
- 'gesles' => '⪔',
- 'Gfr' => '𝔊',
- 'gfr' => '𝔤',
- 'Gg' => '⋙',
- 'gg' => '≫',
- 'ggg' => '⋙',
- 'gimel' => 'ℷ',
- 'GJcy' => 'Ѓ',
- 'gjcy' => 'ѓ',
- 'gl' => '≷',
- 'gla' => '⪥',
- 'glE' => '⪒',
- 'glj' => '⪤',
- 'gnap' => '⪊',
- 'gnapprox' => '⪊',
- 'gnE' => '≩',
- 'gne' => '⪈',
- 'gneq' => '⪈',
- 'gneqq' => '≩',
- 'gnsim' => '⋧',
- 'Gopf' => '𝔾',
- 'gopf' => '𝕘',
- 'grave' => '`',
- 'GreaterEqual' => '≥',
- 'GreaterEqualLess' => '⋛',
- 'GreaterFullEqual' => '≧',
- 'GreaterGreater' => '⪢',
- 'GreaterLess' => '≷',
- 'GreaterSlantEqual' => '⩾',
- 'GreaterTilde' => '≳',
- 'Gscr' => '𝒢',
- 'gscr' => 'ℊ',
- 'gsim' => '≳',
- 'gsime' => '⪎',
- 'gsiml' => '⪐',
- 'GT' => '>',
- 'G' => '>',
- 'Gt' => '≫',
- 'gt' => '>',
- 'g' => '>',
- 'gtcc' => '⪧',
- 'gtcir' => '⩺',
- 'gtdot' => '⋗',
- 'gtlPar' => '⦕',
- 'gtquest' => '⩼',
- 'gtrapprox' => '⪆',
- 'gtrarr' => '⥸',
- 'gtrdot' => '⋗',
- 'gtreqless' => '⋛',
- 'gtreqqless' => '⪌',
- 'gtrless' => '≷',
- 'gtrsim' => '≳',
- 'gvertneqq' => '≩︀',
- 'gvnE' => '≩︀',
- 'Hacek' => 'ˇ',
- 'hairsp' => ' ',
- 'half' => '½',
- 'hamilt' => 'ℋ',
- 'HARDcy' => 'Ъ',
- 'hardcy' => 'ъ',
- 'hArr' => '⇔',
- 'harr' => '↔',
- 'harrcir' => '⥈',
- 'harrw' => '↭',
- 'Hat' => '^',
- 'hbar' => 'ℏ',
- 'Hcirc' => 'Ĥ',
- 'hcirc' => 'ĥ',
- 'hearts' => '♥',
- 'heartsuit' => '♥',
- 'hellip' => '…',
- 'hercon' => '⊹',
- 'Hfr' => 'ℌ',
- 'hfr' => '𝔥',
- 'HilbertSpace' => 'ℋ',
- 'hksearow' => '⤥',
- 'hkswarow' => '⤦',
- 'hoarr' => '⇿',
- 'homtht' => '∻',
- 'hookleftarrow' => '↩',
- 'hookrightarrow' => '↪',
- 'Hopf' => 'ℍ',
- 'hopf' => '𝕙',
- 'horbar' => '―',
- 'HorizontalLine' => '─',
- 'Hscr' => 'ℋ',
- 'hscr' => '𝒽',
- 'hslash' => 'ℏ',
- 'Hstrok' => 'Ħ',
- 'hstrok' => 'ħ',
- 'HumpDownHump' => '≎',
- 'HumpEqual' => '≏',
- 'hybull' => '⁃',
- 'hyphen' => '‐',
- 'Iacute' => 'Í',
- 'Iacut' => 'Í',
- 'iacute' => 'í',
- 'iacut' => 'í',
- 'ic' => '⁣',
- 'Icirc' => 'Î',
- 'Icir' => 'Î',
- 'icirc' => 'î',
- 'icir' => 'î',
- 'Icy' => 'И',
- 'icy' => 'и',
- 'Idot' => 'İ',
- 'IEcy' => 'Е',
- 'iecy' => 'е',
- 'iexcl' => '¡',
- 'iexc' => '¡',
- 'iff' => '⇔',
- 'Ifr' => 'ℑ',
- 'ifr' => '𝔦',
- 'Igrave' => 'Ì',
- 'Igrav' => 'Ì',
- 'igrave' => 'ì',
- 'igrav' => 'ì',
- 'ii' => 'ⅈ',
- 'iiiint' => '⨌',
- 'iiint' => '∭',
- 'iinfin' => '⧜',
- 'iiota' => '℩',
- 'IJlig' => 'IJ',
- 'ijlig' => 'ij',
- 'Im' => 'ℑ',
- 'Imacr' => 'Ī',
- 'imacr' => 'ī',
- 'image' => 'ℑ',
- 'ImaginaryI' => 'ⅈ',
- 'imagline' => 'ℐ',
- 'imagpart' => 'ℑ',
- 'imath' => 'ı',
- 'imof' => '⊷',
- 'imped' => 'Ƶ',
- 'Implies' => '⇒',
- 'in' => '∈',
- 'incare' => '℅',
- 'infin' => '∞',
- 'infintie' => '⧝',
- 'inodot' => 'ı',
- 'Int' => '∬',
- 'int' => '∫',
- 'intcal' => '⊺',
- 'integers' => 'ℤ',
- 'Integral' => '∫',
- 'intercal' => '⊺',
- 'Intersection' => '⋂',
- 'intlarhk' => '⨗',
- 'intprod' => '⨼',
- 'InvisibleComma' => '⁣',
- 'InvisibleTimes' => '⁢',
- 'IOcy' => 'Ё',
- 'iocy' => 'ё',
- 'Iogon' => 'Į',
- 'iogon' => 'į',
- 'Iopf' => '𝕀',
- 'iopf' => '𝕚',
- 'Iota' => 'Ι',
- 'iota' => 'ι',
- 'iprod' => '⨼',
- 'iquest' => '¿',
- 'iques' => '¿',
- 'Iscr' => 'ℐ',
- 'iscr' => '𝒾',
- 'isin' => '∈',
- 'isindot' => '⋵',
- 'isinE' => '⋹',
- 'isins' => '⋴',
- 'isinsv' => '⋳',
- 'isinv' => '∈',
- 'it' => '⁢',
- 'Itilde' => 'Ĩ',
- 'itilde' => 'ĩ',
- 'Iukcy' => 'І',
- 'iukcy' => 'і',
- 'Iuml' => 'Ï',
- 'Ium' => 'Ï',
- 'iuml' => 'ï',
- 'ium' => 'ï',
- 'Jcirc' => 'Ĵ',
- 'jcirc' => 'ĵ',
- 'Jcy' => 'Й',
- 'jcy' => 'й',
- 'Jfr' => '𝔍',
- 'jfr' => '𝔧',
- 'jmath' => 'ȷ',
- 'Jopf' => '𝕁',
- 'jopf' => '𝕛',
- 'Jscr' => '𝒥',
- 'jscr' => '𝒿',
- 'Jsercy' => 'Ј',
- 'jsercy' => 'ј',
- 'Jukcy' => 'Є',
- 'jukcy' => 'є',
- 'Kappa' => 'Κ',
- 'kappa' => 'κ',
- 'kappav' => 'ϰ',
- 'Kcedil' => 'Ķ',
- 'kcedil' => 'ķ',
- 'Kcy' => 'К',
- 'kcy' => 'к',
- 'Kfr' => '𝔎',
- 'kfr' => '𝔨',
- 'kgreen' => 'ĸ',
- 'KHcy' => 'Х',
- 'khcy' => 'х',
- 'KJcy' => 'Ќ',
- 'kjcy' => 'ќ',
- 'Kopf' => '𝕂',
- 'kopf' => '𝕜',
- 'Kscr' => '𝒦',
- 'kscr' => '𝓀',
- 'lAarr' => '⇚',
- 'Lacute' => 'Ĺ',
- 'lacute' => 'ĺ',
- 'laemptyv' => '⦴',
- 'lagran' => 'ℒ',
- 'Lambda' => 'Λ',
- 'lambda' => 'λ',
- 'Lang' => '⟪',
- 'lang' => '⟨',
- 'langd' => '⦑',
- 'langle' => '⟨',
- 'lap' => '⪅',
- 'Laplacetrf' => 'ℒ',
- 'laquo' => '«',
- 'laqu' => '«',
- 'Larr' => '↞',
- 'lArr' => '⇐',
- 'larr' => '←',
- 'larrb' => '⇤',
- 'larrbfs' => '⤟',
- 'larrfs' => '⤝',
- 'larrhk' => '↩',
- 'larrlp' => '↫',
- 'larrpl' => '⤹',
- 'larrsim' => '⥳',
- 'larrtl' => '↢',
- 'lat' => '⪫',
- 'lAtail' => '⤛',
- 'latail' => '⤙',
- 'late' => '⪭',
- 'lates' => '⪭︀',
- 'lBarr' => '⤎',
- 'lbarr' => '⤌',
- 'lbbrk' => '❲',
- 'lbrace' => '{',
- 'lbrack' => '[',
- 'lbrke' => '⦋',
- 'lbrksld' => '⦏',
- 'lbrkslu' => '⦍',
- 'Lcaron' => 'Ľ',
- 'lcaron' => 'ľ',
- 'Lcedil' => 'Ļ',
- 'lcedil' => 'ļ',
- 'lceil' => '⌈',
- 'lcub' => '{',
- 'Lcy' => 'Л',
- 'lcy' => 'л',
- 'ldca' => '⤶',
- 'ldquo' => '“',
- 'ldquor' => '„',
- 'ldrdhar' => '⥧',
- 'ldrushar' => '⥋',
- 'ldsh' => '↲',
- 'lE' => '≦',
- 'le' => '≤',
- 'LeftAngleBracket' => '⟨',
- 'LeftArrow' => '←',
- 'Leftarrow' => '⇐',
- 'leftarrow' => '←',
- 'LeftArrowBar' => '⇤',
- 'LeftArrowRightArrow' => '⇆',
- 'leftarrowtail' => '↢',
- 'LeftCeiling' => '⌈',
- 'LeftDoubleBracket' => '⟦',
- 'LeftDownTeeVector' => '⥡',
- 'LeftDownVector' => '⇃',
- 'LeftDownVectorBar' => '⥙',
- 'LeftFloor' => '⌊',
- 'leftharpoondown' => '↽',
- 'leftharpoonup' => '↼',
- 'leftleftarrows' => '⇇',
- 'LeftRightArrow' => '↔',
- 'Leftrightarrow' => '⇔',
- 'leftrightarrow' => '↔',
- 'leftrightarrows' => '⇆',
- 'leftrightharpoons' => '⇋',
- 'leftrightsquigarrow' => '↭',
- 'LeftRightVector' => '⥎',
- 'LeftTee' => '⊣',
- 'LeftTeeArrow' => '↤',
- 'LeftTeeVector' => '⥚',
- 'leftthreetimes' => '⋋',
- 'LeftTriangle' => '⊲',
- 'LeftTriangleBar' => '⧏',
- 'LeftTriangleEqual' => '⊴',
- 'LeftUpDownVector' => '⥑',
- 'LeftUpTeeVector' => '⥠',
- 'LeftUpVector' => '↿',
- 'LeftUpVectorBar' => '⥘',
- 'LeftVector' => '↼',
- 'LeftVectorBar' => '⥒',
- 'lEg' => '⪋',
- 'leg' => '⋚',
- 'leq' => '≤',
- 'leqq' => '≦',
- 'leqslant' => '⩽',
- 'les' => '⩽',
- 'lescc' => '⪨',
- 'lesdot' => '⩿',
- 'lesdoto' => '⪁',
- 'lesdotor' => '⪃',
- 'lesg' => '⋚︀',
- 'lesges' => '⪓',
- 'lessapprox' => '⪅',
- 'lessdot' => '⋖',
- 'lesseqgtr' => '⋚',
- 'lesseqqgtr' => '⪋',
- 'LessEqualGreater' => '⋚',
- 'LessFullEqual' => '≦',
- 'LessGreater' => '≶',
- 'lessgtr' => '≶',
- 'LessLess' => '⪡',
- 'lesssim' => '≲',
- 'LessSlantEqual' => '⩽',
- 'LessTilde' => '≲',
- 'lfisht' => '⥼',
- 'lfloor' => '⌊',
- 'Lfr' => '𝔏',
- 'lfr' => '𝔩',
- 'lg' => '≶',
- 'lgE' => '⪑',
- 'lHar' => '⥢',
- 'lhard' => '↽',
- 'lharu' => '↼',
- 'lharul' => '⥪',
- 'lhblk' => '▄',
- 'LJcy' => 'Љ',
- 'ljcy' => 'љ',
- 'Ll' => '⋘',
- 'll' => '≪',
- 'llarr' => '⇇',
- 'llcorner' => '⌞',
- 'Lleftarrow' => '⇚',
- 'llhard' => '⥫',
- 'lltri' => '◺',
- 'Lmidot' => 'Ŀ',
- 'lmidot' => 'ŀ',
- 'lmoust' => '⎰',
- 'lmoustache' => '⎰',
- 'lnap' => '⪉',
- 'lnapprox' => '⪉',
- 'lnE' => '≨',
- 'lne' => '⪇',
- 'lneq' => '⪇',
- 'lneqq' => '≨',
- 'lnsim' => '⋦',
- 'loang' => '⟬',
- 'loarr' => '⇽',
- 'lobrk' => '⟦',
- 'LongLeftArrow' => '⟵',
- 'Longleftarrow' => '⟸',
- 'longleftarrow' => '⟵',
- 'LongLeftRightArrow' => '⟷',
- 'Longleftrightarrow' => '⟺',
- 'longleftrightarrow' => '⟷',
- 'longmapsto' => '⟼',
- 'LongRightArrow' => '⟶',
- 'Longrightarrow' => '⟹',
- 'longrightarrow' => '⟶',
- 'looparrowleft' => '↫',
- 'looparrowright' => '↬',
- 'lopar' => '⦅',
- 'Lopf' => '𝕃',
- 'lopf' => '𝕝',
- 'loplus' => '⨭',
- 'lotimes' => '⨴',
- 'lowast' => '∗',
- 'lowbar' => '_',
- 'LowerLeftArrow' => '↙',
- 'LowerRightArrow' => '↘',
- 'loz' => '◊',
- 'lozenge' => '◊',
- 'lozf' => '⧫',
- 'lpar' => '(',
- 'lparlt' => '⦓',
- 'lrarr' => '⇆',
- 'lrcorner' => '⌟',
- 'lrhar' => '⇋',
- 'lrhard' => '⥭',
- 'lrm' => '‎',
- 'lrtri' => '⊿',
- 'lsaquo' => '‹',
- 'Lscr' => 'ℒ',
- 'lscr' => '𝓁',
- 'Lsh' => '↰',
- 'lsh' => '↰',
- 'lsim' => '≲',
- 'lsime' => '⪍',
- 'lsimg' => '⪏',
- 'lsqb' => '[',
- 'lsquo' => '‘',
- 'lsquor' => '‚',
- 'Lstrok' => 'Ł',
- 'lstrok' => 'ł',
- 'LT' => '<',
- 'L' => '<',
- 'Lt' => '≪',
- 'lt' => '<',
- 'l' => '<',
- 'ltcc' => '⪦',
- 'ltcir' => '⩹',
- 'ltdot' => '⋖',
- 'lthree' => '⋋',
- 'ltimes' => '⋉',
- 'ltlarr' => '⥶',
- 'ltquest' => '⩻',
- 'ltri' => '◃',
- 'ltrie' => '⊴',
- 'ltrif' => '◂',
- 'ltrPar' => '⦖',
- 'lurdshar' => '⥊',
- 'luruhar' => '⥦',
- 'lvertneqq' => '≨︀',
- 'lvnE' => '≨︀',
- 'macr' => '¯',
- 'mac' => '¯',
- 'male' => '♂',
- 'malt' => '✠',
- 'maltese' => '✠',
- 'Map' => '⤅',
- 'map' => '↦',
- 'mapsto' => '↦',
- 'mapstodown' => '↧',
- 'mapstoleft' => '↤',
- 'mapstoup' => '↥',
- 'marker' => '▮',
- 'mcomma' => '⨩',
- 'Mcy' => 'М',
- 'mcy' => 'м',
- 'mdash' => '—',
- 'mDDot' => '∺',
- 'measuredangle' => '∡',
- 'MediumSpace' => ' ',
- 'Mellintrf' => 'ℳ',
- 'Mfr' => '𝔐',
- 'mfr' => '𝔪',
- 'mho' => '℧',
- 'micro' => 'µ',
- 'micr' => 'µ',
- 'mid' => '∣',
- 'midast' => '*',
- 'midcir' => '⫰',
- 'middot' => '·',
- 'middo' => '·',
- 'minus' => '−',
- 'minusb' => '⊟',
- 'minusd' => '∸',
- 'minusdu' => '⨪',
- 'MinusPlus' => '∓',
- 'mlcp' => '⫛',
- 'mldr' => '…',
- 'mnplus' => '∓',
- 'models' => '⊧',
- 'Mopf' => '𝕄',
- 'mopf' => '𝕞',
- 'mp' => '∓',
- 'Mscr' => 'ℳ',
- 'mscr' => '𝓂',
- 'mstpos' => '∾',
- 'Mu' => 'Μ',
- 'mu' => 'μ',
- 'multimap' => '⊸',
- 'mumap' => '⊸',
- 'nabla' => '∇',
- 'Nacute' => 'Ń',
- 'nacute' => 'ń',
- 'nang' => '∠⃒',
- 'nap' => '≉',
- 'napE' => '⩰̸',
- 'napid' => '≋̸',
- 'napos' => 'ʼn',
- 'napprox' => '≉',
- 'natur' => '♮',
- 'natural' => '♮',
- 'naturals' => 'ℕ',
- 'nbsp' => ' ',
- 'nbs' => ' ',
- 'nbump' => '≎̸',
- 'nbumpe' => '≏̸',
- 'ncap' => '⩃',
- 'Ncaron' => 'Ň',
- 'ncaron' => 'ň',
- 'Ncedil' => 'Ņ',
- 'ncedil' => 'ņ',
- 'ncong' => '≇',
- 'ncongdot' => '⩭̸',
- 'ncup' => '⩂',
- 'Ncy' => 'Н',
- 'ncy' => 'н',
- 'ndash' => '–',
- 'ne' => '≠',
- 'nearhk' => '⤤',
- 'neArr' => '⇗',
- 'nearr' => '↗',
- 'nearrow' => '↗',
- 'nedot' => '≐̸',
- 'NegativeMediumSpace' => '​',
- 'NegativeThickSpace' => '​',
- 'NegativeThinSpace' => '​',
- 'NegativeVeryThinSpace' => '​',
- 'nequiv' => '≢',
- 'nesear' => '⤨',
- 'nesim' => '≂̸',
- 'NestedGreaterGreater' => '≫',
- 'NestedLessLess' => '≪',
- 'NewLine' => '
+
+/**
+ * Entity lookup tables.
+ * This class is automatically generated.
+ */
+class Entities
+{
+
+ public static $byName = array(
+ 'Aacute' => 'Á',
+ 'Aacut' => 'Á',
+ 'aacute' => 'á',
+ 'aacut' => 'á',
+ 'Abreve' => 'Ă',
+ 'abreve' => 'ă',
+ 'ac' => '∾',
+ 'acd' => '∿',
+ 'acE' => '∾̳',
+ 'Acirc' => 'Â',
+ 'Acir' => 'Â',
+ 'acirc' => 'â',
+ 'acir' => 'â',
+ 'acute' => '´',
+ 'acut' => '´',
+ 'Acy' => 'А',
+ 'acy' => 'а',
+ 'AElig' => 'Æ',
+ 'AEli' => 'Æ',
+ 'aelig' => 'æ',
+ 'aeli' => 'æ',
+ 'af' => '⁡',
+ 'Afr' => '𝔄',
+ 'afr' => '𝔞',
+ 'Agrave' => 'À',
+ 'Agrav' => 'À',
+ 'agrave' => 'à',
+ 'agrav' => 'à',
+ 'alefsym' => 'ℵ',
+ 'aleph' => 'ℵ',
+ 'Alpha' => 'Α',
+ 'alpha' => 'α',
+ 'Amacr' => 'Ā',
+ 'amacr' => 'ā',
+ 'amalg' => '⨿',
+ 'AMP' => '&',
+ 'AM' => '&',
+ 'amp' => '&',
+ 'am' => '&',
+ 'And' => '⩓',
+ 'and' => '∧',
+ 'andand' => '⩕',
+ 'andd' => '⩜',
+ 'andslope' => '⩘',
+ 'andv' => '⩚',
+ 'ang' => '∠',
+ 'ange' => '⦤',
+ 'angle' => '∠',
+ 'angmsd' => '∡',
+ 'angmsdaa' => '⦨',
+ 'angmsdab' => '⦩',
+ 'angmsdac' => '⦪',
+ 'angmsdad' => '⦫',
+ 'angmsdae' => '⦬',
+ 'angmsdaf' => '⦭',
+ 'angmsdag' => '⦮',
+ 'angmsdah' => '⦯',
+ 'angrt' => '∟',
+ 'angrtvb' => '⊾',
+ 'angrtvbd' => '⦝',
+ 'angsph' => '∢',
+ 'angst' => 'Å',
+ 'angzarr' => '⍼',
+ 'Aogon' => 'Ą',
+ 'aogon' => 'ą',
+ 'Aopf' => '𝔸',
+ 'aopf' => '𝕒',
+ 'ap' => '≈',
+ 'apacir' => '⩯',
+ 'apE' => '⩰',
+ 'ape' => '≊',
+ 'apid' => '≋',
+ 'apos' => '\'',
+ 'ApplyFunction' => '⁡',
+ 'approx' => '≈',
+ 'approxeq' => '≊',
+ 'Aring' => 'Å',
+ 'Arin' => 'Å',
+ 'aring' => 'å',
+ 'arin' => 'å',
+ 'Ascr' => '𝒜',
+ 'ascr' => '𝒶',
+ 'Assign' => '≔',
+ 'ast' => '*',
+ 'asymp' => '≈',
+ 'asympeq' => '≍',
+ 'Atilde' => 'Ã',
+ 'Atild' => 'Ã',
+ 'atilde' => 'ã',
+ 'atild' => 'ã',
+ 'Auml' => 'Ä',
+ 'Aum' => 'Ä',
+ 'auml' => 'ä',
+ 'aum' => 'ä',
+ 'awconint' => '∳',
+ 'awint' => '⨑',
+ 'backcong' => '≌',
+ 'backepsilon' => '϶',
+ 'backprime' => '‵',
+ 'backsim' => '∽',
+ 'backsimeq' => '⋍',
+ 'Backslash' => '∖',
+ 'Barv' => '⫧',
+ 'barvee' => '⊽',
+ 'Barwed' => '⌆',
+ 'barwed' => '⌅',
+ 'barwedge' => '⌅',
+ 'bbrk' => '⎵',
+ 'bbrktbrk' => '⎶',
+ 'bcong' => '≌',
+ 'Bcy' => 'Б',
+ 'bcy' => 'б',
+ 'bdquo' => '„',
+ 'becaus' => '∵',
+ 'Because' => '∵',
+ 'because' => '∵',
+ 'bemptyv' => '⦰',
+ 'bepsi' => '϶',
+ 'bernou' => 'ℬ',
+ 'Bernoullis' => 'ℬ',
+ 'Beta' => 'Β',
+ 'beta' => 'β',
+ 'beth' => 'ℶ',
+ 'between' => '≬',
+ 'Bfr' => '𝔅',
+ 'bfr' => '𝔟',
+ 'bigcap' => '⋂',
+ 'bigcirc' => '◯',
+ 'bigcup' => '⋃',
+ 'bigodot' => '⨀',
+ 'bigoplus' => '⨁',
+ 'bigotimes' => '⨂',
+ 'bigsqcup' => '⨆',
+ 'bigstar' => '★',
+ 'bigtriangledown' => '▽',
+ 'bigtriangleup' => '△',
+ 'biguplus' => '⨄',
+ 'bigvee' => '⋁',
+ 'bigwedge' => '⋀',
+ 'bkarow' => '⤍',
+ 'blacklozenge' => '⧫',
+ 'blacksquare' => '▪',
+ 'blacktriangle' => '▴',
+ 'blacktriangledown' => '▾',
+ 'blacktriangleleft' => '◂',
+ 'blacktriangleright' => '▸',
+ 'blank' => '␣',
+ 'blk12' => '▒',
+ 'blk14' => '░',
+ 'blk34' => '▓',
+ 'block' => '█',
+ 'bne' => '=⃥',
+ 'bnequiv' => '≡⃥',
+ 'bNot' => '⫭',
+ 'bnot' => '⌐',
+ 'Bopf' => '𝔹',
+ 'bopf' => '𝕓',
+ 'bot' => '⊥',
+ 'bottom' => '⊥',
+ 'bowtie' => '⋈',
+ 'boxbox' => '⧉',
+ 'boxDL' => '╗',
+ 'boxDl' => '╖',
+ 'boxdL' => '╕',
+ 'boxdl' => '┐',
+ 'boxDR' => '╔',
+ 'boxDr' => '╓',
+ 'boxdR' => '╒',
+ 'boxdr' => '┌',
+ 'boxH' => '═',
+ 'boxh' => '─',
+ 'boxHD' => '╦',
+ 'boxHd' => '╤',
+ 'boxhD' => '╥',
+ 'boxhd' => '┬',
+ 'boxHU' => '╩',
+ 'boxHu' => '╧',
+ 'boxhU' => '╨',
+ 'boxhu' => '┴',
+ 'boxminus' => '⊟',
+ 'boxplus' => '⊞',
+ 'boxtimes' => '⊠',
+ 'boxUL' => '╝',
+ 'boxUl' => '╜',
+ 'boxuL' => '╛',
+ 'boxul' => '┘',
+ 'boxUR' => '╚',
+ 'boxUr' => '╙',
+ 'boxuR' => '╘',
+ 'boxur' => '└',
+ 'boxV' => '║',
+ 'boxv' => '│',
+ 'boxVH' => '╬',
+ 'boxVh' => '╫',
+ 'boxvH' => '╪',
+ 'boxvh' => '┼',
+ 'boxVL' => '╣',
+ 'boxVl' => '╢',
+ 'boxvL' => '╡',
+ 'boxvl' => '┤',
+ 'boxVR' => '╠',
+ 'boxVr' => '╟',
+ 'boxvR' => '╞',
+ 'boxvr' => '├',
+ 'bprime' => '‵',
+ 'Breve' => '˘',
+ 'breve' => '˘',
+ 'brvbar' => '¦',
+ 'brvba' => '¦',
+ 'Bscr' => 'ℬ',
+ 'bscr' => '𝒷',
+ 'bsemi' => '⁏',
+ 'bsim' => '∽',
+ 'bsime' => '⋍',
+ 'bsol' => '\\',
+ 'bsolb' => '⧅',
+ 'bsolhsub' => '⟈',
+ 'bull' => '•',
+ 'bullet' => '•',
+ 'bump' => '≎',
+ 'bumpE' => '⪮',
+ 'bumpe' => '≏',
+ 'Bumpeq' => '≎',
+ 'bumpeq' => '≏',
+ 'Cacute' => 'Ć',
+ 'cacute' => 'ć',
+ 'Cap' => '⋒',
+ 'cap' => '∩',
+ 'capand' => '⩄',
+ 'capbrcup' => '⩉',
+ 'capcap' => '⩋',
+ 'capcup' => '⩇',
+ 'capdot' => '⩀',
+ 'CapitalDifferentialD' => 'ⅅ',
+ 'caps' => '∩︀',
+ 'caret' => '⁁',
+ 'caron' => 'ˇ',
+ 'Cayleys' => 'ℭ',
+ 'ccaps' => '⩍',
+ 'Ccaron' => 'Č',
+ 'ccaron' => 'č',
+ 'Ccedil' => 'Ç',
+ 'Ccedi' => 'Ç',
+ 'ccedil' => 'ç',
+ 'ccedi' => 'ç',
+ 'Ccirc' => 'Ĉ',
+ 'ccirc' => 'ĉ',
+ 'Cconint' => '∰',
+ 'ccups' => '⩌',
+ 'ccupssm' => '⩐',
+ 'Cdot' => 'Ċ',
+ 'cdot' => 'ċ',
+ 'cedil' => '¸',
+ 'cedi' => '¸',
+ 'Cedilla' => '¸',
+ 'cemptyv' => '⦲',
+ 'cent' => '¢',
+ 'cen' => '¢',
+ 'CenterDot' => '·',
+ 'centerdot' => '·',
+ 'Cfr' => 'ℭ',
+ 'cfr' => '𝔠',
+ 'CHcy' => 'Ч',
+ 'chcy' => 'ч',
+ 'check' => '✓',
+ 'checkmark' => '✓',
+ 'Chi' => 'Χ',
+ 'chi' => 'χ',
+ 'cir' => '○',
+ 'circ' => 'ˆ',
+ 'circeq' => '≗',
+ 'circlearrowleft' => '↺',
+ 'circlearrowright' => '↻',
+ 'circledast' => '⊛',
+ 'circledcirc' => '⊚',
+ 'circleddash' => '⊝',
+ 'CircleDot' => '⊙',
+ 'circledR' => '®',
+ 'circledS' => 'Ⓢ',
+ 'CircleMinus' => '⊖',
+ 'CirclePlus' => '⊕',
+ 'CircleTimes' => '⊗',
+ 'cirE' => '⧃',
+ 'cire' => '≗',
+ 'cirfnint' => '⨐',
+ 'cirmid' => '⫯',
+ 'cirscir' => '⧂',
+ 'ClockwiseContourIntegral' => '∲',
+ 'CloseCurlyDoubleQuote' => '”',
+ 'CloseCurlyQuote' => '’',
+ 'clubs' => '♣',
+ 'clubsuit' => '♣',
+ 'Colon' => '∷',
+ 'colon' => ':',
+ 'Colone' => '⩴',
+ 'colone' => '≔',
+ 'coloneq' => '≔',
+ 'comma' => ',',
+ 'commat' => '@',
+ 'comp' => '∁',
+ 'compfn' => '∘',
+ 'complement' => '∁',
+ 'complexes' => 'ℂ',
+ 'cong' => '≅',
+ 'congdot' => '⩭',
+ 'Congruent' => '≡',
+ 'Conint' => '∯',
+ 'conint' => '∮',
+ 'ContourIntegral' => '∮',
+ 'Copf' => 'ℂ',
+ 'copf' => '𝕔',
+ 'coprod' => '∐',
+ 'Coproduct' => '∐',
+ 'COPY' => '©',
+ 'COP' => '©',
+ 'copy' => '©',
+ 'cop' => '©',
+ 'copysr' => '℗',
+ 'CounterClockwiseContourIntegral' => '∳',
+ 'crarr' => '↵',
+ 'Cross' => '⨯',
+ 'cross' => '✗',
+ 'Cscr' => '𝒞',
+ 'cscr' => '𝒸',
+ 'csub' => '⫏',
+ 'csube' => '⫑',
+ 'csup' => '⫐',
+ 'csupe' => '⫒',
+ 'ctdot' => '⋯',
+ 'cudarrl' => '⤸',
+ 'cudarrr' => '⤵',
+ 'cuepr' => '⋞',
+ 'cuesc' => '⋟',
+ 'cularr' => '↶',
+ 'cularrp' => '⤽',
+ 'Cup' => '⋓',
+ 'cup' => '∪',
+ 'cupbrcap' => '⩈',
+ 'CupCap' => '≍',
+ 'cupcap' => '⩆',
+ 'cupcup' => '⩊',
+ 'cupdot' => '⊍',
+ 'cupor' => '⩅',
+ 'cups' => '∪︀',
+ 'curarr' => '↷',
+ 'curarrm' => '⤼',
+ 'curlyeqprec' => '⋞',
+ 'curlyeqsucc' => '⋟',
+ 'curlyvee' => '⋎',
+ 'curlywedge' => '⋏',
+ 'curren' => '¤',
+ 'curre' => '¤',
+ 'curvearrowleft' => '↶',
+ 'curvearrowright' => '↷',
+ 'cuvee' => '⋎',
+ 'cuwed' => '⋏',
+ 'cwconint' => '∲',
+ 'cwint' => '∱',
+ 'cylcty' => '⌭',
+ 'Dagger' => '‡',
+ 'dagger' => '†',
+ 'daleth' => 'ℸ',
+ 'Darr' => '↡',
+ 'dArr' => '⇓',
+ 'darr' => '↓',
+ 'dash' => '‐',
+ 'Dashv' => '⫤',
+ 'dashv' => '⊣',
+ 'dbkarow' => '⤏',
+ 'dblac' => '˝',
+ 'Dcaron' => 'Ď',
+ 'dcaron' => 'ď',
+ 'Dcy' => 'Д',
+ 'dcy' => 'д',
+ 'DD' => 'ⅅ',
+ 'dd' => 'ⅆ',
+ 'ddagger' => '‡',
+ 'ddarr' => '⇊',
+ 'DDotrahd' => '⤑',
+ 'ddotseq' => '⩷',
+ 'deg' => '°',
+ 'de' => '°',
+ 'Del' => '∇',
+ 'Delta' => 'Δ',
+ 'delta' => 'δ',
+ 'demptyv' => '⦱',
+ 'dfisht' => '⥿',
+ 'Dfr' => '𝔇',
+ 'dfr' => '𝔡',
+ 'dHar' => '⥥',
+ 'dharl' => '⇃',
+ 'dharr' => '⇂',
+ 'DiacriticalAcute' => '´',
+ 'DiacriticalDot' => '˙',
+ 'DiacriticalDoubleAcute' => '˝',
+ 'DiacriticalGrave' => '`',
+ 'DiacriticalTilde' => '˜',
+ 'diam' => '⋄',
+ 'Diamond' => '⋄',
+ 'diamond' => '⋄',
+ 'diamondsuit' => '♦',
+ 'diams' => '♦',
+ 'die' => '¨',
+ 'DifferentialD' => 'ⅆ',
+ 'digamma' => 'ϝ',
+ 'disin' => '⋲',
+ 'div' => '÷',
+ 'divide' => '÷',
+ 'divid' => '÷',
+ 'divideontimes' => '⋇',
+ 'divonx' => '⋇',
+ 'DJcy' => 'Ђ',
+ 'djcy' => 'ђ',
+ 'dlcorn' => '⌞',
+ 'dlcrop' => '⌍',
+ 'dollar' => '$',
+ 'Dopf' => '𝔻',
+ 'dopf' => '𝕕',
+ 'Dot' => '¨',
+ 'dot' => '˙',
+ 'DotDot' => '⃜',
+ 'doteq' => '≐',
+ 'doteqdot' => '≑',
+ 'DotEqual' => '≐',
+ 'dotminus' => '∸',
+ 'dotplus' => '∔',
+ 'dotsquare' => '⊡',
+ 'doublebarwedge' => '⌆',
+ 'DoubleContourIntegral' => '∯',
+ 'DoubleDot' => '¨',
+ 'DoubleDownArrow' => '⇓',
+ 'DoubleLeftArrow' => '⇐',
+ 'DoubleLeftRightArrow' => '⇔',
+ 'DoubleLeftTee' => '⫤',
+ 'DoubleLongLeftArrow' => '⟸',
+ 'DoubleLongLeftRightArrow' => '⟺',
+ 'DoubleLongRightArrow' => '⟹',
+ 'DoubleRightArrow' => '⇒',
+ 'DoubleRightTee' => '⊨',
+ 'DoubleUpArrow' => '⇑',
+ 'DoubleUpDownArrow' => '⇕',
+ 'DoubleVerticalBar' => '∥',
+ 'DownArrow' => '↓',
+ 'Downarrow' => '⇓',
+ 'downarrow' => '↓',
+ 'DownArrowBar' => '⤓',
+ 'DownArrowUpArrow' => '⇵',
+ 'DownBreve' => '̑',
+ 'downdownarrows' => '⇊',
+ 'downharpoonleft' => '⇃',
+ 'downharpoonright' => '⇂',
+ 'DownLeftRightVector' => '⥐',
+ 'DownLeftTeeVector' => '⥞',
+ 'DownLeftVector' => '↽',
+ 'DownLeftVectorBar' => '⥖',
+ 'DownRightTeeVector' => '⥟',
+ 'DownRightVector' => '⇁',
+ 'DownRightVectorBar' => '⥗',
+ 'DownTee' => '⊤',
+ 'DownTeeArrow' => '↧',
+ 'drbkarow' => '⤐',
+ 'drcorn' => '⌟',
+ 'drcrop' => '⌌',
+ 'Dscr' => '𝒟',
+ 'dscr' => '𝒹',
+ 'DScy' => 'Ѕ',
+ 'dscy' => 'ѕ',
+ 'dsol' => '⧶',
+ 'Dstrok' => 'Đ',
+ 'dstrok' => 'đ',
+ 'dtdot' => '⋱',
+ 'dtri' => '▿',
+ 'dtrif' => '▾',
+ 'duarr' => '⇵',
+ 'duhar' => '⥯',
+ 'dwangle' => '⦦',
+ 'DZcy' => 'Џ',
+ 'dzcy' => 'џ',
+ 'dzigrarr' => '⟿',
+ 'Eacute' => 'É',
+ 'Eacut' => 'É',
+ 'eacute' => 'é',
+ 'eacut' => 'é',
+ 'easter' => '⩮',
+ 'Ecaron' => 'Ě',
+ 'ecaron' => 'ě',
+ 'ecir' => 'ê',
+ 'Ecirc' => 'Ê',
+ 'Ecir' => 'Ê',
+ 'ecirc' => 'ê',
+ 'ecolon' => '≕',
+ 'Ecy' => 'Э',
+ 'ecy' => 'э',
+ 'eDDot' => '⩷',
+ 'Edot' => 'Ė',
+ 'eDot' => '≑',
+ 'edot' => 'ė',
+ 'ee' => 'ⅇ',
+ 'efDot' => '≒',
+ 'Efr' => '𝔈',
+ 'efr' => '𝔢',
+ 'eg' => '⪚',
+ 'Egrave' => 'È',
+ 'Egrav' => 'È',
+ 'egrave' => 'è',
+ 'egrav' => 'è',
+ 'egs' => '⪖',
+ 'egsdot' => '⪘',
+ 'el' => '⪙',
+ 'Element' => '∈',
+ 'elinters' => '⏧',
+ 'ell' => 'ℓ',
+ 'els' => '⪕',
+ 'elsdot' => '⪗',
+ 'Emacr' => 'Ē',
+ 'emacr' => 'ē',
+ 'empty' => '∅',
+ 'emptyset' => '∅',
+ 'EmptySmallSquare' => '◻',
+ 'emptyv' => '∅',
+ 'EmptyVerySmallSquare' => '▫',
+ 'emsp' => ' ',
+ 'emsp13' => ' ',
+ 'emsp14' => ' ',
+ 'ENG' => 'Ŋ',
+ 'eng' => 'ŋ',
+ 'ensp' => ' ',
+ 'Eogon' => 'Ę',
+ 'eogon' => 'ę',
+ 'Eopf' => '𝔼',
+ 'eopf' => '𝕖',
+ 'epar' => '⋕',
+ 'eparsl' => '⧣',
+ 'eplus' => '⩱',
+ 'epsi' => 'ε',
+ 'Epsilon' => 'Ε',
+ 'epsilon' => 'ε',
+ 'epsiv' => 'ϵ',
+ 'eqcirc' => '≖',
+ 'eqcolon' => '≕',
+ 'eqsim' => '≂',
+ 'eqslantgtr' => '⪖',
+ 'eqslantless' => '⪕',
+ 'Equal' => '⩵',
+ 'equals' => '=',
+ 'EqualTilde' => '≂',
+ 'equest' => '≟',
+ 'Equilibrium' => '⇌',
+ 'equiv' => '≡',
+ 'equivDD' => '⩸',
+ 'eqvparsl' => '⧥',
+ 'erarr' => '⥱',
+ 'erDot' => '≓',
+ 'Escr' => 'ℰ',
+ 'escr' => 'ℯ',
+ 'esdot' => '≐',
+ 'Esim' => '⩳',
+ 'esim' => '≂',
+ 'Eta' => 'Η',
+ 'eta' => 'η',
+ 'ETH' => 'Ð',
+ 'ET' => 'Ð',
+ 'eth' => 'ð',
+ 'et' => 'ð',
+ 'Euml' => 'Ë',
+ 'Eum' => 'Ë',
+ 'euml' => 'ë',
+ 'eum' => 'ë',
+ 'euro' => '€',
+ 'excl' => '!',
+ 'exist' => '∃',
+ 'Exists' => '∃',
+ 'expectation' => 'ℰ',
+ 'ExponentialE' => 'ⅇ',
+ 'exponentiale' => 'ⅇ',
+ 'fallingdotseq' => '≒',
+ 'Fcy' => 'Ф',
+ 'fcy' => 'ф',
+ 'female' => '♀',
+ 'ffilig' => 'ffi',
+ 'fflig' => 'ff',
+ 'ffllig' => 'ffl',
+ 'Ffr' => '𝔉',
+ 'ffr' => '𝔣',
+ 'filig' => 'fi',
+ 'FilledSmallSquare' => '◼',
+ 'FilledVerySmallSquare' => '▪',
+ 'fjlig' => 'fj',
+ 'flat' => '♭',
+ 'fllig' => 'fl',
+ 'fltns' => '▱',
+ 'fnof' => 'ƒ',
+ 'Fopf' => '𝔽',
+ 'fopf' => '𝕗',
+ 'ForAll' => '∀',
+ 'forall' => '∀',
+ 'fork' => '⋔',
+ 'forkv' => '⫙',
+ 'Fouriertrf' => 'ℱ',
+ 'fpartint' => '⨍',
+ 'frac12' => '½',
+ 'frac1' => '¼',
+ 'frac13' => '⅓',
+ 'frac14' => '¼',
+ 'frac15' => '⅕',
+ 'frac16' => '⅙',
+ 'frac18' => '⅛',
+ 'frac23' => '⅔',
+ 'frac25' => '⅖',
+ 'frac34' => '¾',
+ 'frac3' => '¾',
+ 'frac35' => '⅗',
+ 'frac38' => '⅜',
+ 'frac45' => '⅘',
+ 'frac56' => '⅚',
+ 'frac58' => '⅝',
+ 'frac78' => '⅞',
+ 'frasl' => '⁄',
+ 'frown' => '⌢',
+ 'Fscr' => 'ℱ',
+ 'fscr' => '𝒻',
+ 'gacute' => 'ǵ',
+ 'Gamma' => 'Γ',
+ 'gamma' => 'γ',
+ 'Gammad' => 'Ϝ',
+ 'gammad' => 'ϝ',
+ 'gap' => '⪆',
+ 'Gbreve' => 'Ğ',
+ 'gbreve' => 'ğ',
+ 'Gcedil' => 'Ģ',
+ 'Gcirc' => 'Ĝ',
+ 'gcirc' => 'ĝ',
+ 'Gcy' => 'Г',
+ 'gcy' => 'г',
+ 'Gdot' => 'Ġ',
+ 'gdot' => 'ġ',
+ 'gE' => '≧',
+ 'ge' => '≥',
+ 'gEl' => '⪌',
+ 'gel' => '⋛',
+ 'geq' => '≥',
+ 'geqq' => '≧',
+ 'geqslant' => '⩾',
+ 'ges' => '⩾',
+ 'gescc' => '⪩',
+ 'gesdot' => '⪀',
+ 'gesdoto' => '⪂',
+ 'gesdotol' => '⪄',
+ 'gesl' => '⋛︀',
+ 'gesles' => '⪔',
+ 'Gfr' => '𝔊',
+ 'gfr' => '𝔤',
+ 'Gg' => '⋙',
+ 'gg' => '≫',
+ 'ggg' => '⋙',
+ 'gimel' => 'ℷ',
+ 'GJcy' => 'Ѓ',
+ 'gjcy' => 'ѓ',
+ 'gl' => '≷',
+ 'gla' => '⪥',
+ 'glE' => '⪒',
+ 'glj' => '⪤',
+ 'gnap' => '⪊',
+ 'gnapprox' => '⪊',
+ 'gnE' => '≩',
+ 'gne' => '⪈',
+ 'gneq' => '⪈',
+ 'gneqq' => '≩',
+ 'gnsim' => '⋧',
+ 'Gopf' => '𝔾',
+ 'gopf' => '𝕘',
+ 'grave' => '`',
+ 'GreaterEqual' => '≥',
+ 'GreaterEqualLess' => '⋛',
+ 'GreaterFullEqual' => '≧',
+ 'GreaterGreater' => '⪢',
+ 'GreaterLess' => '≷',
+ 'GreaterSlantEqual' => '⩾',
+ 'GreaterTilde' => '≳',
+ 'Gscr' => '𝒢',
+ 'gscr' => 'ℊ',
+ 'gsim' => '≳',
+ 'gsime' => '⪎',
+ 'gsiml' => '⪐',
+ 'GT' => '>',
+ 'G' => '>',
+ 'Gt' => '≫',
+ 'gt' => '>',
+ 'g' => '>',
+ 'gtcc' => '⪧',
+ 'gtcir' => '⩺',
+ 'gtdot' => '⋗',
+ 'gtlPar' => '⦕',
+ 'gtquest' => '⩼',
+ 'gtrapprox' => '⪆',
+ 'gtrarr' => '⥸',
+ 'gtrdot' => '⋗',
+ 'gtreqless' => '⋛',
+ 'gtreqqless' => '⪌',
+ 'gtrless' => '≷',
+ 'gtrsim' => '≳',
+ 'gvertneqq' => '≩︀',
+ 'gvnE' => '≩︀',
+ 'Hacek' => 'ˇ',
+ 'hairsp' => ' ',
+ 'half' => '½',
+ 'hamilt' => 'ℋ',
+ 'HARDcy' => 'Ъ',
+ 'hardcy' => 'ъ',
+ 'hArr' => '⇔',
+ 'harr' => '↔',
+ 'harrcir' => '⥈',
+ 'harrw' => '↭',
+ 'Hat' => '^',
+ 'hbar' => 'ℏ',
+ 'Hcirc' => 'Ĥ',
+ 'hcirc' => 'ĥ',
+ 'hearts' => '♥',
+ 'heartsuit' => '♥',
+ 'hellip' => '…',
+ 'hercon' => '⊹',
+ 'Hfr' => 'ℌ',
+ 'hfr' => '𝔥',
+ 'HilbertSpace' => 'ℋ',
+ 'hksearow' => '⤥',
+ 'hkswarow' => '⤦',
+ 'hoarr' => '⇿',
+ 'homtht' => '∻',
+ 'hookleftarrow' => '↩',
+ 'hookrightarrow' => '↪',
+ 'Hopf' => 'ℍ',
+ 'hopf' => '𝕙',
+ 'horbar' => '―',
+ 'HorizontalLine' => '─',
+ 'Hscr' => 'ℋ',
+ 'hscr' => '𝒽',
+ 'hslash' => 'ℏ',
+ 'Hstrok' => 'Ħ',
+ 'hstrok' => 'ħ',
+ 'HumpDownHump' => '≎',
+ 'HumpEqual' => '≏',
+ 'hybull' => '⁃',
+ 'hyphen' => '‐',
+ 'Iacute' => 'Í',
+ 'Iacut' => 'Í',
+ 'iacute' => 'í',
+ 'iacut' => 'í',
+ 'ic' => '⁣',
+ 'Icirc' => 'Î',
+ 'Icir' => 'Î',
+ 'icirc' => 'î',
+ 'icir' => 'î',
+ 'Icy' => 'И',
+ 'icy' => 'и',
+ 'Idot' => 'İ',
+ 'IEcy' => 'Е',
+ 'iecy' => 'е',
+ 'iexcl' => '¡',
+ 'iexc' => '¡',
+ 'iff' => '⇔',
+ 'Ifr' => 'ℑ',
+ 'ifr' => '𝔦',
+ 'Igrave' => 'Ì',
+ 'Igrav' => 'Ì',
+ 'igrave' => 'ì',
+ 'igrav' => 'ì',
+ 'ii' => 'ⅈ',
+ 'iiiint' => '⨌',
+ 'iiint' => '∭',
+ 'iinfin' => '⧜',
+ 'iiota' => '℩',
+ 'IJlig' => 'IJ',
+ 'ijlig' => 'ij',
+ 'Im' => 'ℑ',
+ 'Imacr' => 'Ī',
+ 'imacr' => 'ī',
+ 'image' => 'ℑ',
+ 'ImaginaryI' => 'ⅈ',
+ 'imagline' => 'ℐ',
+ 'imagpart' => 'ℑ',
+ 'imath' => 'ı',
+ 'imof' => '⊷',
+ 'imped' => 'Ƶ',
+ 'Implies' => '⇒',
+ 'in' => '∈',
+ 'incare' => '℅',
+ 'infin' => '∞',
+ 'infintie' => '⧝',
+ 'inodot' => 'ı',
+ 'Int' => '∬',
+ 'int' => '∫',
+ 'intcal' => '⊺',
+ 'integers' => 'ℤ',
+ 'Integral' => '∫',
+ 'intercal' => '⊺',
+ 'Intersection' => '⋂',
+ 'intlarhk' => '⨗',
+ 'intprod' => '⨼',
+ 'InvisibleComma' => '⁣',
+ 'InvisibleTimes' => '⁢',
+ 'IOcy' => 'Ё',
+ 'iocy' => 'ё',
+ 'Iogon' => 'Į',
+ 'iogon' => 'į',
+ 'Iopf' => '𝕀',
+ 'iopf' => '𝕚',
+ 'Iota' => 'Ι',
+ 'iota' => 'ι',
+ 'iprod' => '⨼',
+ 'iquest' => '¿',
+ 'iques' => '¿',
+ 'Iscr' => 'ℐ',
+ 'iscr' => '𝒾',
+ 'isin' => '∈',
+ 'isindot' => '⋵',
+ 'isinE' => '⋹',
+ 'isins' => '⋴',
+ 'isinsv' => '⋳',
+ 'isinv' => '∈',
+ 'it' => '⁢',
+ 'Itilde' => 'Ĩ',
+ 'itilde' => 'ĩ',
+ 'Iukcy' => 'І',
+ 'iukcy' => 'і',
+ 'Iuml' => 'Ï',
+ 'Ium' => 'Ï',
+ 'iuml' => 'ï',
+ 'ium' => 'ï',
+ 'Jcirc' => 'Ĵ',
+ 'jcirc' => 'ĵ',
+ 'Jcy' => 'Й',
+ 'jcy' => 'й',
+ 'Jfr' => '𝔍',
+ 'jfr' => '𝔧',
+ 'jmath' => 'ȷ',
+ 'Jopf' => '𝕁',
+ 'jopf' => '𝕛',
+ 'Jscr' => '𝒥',
+ 'jscr' => '𝒿',
+ 'Jsercy' => 'Ј',
+ 'jsercy' => 'ј',
+ 'Jukcy' => 'Є',
+ 'jukcy' => 'є',
+ 'Kappa' => 'Κ',
+ 'kappa' => 'κ',
+ 'kappav' => 'ϰ',
+ 'Kcedil' => 'Ķ',
+ 'kcedil' => 'ķ',
+ 'Kcy' => 'К',
+ 'kcy' => 'к',
+ 'Kfr' => '𝔎',
+ 'kfr' => '𝔨',
+ 'kgreen' => 'ĸ',
+ 'KHcy' => 'Х',
+ 'khcy' => 'х',
+ 'KJcy' => 'Ќ',
+ 'kjcy' => 'ќ',
+ 'Kopf' => '𝕂',
+ 'kopf' => '𝕜',
+ 'Kscr' => '𝒦',
+ 'kscr' => '𝓀',
+ 'lAarr' => '⇚',
+ 'Lacute' => 'Ĺ',
+ 'lacute' => 'ĺ',
+ 'laemptyv' => '⦴',
+ 'lagran' => 'ℒ',
+ 'Lambda' => 'Λ',
+ 'lambda' => 'λ',
+ 'Lang' => '⟪',
+ 'lang' => '⟨',
+ 'langd' => '⦑',
+ 'langle' => '⟨',
+ 'lap' => '⪅',
+ 'Laplacetrf' => 'ℒ',
+ 'laquo' => '«',
+ 'laqu' => '«',
+ 'Larr' => '↞',
+ 'lArr' => '⇐',
+ 'larr' => '←',
+ 'larrb' => '⇤',
+ 'larrbfs' => '⤟',
+ 'larrfs' => '⤝',
+ 'larrhk' => '↩',
+ 'larrlp' => '↫',
+ 'larrpl' => '⤹',
+ 'larrsim' => '⥳',
+ 'larrtl' => '↢',
+ 'lat' => '⪫',
+ 'lAtail' => '⤛',
+ 'latail' => '⤙',
+ 'late' => '⪭',
+ 'lates' => '⪭︀',
+ 'lBarr' => '⤎',
+ 'lbarr' => '⤌',
+ 'lbbrk' => '❲',
+ 'lbrace' => '{',
+ 'lbrack' => '[',
+ 'lbrke' => '⦋',
+ 'lbrksld' => '⦏',
+ 'lbrkslu' => '⦍',
+ 'Lcaron' => 'Ľ',
+ 'lcaron' => 'ľ',
+ 'Lcedil' => 'Ļ',
+ 'lcedil' => 'ļ',
+ 'lceil' => '⌈',
+ 'lcub' => '{',
+ 'Lcy' => 'Л',
+ 'lcy' => 'л',
+ 'ldca' => '⤶',
+ 'ldquo' => '“',
+ 'ldquor' => '„',
+ 'ldrdhar' => '⥧',
+ 'ldrushar' => '⥋',
+ 'ldsh' => '↲',
+ 'lE' => '≦',
+ 'le' => '≤',
+ 'LeftAngleBracket' => '⟨',
+ 'LeftArrow' => '←',
+ 'Leftarrow' => '⇐',
+ 'leftarrow' => '←',
+ 'LeftArrowBar' => '⇤',
+ 'LeftArrowRightArrow' => '⇆',
+ 'leftarrowtail' => '↢',
+ 'LeftCeiling' => '⌈',
+ 'LeftDoubleBracket' => '⟦',
+ 'LeftDownTeeVector' => '⥡',
+ 'LeftDownVector' => '⇃',
+ 'LeftDownVectorBar' => '⥙',
+ 'LeftFloor' => '⌊',
+ 'leftharpoondown' => '↽',
+ 'leftharpoonup' => '↼',
+ 'leftleftarrows' => '⇇',
+ 'LeftRightArrow' => '↔',
+ 'Leftrightarrow' => '⇔',
+ 'leftrightarrow' => '↔',
+ 'leftrightarrows' => '⇆',
+ 'leftrightharpoons' => '⇋',
+ 'leftrightsquigarrow' => '↭',
+ 'LeftRightVector' => '⥎',
+ 'LeftTee' => '⊣',
+ 'LeftTeeArrow' => '↤',
+ 'LeftTeeVector' => '⥚',
+ 'leftthreetimes' => '⋋',
+ 'LeftTriangle' => '⊲',
+ 'LeftTriangleBar' => '⧏',
+ 'LeftTriangleEqual' => '⊴',
+ 'LeftUpDownVector' => '⥑',
+ 'LeftUpTeeVector' => '⥠',
+ 'LeftUpVector' => '↿',
+ 'LeftUpVectorBar' => '⥘',
+ 'LeftVector' => '↼',
+ 'LeftVectorBar' => '⥒',
+ 'lEg' => '⪋',
+ 'leg' => '⋚',
+ 'leq' => '≤',
+ 'leqq' => '≦',
+ 'leqslant' => '⩽',
+ 'les' => '⩽',
+ 'lescc' => '⪨',
+ 'lesdot' => '⩿',
+ 'lesdoto' => '⪁',
+ 'lesdotor' => '⪃',
+ 'lesg' => '⋚︀',
+ 'lesges' => '⪓',
+ 'lessapprox' => '⪅',
+ 'lessdot' => '⋖',
+ 'lesseqgtr' => '⋚',
+ 'lesseqqgtr' => '⪋',
+ 'LessEqualGreater' => '⋚',
+ 'LessFullEqual' => '≦',
+ 'LessGreater' => '≶',
+ 'lessgtr' => '≶',
+ 'LessLess' => '⪡',
+ 'lesssim' => '≲',
+ 'LessSlantEqual' => '⩽',
+ 'LessTilde' => '≲',
+ 'lfisht' => '⥼',
+ 'lfloor' => '⌊',
+ 'Lfr' => '𝔏',
+ 'lfr' => '𝔩',
+ 'lg' => '≶',
+ 'lgE' => '⪑',
+ 'lHar' => '⥢',
+ 'lhard' => '↽',
+ 'lharu' => '↼',
+ 'lharul' => '⥪',
+ 'lhblk' => '▄',
+ 'LJcy' => 'Љ',
+ 'ljcy' => 'љ',
+ 'Ll' => '⋘',
+ 'll' => '≪',
+ 'llarr' => '⇇',
+ 'llcorner' => '⌞',
+ 'Lleftarrow' => '⇚',
+ 'llhard' => '⥫',
+ 'lltri' => '◺',
+ 'Lmidot' => 'Ŀ',
+ 'lmidot' => 'ŀ',
+ 'lmoust' => '⎰',
+ 'lmoustache' => '⎰',
+ 'lnap' => '⪉',
+ 'lnapprox' => '⪉',
+ 'lnE' => '≨',
+ 'lne' => '⪇',
+ 'lneq' => '⪇',
+ 'lneqq' => '≨',
+ 'lnsim' => '⋦',
+ 'loang' => '⟬',
+ 'loarr' => '⇽',
+ 'lobrk' => '⟦',
+ 'LongLeftArrow' => '⟵',
+ 'Longleftarrow' => '⟸',
+ 'longleftarrow' => '⟵',
+ 'LongLeftRightArrow' => '⟷',
+ 'Longleftrightarrow' => '⟺',
+ 'longleftrightarrow' => '⟷',
+ 'longmapsto' => '⟼',
+ 'LongRightArrow' => '⟶',
+ 'Longrightarrow' => '⟹',
+ 'longrightarrow' => '⟶',
+ 'looparrowleft' => '↫',
+ 'looparrowright' => '↬',
+ 'lopar' => '⦅',
+ 'Lopf' => '𝕃',
+ 'lopf' => '𝕝',
+ 'loplus' => '⨭',
+ 'lotimes' => '⨴',
+ 'lowast' => '∗',
+ 'lowbar' => '_',
+ 'LowerLeftArrow' => '↙',
+ 'LowerRightArrow' => '↘',
+ 'loz' => '◊',
+ 'lozenge' => '◊',
+ 'lozf' => '⧫',
+ 'lpar' => '(',
+ 'lparlt' => '⦓',
+ 'lrarr' => '⇆',
+ 'lrcorner' => '⌟',
+ 'lrhar' => '⇋',
+ 'lrhard' => '⥭',
+ 'lrm' => '‎',
+ 'lrtri' => '⊿',
+ 'lsaquo' => '‹',
+ 'Lscr' => 'ℒ',
+ 'lscr' => '𝓁',
+ 'Lsh' => '↰',
+ 'lsh' => '↰',
+ 'lsim' => '≲',
+ 'lsime' => '⪍',
+ 'lsimg' => '⪏',
+ 'lsqb' => '[',
+ 'lsquo' => '‘',
+ 'lsquor' => '‚',
+ 'Lstrok' => 'Ł',
+ 'lstrok' => 'ł',
+ 'LT' => '<',
+ 'L' => '<',
+ 'Lt' => '≪',
+ 'lt' => '<',
+ 'l' => '<',
+ 'ltcc' => '⪦',
+ 'ltcir' => '⩹',
+ 'ltdot' => '⋖',
+ 'lthree' => '⋋',
+ 'ltimes' => '⋉',
+ 'ltlarr' => '⥶',
+ 'ltquest' => '⩻',
+ 'ltri' => '◃',
+ 'ltrie' => '⊴',
+ 'ltrif' => '◂',
+ 'ltrPar' => '⦖',
+ 'lurdshar' => '⥊',
+ 'luruhar' => '⥦',
+ 'lvertneqq' => '≨︀',
+ 'lvnE' => '≨︀',
+ 'macr' => '¯',
+ 'mac' => '¯',
+ 'male' => '♂',
+ 'malt' => '✠',
+ 'maltese' => '✠',
+ 'Map' => '⤅',
+ 'map' => '↦',
+ 'mapsto' => '↦',
+ 'mapstodown' => '↧',
+ 'mapstoleft' => '↤',
+ 'mapstoup' => '↥',
+ 'marker' => '▮',
+ 'mcomma' => '⨩',
+ 'Mcy' => 'М',
+ 'mcy' => 'м',
+ 'mdash' => '—',
+ 'mDDot' => '∺',
+ 'measuredangle' => '∡',
+ 'MediumSpace' => ' ',
+ 'Mellintrf' => 'ℳ',
+ 'Mfr' => '𝔐',
+ 'mfr' => '𝔪',
+ 'mho' => '℧',
+ 'micro' => 'µ',
+ 'micr' => 'µ',
+ 'mid' => '∣',
+ 'midast' => '*',
+ 'midcir' => '⫰',
+ 'middot' => '·',
+ 'middo' => '·',
+ 'minus' => '−',
+ 'minusb' => '⊟',
+ 'minusd' => '∸',
+ 'minusdu' => '⨪',
+ 'MinusPlus' => '∓',
+ 'mlcp' => '⫛',
+ 'mldr' => '…',
+ 'mnplus' => '∓',
+ 'models' => '⊧',
+ 'Mopf' => '𝕄',
+ 'mopf' => '𝕞',
+ 'mp' => '∓',
+ 'Mscr' => 'ℳ',
+ 'mscr' => '𝓂',
+ 'mstpos' => '∾',
+ 'Mu' => 'Μ',
+ 'mu' => 'μ',
+ 'multimap' => '⊸',
+ 'mumap' => '⊸',
+ 'nabla' => '∇',
+ 'Nacute' => 'Ń',
+ 'nacute' => 'ń',
+ 'nang' => '∠⃒',
+ 'nap' => '≉',
+ 'napE' => '⩰̸',
+ 'napid' => '≋̸',
+ 'napos' => 'ʼn',
+ 'napprox' => '≉',
+ 'natur' => '♮',
+ 'natural' => '♮',
+ 'naturals' => 'ℕ',
+ 'nbsp' => ' ',
+ 'nbs' => ' ',
+ 'nbump' => '≎̸',
+ 'nbumpe' => '≏̸',
+ 'ncap' => '⩃',
+ 'Ncaron' => 'Ň',
+ 'ncaron' => 'ň',
+ 'Ncedil' => 'Ņ',
+ 'ncedil' => 'ņ',
+ 'ncong' => '≇',
+ 'ncongdot' => '⩭̸',
+ 'ncup' => '⩂',
+ 'Ncy' => 'Н',
+ 'ncy' => 'н',
+ 'ndash' => '–',
+ 'ne' => '≠',
+ 'nearhk' => '⤤',
+ 'neArr' => '⇗',
+ 'nearr' => '↗',
+ 'nearrow' => '↗',
+ 'nedot' => '≐̸',
+ 'NegativeMediumSpace' => '​',
+ 'NegativeThickSpace' => '​',
+ 'NegativeThinSpace' => '​',
+ 'NegativeVeryThinSpace' => '​',
+ 'nequiv' => '≢',
+ 'nesear' => '⤨',
+ 'nesim' => '≂̸',
+ 'NestedGreaterGreater' => '≫',
+ 'NestedLessLess' => '≪',
+ 'NewLine' => '
',
- 'nexist' => '∄',
- 'nexists' => '∄',
- 'Nfr' => '𝔑',
- 'nfr' => '𝔫',
- 'ngE' => '≧̸',
- 'nge' => '≱',
- 'ngeq' => '≱',
- 'ngeqq' => '≧̸',
- 'ngeqslant' => '⩾̸',
- 'nges' => '⩾̸',
- 'nGg' => '⋙̸',
- 'ngsim' => '≵',
- 'nGt' => '≫⃒',
- 'ngt' => '≯',
- 'ngtr' => '≯',
- 'nGtv' => '≫̸',
- 'nhArr' => '⇎',
- 'nharr' => '↮',
- 'nhpar' => '⫲',
- 'ni' => '∋',
- 'nis' => '⋼',
- 'nisd' => '⋺',
- 'niv' => '∋',
- 'NJcy' => 'Њ',
- 'njcy' => 'њ',
- 'nlArr' => '⇍',
- 'nlarr' => '↚',
- 'nldr' => '‥',
- 'nlE' => '≦̸',
- 'nle' => '≰',
- 'nLeftarrow' => '⇍',
- 'nleftarrow' => '↚',
- 'nLeftrightarrow' => '⇎',
- 'nleftrightarrow' => '↮',
- 'nleq' => '≰',
- 'nleqq' => '≦̸',
- 'nleqslant' => '⩽̸',
- 'nles' => '⩽̸',
- 'nless' => '≮',
- 'nLl' => '⋘̸',
- 'nlsim' => '≴',
- 'nLt' => '≪⃒',
- 'nlt' => '≮',
- 'nltri' => '⋪',
- 'nltrie' => '⋬',
- 'nLtv' => '≪̸',
- 'nmid' => '∤',
- 'NoBreak' => '⁠',
- 'NonBreakingSpace' => ' ',
- 'Nopf' => 'ℕ',
- 'nopf' => '𝕟',
- 'Not' => '⫬',
- 'not' => '¬',
- 'no' => '¬',
- 'NotCongruent' => '≢',
- 'NotCupCap' => '≭',
- 'NotDoubleVerticalBar' => '∦',
- 'NotElement' => '∉',
- 'NotEqual' => '≠',
- 'NotEqualTilde' => '≂̸',
- 'NotExists' => '∄',
- 'NotGreater' => '≯',
- 'NotGreaterEqual' => '≱',
- 'NotGreaterFullEqual' => '≧̸',
- 'NotGreaterGreater' => '≫̸',
- 'NotGreaterLess' => '≹',
- 'NotGreaterSlantEqual' => '⩾̸',
- 'NotGreaterTilde' => '≵',
- 'NotHumpDownHump' => '≎̸',
- 'NotHumpEqual' => '≏̸',
- 'notin' => '∉',
- 'notindot' => '⋵̸',
- 'notinE' => '⋹̸',
- 'notinva' => '∉',
- 'notinvb' => '⋷',
- 'notinvc' => '⋶',
- 'NotLeftTriangle' => '⋪',
- 'NotLeftTriangleBar' => '⧏̸',
- 'NotLeftTriangleEqual' => '⋬',
- 'NotLess' => '≮',
- 'NotLessEqual' => '≰',
- 'NotLessGreater' => '≸',
- 'NotLessLess' => '≪̸',
- 'NotLessSlantEqual' => '⩽̸',
- 'NotLessTilde' => '≴',
- 'NotNestedGreaterGreater' => '⪢̸',
- 'NotNestedLessLess' => '⪡̸',
- 'notni' => '∌',
- 'notniva' => '∌',
- 'notnivb' => '⋾',
- 'notnivc' => '⋽',
- 'NotPrecedes' => '⊀',
- 'NotPrecedesEqual' => '⪯̸',
- 'NotPrecedesSlantEqual' => '⋠',
- 'NotReverseElement' => '∌',
- 'NotRightTriangle' => '⋫',
- 'NotRightTriangleBar' => '⧐̸',
- 'NotRightTriangleEqual' => '⋭',
- 'NotSquareSubset' => '⊏̸',
- 'NotSquareSubsetEqual' => '⋢',
- 'NotSquareSuperset' => '⊐̸',
- 'NotSquareSupersetEqual' => '⋣',
- 'NotSubset' => '⊂⃒',
- 'NotSubsetEqual' => '⊈',
- 'NotSucceeds' => '⊁',
- 'NotSucceedsEqual' => '⪰̸',
- 'NotSucceedsSlantEqual' => '⋡',
- 'NotSucceedsTilde' => '≿̸',
- 'NotSuperset' => '⊃⃒',
- 'NotSupersetEqual' => '⊉',
- 'NotTilde' => '≁',
- 'NotTildeEqual' => '≄',
- 'NotTildeFullEqual' => '≇',
- 'NotTildeTilde' => '≉',
- 'NotVerticalBar' => '∤',
- 'npar' => '∦',
- 'nparallel' => '∦',
- 'nparsl' => '⫽⃥',
- 'npart' => '∂̸',
- 'npolint' => '⨔',
- 'npr' => '⊀',
- 'nprcue' => '⋠',
- 'npre' => '⪯̸',
- 'nprec' => '⊀',
- 'npreceq' => '⪯̸',
- 'nrArr' => '⇏',
- 'nrarr' => '↛',
- 'nrarrc' => '⤳̸',
- 'nrarrw' => '↝̸',
- 'nRightarrow' => '⇏',
- 'nrightarrow' => '↛',
- 'nrtri' => '⋫',
- 'nrtrie' => '⋭',
- 'nsc' => '⊁',
- 'nsccue' => '⋡',
- 'nsce' => '⪰̸',
- 'Nscr' => '𝒩',
- 'nscr' => '𝓃',
- 'nshortmid' => '∤',
- 'nshortparallel' => '∦',
- 'nsim' => '≁',
- 'nsime' => '≄',
- 'nsimeq' => '≄',
- 'nsmid' => '∤',
- 'nspar' => '∦',
- 'nsqsube' => '⋢',
- 'nsqsupe' => '⋣',
- 'nsub' => '⊄',
- 'nsubE' => '⫅̸',
- 'nsube' => '⊈',
- 'nsubset' => '⊂⃒',
- 'nsubseteq' => '⊈',
- 'nsubseteqq' => '⫅̸',
- 'nsucc' => '⊁',
- 'nsucceq' => '⪰̸',
- 'nsup' => '⊅',
- 'nsupE' => '⫆̸',
- 'nsupe' => '⊉',
- 'nsupset' => '⊃⃒',
- 'nsupseteq' => '⊉',
- 'nsupseteqq' => '⫆̸',
- 'ntgl' => '≹',
- 'Ntilde' => 'Ñ',
- 'Ntild' => 'Ñ',
- 'ntilde' => 'ñ',
- 'ntild' => 'ñ',
- 'ntlg' => '≸',
- 'ntriangleleft' => '⋪',
- 'ntrianglelefteq' => '⋬',
- 'ntriangleright' => '⋫',
- 'ntrianglerighteq' => '⋭',
- 'Nu' => 'Ν',
- 'nu' => 'ν',
- 'num' => '#',
- 'numero' => '№',
- 'numsp' => ' ',
- 'nvap' => '≍⃒',
- 'nVDash' => '⊯',
- 'nVdash' => '⊮',
- 'nvDash' => '⊭',
- 'nvdash' => '⊬',
- 'nvge' => '≥⃒',
- 'nvgt' => '>⃒',
- 'nvHarr' => '⤄',
- 'nvinfin' => '⧞',
- 'nvlArr' => '⤂',
- 'nvle' => '≤⃒',
- 'nvlt' => '<⃒',
- 'nvltrie' => '⊴⃒',
- 'nvrArr' => '⤃',
- 'nvrtrie' => '⊵⃒',
- 'nvsim' => '∼⃒',
- 'nwarhk' => '⤣',
- 'nwArr' => '⇖',
- 'nwarr' => '↖',
- 'nwarrow' => '↖',
- 'nwnear' => '⤧',
- 'Oacute' => 'Ó',
- 'Oacut' => 'Ó',
- 'oacute' => 'ó',
- 'oacut' => 'ó',
- 'oast' => '⊛',
- 'ocir' => 'ô',
- 'Ocirc' => 'Ô',
- 'Ocir' => 'Ô',
- 'ocirc' => 'ô',
- 'Ocy' => 'О',
- 'ocy' => 'о',
- 'odash' => '⊝',
- 'Odblac' => 'Ő',
- 'odblac' => 'ő',
- 'odiv' => '⨸',
- 'odot' => '⊙',
- 'odsold' => '⦼',
- 'OElig' => 'Œ',
- 'oelig' => 'œ',
- 'ofcir' => '⦿',
- 'Ofr' => '𝔒',
- 'ofr' => '𝔬',
- 'ogon' => '˛',
- 'Ograve' => 'Ò',
- 'Ograv' => 'Ò',
- 'ograve' => 'ò',
- 'ograv' => 'ò',
- 'ogt' => '⧁',
- 'ohbar' => '⦵',
- 'ohm' => 'Ω',
- 'oint' => '∮',
- 'olarr' => '↺',
- 'olcir' => '⦾',
- 'olcross' => '⦻',
- 'oline' => '‾',
- 'olt' => '⧀',
- 'Omacr' => 'Ō',
- 'omacr' => 'ō',
- 'Omega' => 'Ω',
- 'omega' => 'ω',
- 'Omicron' => 'Ο',
- 'omicron' => 'ο',
- 'omid' => '⦶',
- 'ominus' => '⊖',
- 'Oopf' => '𝕆',
- 'oopf' => '𝕠',
- 'opar' => '⦷',
- 'OpenCurlyDoubleQuote' => '“',
- 'OpenCurlyQuote' => '‘',
- 'operp' => '⦹',
- 'oplus' => '⊕',
- 'Or' => '⩔',
- 'or' => '∨',
- 'orarr' => '↻',
- 'ord' => 'º',
- 'order' => 'ℴ',
- 'orderof' => 'ℴ',
- 'ordf' => 'ª',
- 'ordm' => 'º',
- 'origof' => '⊶',
- 'oror' => '⩖',
- 'orslope' => '⩗',
- 'orv' => '⩛',
- 'oS' => 'Ⓢ',
- 'Oscr' => '𝒪',
- 'oscr' => 'ℴ',
- 'Oslash' => 'Ø',
- 'Oslas' => 'Ø',
- 'oslash' => 'ø',
- 'oslas' => 'ø',
- 'osol' => '⊘',
- 'Otilde' => 'Õ',
- 'Otild' => 'Õ',
- 'otilde' => 'õ',
- 'otild' => 'õ',
- 'Otimes' => '⨷',
- 'otimes' => '⊗',
- 'otimesas' => '⨶',
- 'Ouml' => 'Ö',
- 'Oum' => 'Ö',
- 'ouml' => 'ö',
- 'oum' => 'ö',
- 'ovbar' => '⌽',
- 'OverBar' => '‾',
- 'OverBrace' => '⏞',
- 'OverBracket' => '⎴',
- 'OverParenthesis' => '⏜',
- 'par' => '¶',
- 'para' => '¶',
- 'parallel' => '∥',
- 'parsim' => '⫳',
- 'parsl' => '⫽',
- 'part' => '∂',
- 'PartialD' => '∂',
- 'Pcy' => 'П',
- 'pcy' => 'п',
- 'percnt' => '%',
- 'period' => '.',
- 'permil' => '‰',
- 'perp' => '⊥',
- 'pertenk' => '‱',
- 'Pfr' => '𝔓',
- 'pfr' => '𝔭',
- 'Phi' => 'Φ',
- 'phi' => 'φ',
- 'phiv' => 'ϕ',
- 'phmmat' => 'ℳ',
- 'phone' => '☎',
- 'Pi' => 'Π',
- 'pi' => 'π',
- 'pitchfork' => '⋔',
- 'piv' => 'ϖ',
- 'planck' => 'ℏ',
- 'planckh' => 'ℎ',
- 'plankv' => 'ℏ',
- 'plus' => '+',
- 'plusacir' => '⨣',
- 'plusb' => '⊞',
- 'pluscir' => '⨢',
- 'plusdo' => '∔',
- 'plusdu' => '⨥',
- 'pluse' => '⩲',
- 'PlusMinus' => '±',
- 'plusmn' => '±',
- 'plusm' => '±',
- 'plussim' => '⨦',
- 'plustwo' => '⨧',
- 'pm' => '±',
- 'Poincareplane' => 'ℌ',
- 'pointint' => '⨕',
- 'Popf' => 'ℙ',
- 'popf' => '𝕡',
- 'pound' => '£',
- 'poun' => '£',
- 'Pr' => '⪻',
- 'pr' => '≺',
- 'prap' => '⪷',
- 'prcue' => '≼',
- 'prE' => '⪳',
- 'pre' => '⪯',
- 'prec' => '≺',
- 'precapprox' => '⪷',
- 'preccurlyeq' => '≼',
- 'Precedes' => '≺',
- 'PrecedesEqual' => '⪯',
- 'PrecedesSlantEqual' => '≼',
- 'PrecedesTilde' => '≾',
- 'preceq' => '⪯',
- 'precnapprox' => '⪹',
- 'precneqq' => '⪵',
- 'precnsim' => '⋨',
- 'precsim' => '≾',
- 'Prime' => '″',
- 'prime' => '′',
- 'primes' => 'ℙ',
- 'prnap' => '⪹',
- 'prnE' => '⪵',
- 'prnsim' => '⋨',
- 'prod' => '∏',
- 'Product' => '∏',
- 'profalar' => '⌮',
- 'profline' => '⌒',
- 'profsurf' => '⌓',
- 'prop' => '∝',
- 'Proportion' => '∷',
- 'Proportional' => '∝',
- 'propto' => '∝',
- 'prsim' => '≾',
- 'prurel' => '⊰',
- 'Pscr' => '𝒫',
- 'pscr' => '𝓅',
- 'Psi' => 'Ψ',
- 'psi' => 'ψ',
- 'puncsp' => ' ',
- 'Qfr' => '𝔔',
- 'qfr' => '𝔮',
- 'qint' => '⨌',
- 'Qopf' => 'ℚ',
- 'qopf' => '𝕢',
- 'qprime' => '⁗',
- 'Qscr' => '𝒬',
- 'qscr' => '𝓆',
- 'quaternions' => 'ℍ',
- 'quatint' => '⨖',
- 'quest' => '?',
- 'questeq' => '≟',
- 'QUOT' => '"',
- 'QUO' => '"',
- 'quot' => '"',
- 'quo' => '"',
- 'rAarr' => '⇛',
- 'race' => '∽̱',
- 'Racute' => 'Ŕ',
- 'racute' => 'ŕ',
- 'radic' => '√',
- 'raemptyv' => '⦳',
- 'Rang' => '⟫',
- 'rang' => '⟩',
- 'rangd' => '⦒',
- 'range' => '⦥',
- 'rangle' => '⟩',
- 'raquo' => '»',
- 'raqu' => '»',
- 'Rarr' => '↠',
- 'rArr' => '⇒',
- 'rarr' => '→',
- 'rarrap' => '⥵',
- 'rarrb' => '⇥',
- 'rarrbfs' => '⤠',
- 'rarrc' => '⤳',
- 'rarrfs' => '⤞',
- 'rarrhk' => '↪',
- 'rarrlp' => '↬',
- 'rarrpl' => '⥅',
- 'rarrsim' => '⥴',
- 'Rarrtl' => '⤖',
- 'rarrtl' => '↣',
- 'rarrw' => '↝',
- 'rAtail' => '⤜',
- 'ratail' => '⤚',
- 'ratio' => '∶',
- 'rationals' => 'ℚ',
- 'RBarr' => '⤐',
- 'rBarr' => '⤏',
- 'rbarr' => '⤍',
- 'rbbrk' => '❳',
- 'rbrace' => '}',
- 'rbrack' => ']',
- 'rbrke' => '⦌',
- 'rbrksld' => '⦎',
- 'rbrkslu' => '⦐',
- 'Rcaron' => 'Ř',
- 'rcaron' => 'ř',
- 'Rcedil' => 'Ŗ',
- 'rcedil' => 'ŗ',
- 'rceil' => '⌉',
- 'rcub' => '}',
- 'Rcy' => 'Р',
- 'rcy' => 'р',
- 'rdca' => '⤷',
- 'rdldhar' => '⥩',
- 'rdquo' => '”',
- 'rdquor' => '”',
- 'rdsh' => '↳',
- 'Re' => 'ℜ',
- 'real' => 'ℜ',
- 'realine' => 'ℛ',
- 'realpart' => 'ℜ',
- 'reals' => 'ℝ',
- 'rect' => '▭',
- 'REG' => '®',
- 'RE' => '®',
- 'reg' => '®',
- 're' => '®',
- 'ReverseElement' => '∋',
- 'ReverseEquilibrium' => '⇋',
- 'ReverseUpEquilibrium' => '⥯',
- 'rfisht' => '⥽',
- 'rfloor' => '⌋',
- 'Rfr' => 'ℜ',
- 'rfr' => '𝔯',
- 'rHar' => '⥤',
- 'rhard' => '⇁',
- 'rharu' => '⇀',
- 'rharul' => '⥬',
- 'Rho' => 'Ρ',
- 'rho' => 'ρ',
- 'rhov' => 'ϱ',
- 'RightAngleBracket' => '⟩',
- 'RightArrow' => '→',
- 'Rightarrow' => '⇒',
- 'rightarrow' => '→',
- 'RightArrowBar' => '⇥',
- 'RightArrowLeftArrow' => '⇄',
- 'rightarrowtail' => '↣',
- 'RightCeiling' => '⌉',
- 'RightDoubleBracket' => '⟧',
- 'RightDownTeeVector' => '⥝',
- 'RightDownVector' => '⇂',
- 'RightDownVectorBar' => '⥕',
- 'RightFloor' => '⌋',
- 'rightharpoondown' => '⇁',
- 'rightharpoonup' => '⇀',
- 'rightleftarrows' => '⇄',
- 'rightleftharpoons' => '⇌',
- 'rightrightarrows' => '⇉',
- 'rightsquigarrow' => '↝',
- 'RightTee' => '⊢',
- 'RightTeeArrow' => '↦',
- 'RightTeeVector' => '⥛',
- 'rightthreetimes' => '⋌',
- 'RightTriangle' => '⊳',
- 'RightTriangleBar' => '⧐',
- 'RightTriangleEqual' => '⊵',
- 'RightUpDownVector' => '⥏',
- 'RightUpTeeVector' => '⥜',
- 'RightUpVector' => '↾',
- 'RightUpVectorBar' => '⥔',
- 'RightVector' => '⇀',
- 'RightVectorBar' => '⥓',
- 'ring' => '˚',
- 'risingdotseq' => '≓',
- 'rlarr' => '⇄',
- 'rlhar' => '⇌',
- 'rlm' => '‏',
- 'rmoust' => '⎱',
- 'rmoustache' => '⎱',
- 'rnmid' => '⫮',
- 'roang' => '⟭',
- 'roarr' => '⇾',
- 'robrk' => '⟧',
- 'ropar' => '⦆',
- 'Ropf' => 'ℝ',
- 'ropf' => '𝕣',
- 'roplus' => '⨮',
- 'rotimes' => '⨵',
- 'RoundImplies' => '⥰',
- 'rpar' => ')',
- 'rpargt' => '⦔',
- 'rppolint' => '⨒',
- 'rrarr' => '⇉',
- 'Rrightarrow' => '⇛',
- 'rsaquo' => '›',
- 'Rscr' => 'ℛ',
- 'rscr' => '𝓇',
- 'Rsh' => '↱',
- 'rsh' => '↱',
- 'rsqb' => ']',
- 'rsquo' => '’',
- 'rsquor' => '’',
- 'rthree' => '⋌',
- 'rtimes' => '⋊',
- 'rtri' => '▹',
- 'rtrie' => '⊵',
- 'rtrif' => '▸',
- 'rtriltri' => '⧎',
- 'RuleDelayed' => '⧴',
- 'ruluhar' => '⥨',
- 'rx' => '℞',
- 'Sacute' => 'Ś',
- 'sacute' => 'ś',
- 'sbquo' => '‚',
- 'Sc' => '⪼',
- 'sc' => '≻',
- 'scap' => '⪸',
- 'Scaron' => 'Š',
- 'scaron' => 'š',
- 'sccue' => '≽',
- 'scE' => '⪴',
- 'sce' => '⪰',
- 'Scedil' => 'Ş',
- 'scedil' => 'ş',
- 'Scirc' => 'Ŝ',
- 'scirc' => 'ŝ',
- 'scnap' => '⪺',
- 'scnE' => '⪶',
- 'scnsim' => '⋩',
- 'scpolint' => '⨓',
- 'scsim' => '≿',
- 'Scy' => 'С',
- 'scy' => 'с',
- 'sdot' => '⋅',
- 'sdotb' => '⊡',
- 'sdote' => '⩦',
- 'searhk' => '⤥',
- 'seArr' => '⇘',
- 'searr' => '↘',
- 'searrow' => '↘',
- 'sect' => '§',
- 'sec' => '§',
- 'semi' => ';',
- 'seswar' => '⤩',
- 'setminus' => '∖',
- 'setmn' => '∖',
- 'sext' => '✶',
- 'Sfr' => '𝔖',
- 'sfr' => '𝔰',
- 'sfrown' => '⌢',
- 'sharp' => '♯',
- 'SHCHcy' => 'Щ',
- 'shchcy' => 'щ',
- 'SHcy' => 'Ш',
- 'shcy' => 'ш',
- 'ShortDownArrow' => '↓',
- 'ShortLeftArrow' => '←',
- 'shortmid' => '∣',
- 'shortparallel' => '∥',
- 'ShortRightArrow' => '→',
- 'ShortUpArrow' => '↑',
- 'shy' => '­',
- 'sh' => '­',
- 'Sigma' => 'Σ',
- 'sigma' => 'σ',
- 'sigmaf' => 'ς',
- 'sigmav' => 'ς',
- 'sim' => '∼',
- 'simdot' => '⩪',
- 'sime' => '≃',
- 'simeq' => '≃',
- 'simg' => '⪞',
- 'simgE' => '⪠',
- 'siml' => '⪝',
- 'simlE' => '⪟',
- 'simne' => '≆',
- 'simplus' => '⨤',
- 'simrarr' => '⥲',
- 'slarr' => '←',
- 'SmallCircle' => '∘',
- 'smallsetminus' => '∖',
- 'smashp' => '⨳',
- 'smeparsl' => '⧤',
- 'smid' => '∣',
- 'smile' => '⌣',
- 'smt' => '⪪',
- 'smte' => '⪬',
- 'smtes' => '⪬︀',
- 'SOFTcy' => 'Ь',
- 'softcy' => 'ь',
- 'sol' => '/',
- 'solb' => '⧄',
- 'solbar' => '⌿',
- 'Sopf' => '𝕊',
- 'sopf' => '𝕤',
- 'spades' => '♠',
- 'spadesuit' => '♠',
- 'spar' => '∥',
- 'sqcap' => '⊓',
- 'sqcaps' => '⊓︀',
- 'sqcup' => '⊔',
- 'sqcups' => '⊔︀',
- 'Sqrt' => '√',
- 'sqsub' => '⊏',
- 'sqsube' => '⊑',
- 'sqsubset' => '⊏',
- 'sqsubseteq' => '⊑',
- 'sqsup' => '⊐',
- 'sqsupe' => '⊒',
- 'sqsupset' => '⊐',
- 'sqsupseteq' => '⊒',
- 'squ' => '□',
- 'Square' => '□',
- 'square' => '□',
- 'SquareIntersection' => '⊓',
- 'SquareSubset' => '⊏',
- 'SquareSubsetEqual' => '⊑',
- 'SquareSuperset' => '⊐',
- 'SquareSupersetEqual' => '⊒',
- 'SquareUnion' => '⊔',
- 'squarf' => '▪',
- 'squf' => '▪',
- 'srarr' => '→',
- 'Sscr' => '𝒮',
- 'sscr' => '𝓈',
- 'ssetmn' => '∖',
- 'ssmile' => '⌣',
- 'sstarf' => '⋆',
- 'Star' => '⋆',
- 'star' => '☆',
- 'starf' => '★',
- 'straightepsilon' => 'ϵ',
- 'straightphi' => 'ϕ',
- 'strns' => '¯',
- 'Sub' => '⋐',
- 'sub' => '⊂',
- 'subdot' => '⪽',
- 'subE' => '⫅',
- 'sube' => '⊆',
- 'subedot' => '⫃',
- 'submult' => '⫁',
- 'subnE' => '⫋',
- 'subne' => '⊊',
- 'subplus' => '⪿',
- 'subrarr' => '⥹',
- 'Subset' => '⋐',
- 'subset' => '⊂',
- 'subseteq' => '⊆',
- 'subseteqq' => '⫅',
- 'SubsetEqual' => '⊆',
- 'subsetneq' => '⊊',
- 'subsetneqq' => '⫋',
- 'subsim' => '⫇',
- 'subsub' => '⫕',
- 'subsup' => '⫓',
- 'succ' => '≻',
- 'succapprox' => '⪸',
- 'succcurlyeq' => '≽',
- 'Succeeds' => '≻',
- 'SucceedsEqual' => '⪰',
- 'SucceedsSlantEqual' => '≽',
- 'SucceedsTilde' => '≿',
- 'succeq' => '⪰',
- 'succnapprox' => '⪺',
- 'succneqq' => '⪶',
- 'succnsim' => '⋩',
- 'succsim' => '≿',
- 'SuchThat' => '∋',
- 'Sum' => '∑',
- 'sum' => '∑',
- 'sung' => '♪',
- 'Sup' => '⋑',
- 'sup' => '³',
- 'sup1' => '¹',
- 'sup2' => '²',
- 'sup3' => '³',
- 'supdot' => '⪾',
- 'supdsub' => '⫘',
- 'supE' => '⫆',
- 'supe' => '⊇',
- 'supedot' => '⫄',
- 'Superset' => '⊃',
- 'SupersetEqual' => '⊇',
- 'suphsol' => '⟉',
- 'suphsub' => '⫗',
- 'suplarr' => '⥻',
- 'supmult' => '⫂',
- 'supnE' => '⫌',
- 'supne' => '⊋',
- 'supplus' => '⫀',
- 'Supset' => '⋑',
- 'supset' => '⊃',
- 'supseteq' => '⊇',
- 'supseteqq' => '⫆',
- 'supsetneq' => '⊋',
- 'supsetneqq' => '⫌',
- 'supsim' => '⫈',
- 'supsub' => '⫔',
- 'supsup' => '⫖',
- 'swarhk' => '⤦',
- 'swArr' => '⇙',
- 'swarr' => '↙',
- 'swarrow' => '↙',
- 'swnwar' => '⤪',
- 'szlig' => 'ß',
- 'szli' => 'ß',
- 'Tab' => ' ',
- 'target' => '⌖',
- 'Tau' => 'Τ',
- 'tau' => 'τ',
- 'tbrk' => '⎴',
- 'Tcaron' => 'Ť',
- 'tcaron' => 'ť',
- 'Tcedil' => 'Ţ',
- 'tcedil' => 'ţ',
- 'Tcy' => 'Т',
- 'tcy' => 'т',
- 'tdot' => '⃛',
- 'telrec' => '⌕',
- 'Tfr' => '𝔗',
- 'tfr' => '𝔱',
- 'there4' => '∴',
- 'Therefore' => '∴',
- 'therefore' => '∴',
- 'Theta' => 'Θ',
- 'theta' => 'θ',
- 'thetasym' => 'ϑ',
- 'thetav' => 'ϑ',
- 'thickapprox' => '≈',
- 'thicksim' => '∼',
- 'ThickSpace' => '  ',
- 'thinsp' => ' ',
- 'ThinSpace' => ' ',
- 'thkap' => '≈',
- 'thksim' => '∼',
- 'THORN' => 'Þ',
- 'THOR' => 'Þ',
- 'thorn' => 'þ',
- 'thor' => 'þ',
- 'Tilde' => '∼',
- 'tilde' => '˜',
- 'TildeEqual' => '≃',
- 'TildeFullEqual' => '≅',
- 'TildeTilde' => '≈',
- 'times' => '×',
- 'time' => '×',
- 'timesb' => '⊠',
- 'timesbar' => '⨱',
- 'timesd' => '⨰',
- 'tint' => '∭',
- 'toea' => '⤨',
- 'top' => '⊤',
- 'topbot' => '⌶',
- 'topcir' => '⫱',
- 'Topf' => '𝕋',
- 'topf' => '𝕥',
- 'topfork' => '⫚',
- 'tosa' => '⤩',
- 'tprime' => '‴',
- 'TRADE' => '™',
- 'trade' => '™',
- 'triangle' => '▵',
- 'triangledown' => '▿',
- 'triangleleft' => '◃',
- 'trianglelefteq' => '⊴',
- 'triangleq' => '≜',
- 'triangleright' => '▹',
- 'trianglerighteq' => '⊵',
- 'tridot' => '◬',
- 'trie' => '≜',
- 'triminus' => '⨺',
- 'TripleDot' => '⃛',
- 'triplus' => '⨹',
- 'trisb' => '⧍',
- 'tritime' => '⨻',
- 'trpezium' => '⏢',
- 'Tscr' => '𝒯',
- 'tscr' => '𝓉',
- 'TScy' => 'Ц',
- 'tscy' => 'ц',
- 'TSHcy' => 'Ћ',
- 'tshcy' => 'ћ',
- 'Tstrok' => 'Ŧ',
- 'tstrok' => 'ŧ',
- 'twixt' => '≬',
- 'twoheadleftarrow' => '↞',
- 'twoheadrightarrow' => '↠',
- 'Uacute' => 'Ú',
- 'Uacut' => 'Ú',
- 'uacute' => 'ú',
- 'uacut' => 'ú',
- 'Uarr' => '↟',
- 'uArr' => '⇑',
- 'uarr' => '↑',
- 'Uarrocir' => '⥉',
- 'Ubrcy' => 'Ў',
- 'ubrcy' => 'ў',
- 'Ubreve' => 'Ŭ',
- 'ubreve' => 'ŭ',
- 'Ucirc' => 'Û',
- 'Ucir' => 'Û',
- 'ucirc' => 'û',
- 'ucir' => 'û',
- 'Ucy' => 'У',
- 'ucy' => 'у',
- 'udarr' => '⇅',
- 'Udblac' => 'Ű',
- 'udblac' => 'ű',
- 'udhar' => '⥮',
- 'ufisht' => '⥾',
- 'Ufr' => '𝔘',
- 'ufr' => '𝔲',
- 'Ugrave' => 'Ù',
- 'Ugrav' => 'Ù',
- 'ugrave' => 'ù',
- 'ugrav' => 'ù',
- 'uHar' => '⥣',
- 'uharl' => '↿',
- 'uharr' => '↾',
- 'uhblk' => '▀',
- 'ulcorn' => '⌜',
- 'ulcorner' => '⌜',
- 'ulcrop' => '⌏',
- 'ultri' => '◸',
- 'Umacr' => 'Ū',
- 'umacr' => 'ū',
- 'uml' => '¨',
- 'um' => '¨',
- 'UnderBar' => '_',
- 'UnderBrace' => '⏟',
- 'UnderBracket' => '⎵',
- 'UnderParenthesis' => '⏝',
- 'Union' => '⋃',
- 'UnionPlus' => '⊎',
- 'Uogon' => 'Ų',
- 'uogon' => 'ų',
- 'Uopf' => '𝕌',
- 'uopf' => '𝕦',
- 'UpArrow' => '↑',
- 'Uparrow' => '⇑',
- 'uparrow' => '↑',
- 'UpArrowBar' => '⤒',
- 'UpArrowDownArrow' => '⇅',
- 'UpDownArrow' => '↕',
- 'Updownarrow' => '⇕',
- 'updownarrow' => '↕',
- 'UpEquilibrium' => '⥮',
- 'upharpoonleft' => '↿',
- 'upharpoonright' => '↾',
- 'uplus' => '⊎',
- 'UpperLeftArrow' => '↖',
- 'UpperRightArrow' => '↗',
- 'Upsi' => 'ϒ',
- 'upsi' => 'υ',
- 'upsih' => 'ϒ',
- 'Upsilon' => 'Υ',
- 'upsilon' => 'υ',
- 'UpTee' => '⊥',
- 'UpTeeArrow' => '↥',
- 'upuparrows' => '⇈',
- 'urcorn' => '⌝',
- 'urcorner' => '⌝',
- 'urcrop' => '⌎',
- 'Uring' => 'Ů',
- 'uring' => 'ů',
- 'urtri' => '◹',
- 'Uscr' => '𝒰',
- 'uscr' => '𝓊',
- 'utdot' => '⋰',
- 'Utilde' => 'Ũ',
- 'utilde' => 'ũ',
- 'utri' => '▵',
- 'utrif' => '▴',
- 'uuarr' => '⇈',
- 'Uuml' => 'Ü',
- 'Uum' => 'Ü',
- 'uuml' => 'ü',
- 'uum' => 'ü',
- 'uwangle' => '⦧',
- 'vangrt' => '⦜',
- 'varepsilon' => 'ϵ',
- 'varkappa' => 'ϰ',
- 'varnothing' => '∅',
- 'varphi' => 'ϕ',
- 'varpi' => 'ϖ',
- 'varpropto' => '∝',
- 'vArr' => '⇕',
- 'varr' => '↕',
- 'varrho' => 'ϱ',
- 'varsigma' => 'ς',
- 'varsubsetneq' => '⊊︀',
- 'varsubsetneqq' => '⫋︀',
- 'varsupsetneq' => '⊋︀',
- 'varsupsetneqq' => '⫌︀',
- 'vartheta' => 'ϑ',
- 'vartriangleleft' => '⊲',
- 'vartriangleright' => '⊳',
- 'Vbar' => '⫫',
- 'vBar' => '⫨',
- 'vBarv' => '⫩',
- 'Vcy' => 'В',
- 'vcy' => 'в',
- 'VDash' => '⊫',
- 'Vdash' => '⊩',
- 'vDash' => '⊨',
- 'vdash' => '⊢',
- 'Vdashl' => '⫦',
- 'Vee' => '⋁',
- 'vee' => '∨',
- 'veebar' => '⊻',
- 'veeeq' => '≚',
- 'vellip' => '⋮',
- 'Verbar' => '‖',
- 'verbar' => '|',
- 'Vert' => '‖',
- 'vert' => '|',
- 'VerticalBar' => '∣',
- 'VerticalLine' => '|',
- 'VerticalSeparator' => '❘',
- 'VerticalTilde' => '≀',
- 'VeryThinSpace' => ' ',
- 'Vfr' => '𝔙',
- 'vfr' => '𝔳',
- 'vltri' => '⊲',
- 'vnsub' => '⊂⃒',
- 'vnsup' => '⊃⃒',
- 'Vopf' => '𝕍',
- 'vopf' => '𝕧',
- 'vprop' => '∝',
- 'vrtri' => '⊳',
- 'Vscr' => '𝒱',
- 'vscr' => '𝓋',
- 'vsubnE' => '⫋︀',
- 'vsubne' => '⊊︀',
- 'vsupnE' => '⫌︀',
- 'vsupne' => '⊋︀',
- 'Vvdash' => '⊪',
- 'vzigzag' => '⦚',
- 'Wcirc' => 'Ŵ',
- 'wcirc' => 'ŵ',
- 'wedbar' => '⩟',
- 'Wedge' => '⋀',
- 'wedge' => '∧',
- 'wedgeq' => '≙',
- 'weierp' => '℘',
- 'Wfr' => '𝔚',
- 'wfr' => '𝔴',
- 'Wopf' => '𝕎',
- 'wopf' => '𝕨',
- 'wp' => '℘',
- 'wr' => '≀',
- 'wreath' => '≀',
- 'Wscr' => '𝒲',
- 'wscr' => '𝓌',
- 'xcap' => '⋂',
- 'xcirc' => '◯',
- 'xcup' => '⋃',
- 'xdtri' => '▽',
- 'Xfr' => '𝔛',
- 'xfr' => '𝔵',
- 'xhArr' => '⟺',
- 'xharr' => '⟷',
- 'Xi' => 'Ξ',
- 'xi' => 'ξ',
- 'xlArr' => '⟸',
- 'xlarr' => '⟵',
- 'xmap' => '⟼',
- 'xnis' => '⋻',
- 'xodot' => '⨀',
- 'Xopf' => '𝕏',
- 'xopf' => '𝕩',
- 'xoplus' => '⨁',
- 'xotime' => '⨂',
- 'xrArr' => '⟹',
- 'xrarr' => '⟶',
- 'Xscr' => '𝒳',
- 'xscr' => '𝓍',
- 'xsqcup' => '⨆',
- 'xuplus' => '⨄',
- 'xutri' => '△',
- 'xvee' => '⋁',
- 'xwedge' => '⋀',
- 'Yacute' => 'Ý',
- 'Yacut' => 'Ý',
- 'yacute' => 'ý',
- 'yacut' => 'ý',
- 'YAcy' => 'Я',
- 'yacy' => 'я',
- 'Ycirc' => 'Ŷ',
- 'ycirc' => 'ŷ',
- 'Ycy' => 'Ы',
- 'ycy' => 'ы',
- 'yen' => '¥',
- 'ye' => '¥',
- 'Yfr' => '𝔜',
- 'yfr' => '𝔶',
- 'YIcy' => 'Ї',
- 'yicy' => 'ї',
- 'Yopf' => '𝕐',
- 'yopf' => '𝕪',
- 'Yscr' => '𝒴',
- 'yscr' => '𝓎',
- 'YUcy' => 'Ю',
- 'yucy' => 'ю',
- 'Yuml' => 'Ÿ',
- 'yuml' => 'ÿ',
- 'yum' => 'ÿ',
- 'Zacute' => 'Ź',
- 'zacute' => 'ź',
- 'Zcaron' => 'Ž',
- 'zcaron' => 'ž',
- 'Zcy' => 'З',
- 'zcy' => 'з',
- 'Zdot' => 'Ż',
- 'zdot' => 'ż',
- 'zeetrf' => 'ℨ',
- 'ZeroWidthSpace' => '​',
- 'Zeta' => 'Ζ',
- 'zeta' => 'ζ',
- 'Zfr' => 'ℨ',
- 'zfr' => '𝔷',
- 'ZHcy' => 'Ж',
- 'zhcy' => 'ж',
- 'zigrarr' => '⇝',
- 'Zopf' => 'ℤ',
- 'zopf' => '𝕫',
- 'Zscr' => '𝒵',
- 'zscr' => '𝓏',
- 'zwj' => '‍',
- 'zwnj' => '‌',
-);
+ 'nexist' => '∄',
+ 'nexists' => '∄',
+ 'Nfr' => '𝔑',
+ 'nfr' => '𝔫',
+ 'ngE' => '≧̸',
+ 'nge' => '≱',
+ 'ngeq' => '≱',
+ 'ngeqq' => '≧̸',
+ 'ngeqslant' => '⩾̸',
+ 'nges' => '⩾̸',
+ 'nGg' => '⋙̸',
+ 'ngsim' => '≵',
+ 'nGt' => '≫⃒',
+ 'ngt' => '≯',
+ 'ngtr' => '≯',
+ 'nGtv' => '≫̸',
+ 'nhArr' => '⇎',
+ 'nharr' => '↮',
+ 'nhpar' => '⫲',
+ 'ni' => '∋',
+ 'nis' => '⋼',
+ 'nisd' => '⋺',
+ 'niv' => '∋',
+ 'NJcy' => 'Њ',
+ 'njcy' => 'њ',
+ 'nlArr' => '⇍',
+ 'nlarr' => '↚',
+ 'nldr' => '‥',
+ 'nlE' => '≦̸',
+ 'nle' => '≰',
+ 'nLeftarrow' => '⇍',
+ 'nleftarrow' => '↚',
+ 'nLeftrightarrow' => '⇎',
+ 'nleftrightarrow' => '↮',
+ 'nleq' => '≰',
+ 'nleqq' => '≦̸',
+ 'nleqslant' => '⩽̸',
+ 'nles' => '⩽̸',
+ 'nless' => '≮',
+ 'nLl' => '⋘̸',
+ 'nlsim' => '≴',
+ 'nLt' => '≪⃒',
+ 'nlt' => '≮',
+ 'nltri' => '⋪',
+ 'nltrie' => '⋬',
+ 'nLtv' => '≪̸',
+ 'nmid' => '∤',
+ 'NoBreak' => '⁠',
+ 'NonBreakingSpace' => ' ',
+ 'Nopf' => 'ℕ',
+ 'nopf' => '𝕟',
+ 'Not' => '⫬',
+ 'not' => '¬',
+ 'no' => '¬',
+ 'NotCongruent' => '≢',
+ 'NotCupCap' => '≭',
+ 'NotDoubleVerticalBar' => '∦',
+ 'NotElement' => '∉',
+ 'NotEqual' => '≠',
+ 'NotEqualTilde' => '≂̸',
+ 'NotExists' => '∄',
+ 'NotGreater' => '≯',
+ 'NotGreaterEqual' => '≱',
+ 'NotGreaterFullEqual' => '≧̸',
+ 'NotGreaterGreater' => '≫̸',
+ 'NotGreaterLess' => '≹',
+ 'NotGreaterSlantEqual' => '⩾̸',
+ 'NotGreaterTilde' => '≵',
+ 'NotHumpDownHump' => '≎̸',
+ 'NotHumpEqual' => '≏̸',
+ 'notin' => '∉',
+ 'notindot' => '⋵̸',
+ 'notinE' => '⋹̸',
+ 'notinva' => '∉',
+ 'notinvb' => '⋷',
+ 'notinvc' => '⋶',
+ 'NotLeftTriangle' => '⋪',
+ 'NotLeftTriangleBar' => '⧏̸',
+ 'NotLeftTriangleEqual' => '⋬',
+ 'NotLess' => '≮',
+ 'NotLessEqual' => '≰',
+ 'NotLessGreater' => '≸',
+ 'NotLessLess' => '≪̸',
+ 'NotLessSlantEqual' => '⩽̸',
+ 'NotLessTilde' => '≴',
+ 'NotNestedGreaterGreater' => '⪢̸',
+ 'NotNestedLessLess' => '⪡̸',
+ 'notni' => '∌',
+ 'notniva' => '∌',
+ 'notnivb' => '⋾',
+ 'notnivc' => '⋽',
+ 'NotPrecedes' => '⊀',
+ 'NotPrecedesEqual' => '⪯̸',
+ 'NotPrecedesSlantEqual' => '⋠',
+ 'NotReverseElement' => '∌',
+ 'NotRightTriangle' => '⋫',
+ 'NotRightTriangleBar' => '⧐̸',
+ 'NotRightTriangleEqual' => '⋭',
+ 'NotSquareSubset' => '⊏̸',
+ 'NotSquareSubsetEqual' => '⋢',
+ 'NotSquareSuperset' => '⊐̸',
+ 'NotSquareSupersetEqual' => '⋣',
+ 'NotSubset' => '⊂⃒',
+ 'NotSubsetEqual' => '⊈',
+ 'NotSucceeds' => '⊁',
+ 'NotSucceedsEqual' => '⪰̸',
+ 'NotSucceedsSlantEqual' => '⋡',
+ 'NotSucceedsTilde' => '≿̸',
+ 'NotSuperset' => '⊃⃒',
+ 'NotSupersetEqual' => '⊉',
+ 'NotTilde' => '≁',
+ 'NotTildeEqual' => '≄',
+ 'NotTildeFullEqual' => '≇',
+ 'NotTildeTilde' => '≉',
+ 'NotVerticalBar' => '∤',
+ 'npar' => '∦',
+ 'nparallel' => '∦',
+ 'nparsl' => '⫽⃥',
+ 'npart' => '∂̸',
+ 'npolint' => '⨔',
+ 'npr' => '⊀',
+ 'nprcue' => '⋠',
+ 'npre' => '⪯̸',
+ 'nprec' => '⊀',
+ 'npreceq' => '⪯̸',
+ 'nrArr' => '⇏',
+ 'nrarr' => '↛',
+ 'nrarrc' => '⤳̸',
+ 'nrarrw' => '↝̸',
+ 'nRightarrow' => '⇏',
+ 'nrightarrow' => '↛',
+ 'nrtri' => '⋫',
+ 'nrtrie' => '⋭',
+ 'nsc' => '⊁',
+ 'nsccue' => '⋡',
+ 'nsce' => '⪰̸',
+ 'Nscr' => '𝒩',
+ 'nscr' => '𝓃',
+ 'nshortmid' => '∤',
+ 'nshortparallel' => '∦',
+ 'nsim' => '≁',
+ 'nsime' => '≄',
+ 'nsimeq' => '≄',
+ 'nsmid' => '∤',
+ 'nspar' => '∦',
+ 'nsqsube' => '⋢',
+ 'nsqsupe' => '⋣',
+ 'nsub' => '⊄',
+ 'nsubE' => '⫅̸',
+ 'nsube' => '⊈',
+ 'nsubset' => '⊂⃒',
+ 'nsubseteq' => '⊈',
+ 'nsubseteqq' => '⫅̸',
+ 'nsucc' => '⊁',
+ 'nsucceq' => '⪰̸',
+ 'nsup' => '⊅',
+ 'nsupE' => '⫆̸',
+ 'nsupe' => '⊉',
+ 'nsupset' => '⊃⃒',
+ 'nsupseteq' => '⊉',
+ 'nsupseteqq' => '⫆̸',
+ 'ntgl' => '≹',
+ 'Ntilde' => 'Ñ',
+ 'Ntild' => 'Ñ',
+ 'ntilde' => 'ñ',
+ 'ntild' => 'ñ',
+ 'ntlg' => '≸',
+ 'ntriangleleft' => '⋪',
+ 'ntrianglelefteq' => '⋬',
+ 'ntriangleright' => '⋫',
+ 'ntrianglerighteq' => '⋭',
+ 'Nu' => 'Ν',
+ 'nu' => 'ν',
+ 'num' => '#',
+ 'numero' => '№',
+ 'numsp' => ' ',
+ 'nvap' => '≍⃒',
+ 'nVDash' => '⊯',
+ 'nVdash' => '⊮',
+ 'nvDash' => '⊭',
+ 'nvdash' => '⊬',
+ 'nvge' => '≥⃒',
+ 'nvgt' => '>⃒',
+ 'nvHarr' => '⤄',
+ 'nvinfin' => '⧞',
+ 'nvlArr' => '⤂',
+ 'nvle' => '≤⃒',
+ 'nvlt' => '<⃒',
+ 'nvltrie' => '⊴⃒',
+ 'nvrArr' => '⤃',
+ 'nvrtrie' => '⊵⃒',
+ 'nvsim' => '∼⃒',
+ 'nwarhk' => '⤣',
+ 'nwArr' => '⇖',
+ 'nwarr' => '↖',
+ 'nwarrow' => '↖',
+ 'nwnear' => '⤧',
+ 'Oacute' => 'Ó',
+ 'Oacut' => 'Ó',
+ 'oacute' => 'ó',
+ 'oacut' => 'ó',
+ 'oast' => '⊛',
+ 'ocir' => 'ô',
+ 'Ocirc' => 'Ô',
+ 'Ocir' => 'Ô',
+ 'ocirc' => 'ô',
+ 'Ocy' => 'О',
+ 'ocy' => 'о',
+ 'odash' => '⊝',
+ 'Odblac' => 'Ő',
+ 'odblac' => 'ő',
+ 'odiv' => '⨸',
+ 'odot' => '⊙',
+ 'odsold' => '⦼',
+ 'OElig' => 'Œ',
+ 'oelig' => 'œ',
+ 'ofcir' => '⦿',
+ 'Ofr' => '𝔒',
+ 'ofr' => '𝔬',
+ 'ogon' => '˛',
+ 'Ograve' => 'Ò',
+ 'Ograv' => 'Ò',
+ 'ograve' => 'ò',
+ 'ograv' => 'ò',
+ 'ogt' => '⧁',
+ 'ohbar' => '⦵',
+ 'ohm' => 'Ω',
+ 'oint' => '∮',
+ 'olarr' => '↺',
+ 'olcir' => '⦾',
+ 'olcross' => '⦻',
+ 'oline' => '‾',
+ 'olt' => '⧀',
+ 'Omacr' => 'Ō',
+ 'omacr' => 'ō',
+ 'Omega' => 'Ω',
+ 'omega' => 'ω',
+ 'Omicron' => 'Ο',
+ 'omicron' => 'ο',
+ 'omid' => '⦶',
+ 'ominus' => '⊖',
+ 'Oopf' => '𝕆',
+ 'oopf' => '𝕠',
+ 'opar' => '⦷',
+ 'OpenCurlyDoubleQuote' => '“',
+ 'OpenCurlyQuote' => '‘',
+ 'operp' => '⦹',
+ 'oplus' => '⊕',
+ 'Or' => '⩔',
+ 'or' => '∨',
+ 'orarr' => '↻',
+ 'ord' => 'º',
+ 'order' => 'ℴ',
+ 'orderof' => 'ℴ',
+ 'ordf' => 'ª',
+ 'ordm' => 'º',
+ 'origof' => '⊶',
+ 'oror' => '⩖',
+ 'orslope' => '⩗',
+ 'orv' => '⩛',
+ 'oS' => 'Ⓢ',
+ 'Oscr' => '𝒪',
+ 'oscr' => 'ℴ',
+ 'Oslash' => 'Ø',
+ 'Oslas' => 'Ø',
+ 'oslash' => 'ø',
+ 'oslas' => 'ø',
+ 'osol' => '⊘',
+ 'Otilde' => 'Õ',
+ 'Otild' => 'Õ',
+ 'otilde' => 'õ',
+ 'otild' => 'õ',
+ 'Otimes' => '⨷',
+ 'otimes' => '⊗',
+ 'otimesas' => '⨶',
+ 'Ouml' => 'Ö',
+ 'Oum' => 'Ö',
+ 'ouml' => 'ö',
+ 'oum' => 'ö',
+ 'ovbar' => '⌽',
+ 'OverBar' => '‾',
+ 'OverBrace' => '⏞',
+ 'OverBracket' => '⎴',
+ 'OverParenthesis' => '⏜',
+ 'par' => '¶',
+ 'para' => '¶',
+ 'parallel' => '∥',
+ 'parsim' => '⫳',
+ 'parsl' => '⫽',
+ 'part' => '∂',
+ 'PartialD' => '∂',
+ 'Pcy' => 'П',
+ 'pcy' => 'п',
+ 'percnt' => '%',
+ 'period' => '.',
+ 'permil' => '‰',
+ 'perp' => '⊥',
+ 'pertenk' => '‱',
+ 'Pfr' => '𝔓',
+ 'pfr' => '𝔭',
+ 'Phi' => 'Φ',
+ 'phi' => 'φ',
+ 'phiv' => 'ϕ',
+ 'phmmat' => 'ℳ',
+ 'phone' => '☎',
+ 'Pi' => 'Π',
+ 'pi' => 'π',
+ 'pitchfork' => '⋔',
+ 'piv' => 'ϖ',
+ 'planck' => 'ℏ',
+ 'planckh' => 'ℎ',
+ 'plankv' => 'ℏ',
+ 'plus' => '+',
+ 'plusacir' => '⨣',
+ 'plusb' => '⊞',
+ 'pluscir' => '⨢',
+ 'plusdo' => '∔',
+ 'plusdu' => '⨥',
+ 'pluse' => '⩲',
+ 'PlusMinus' => '±',
+ 'plusmn' => '±',
+ 'plusm' => '±',
+ 'plussim' => '⨦',
+ 'plustwo' => '⨧',
+ 'pm' => '±',
+ 'Poincareplane' => 'ℌ',
+ 'pointint' => '⨕',
+ 'Popf' => 'ℙ',
+ 'popf' => '𝕡',
+ 'pound' => '£',
+ 'poun' => '£',
+ 'Pr' => '⪻',
+ 'pr' => '≺',
+ 'prap' => '⪷',
+ 'prcue' => '≼',
+ 'prE' => '⪳',
+ 'pre' => '⪯',
+ 'prec' => '≺',
+ 'precapprox' => '⪷',
+ 'preccurlyeq' => '≼',
+ 'Precedes' => '≺',
+ 'PrecedesEqual' => '⪯',
+ 'PrecedesSlantEqual' => '≼',
+ 'PrecedesTilde' => '≾',
+ 'preceq' => '⪯',
+ 'precnapprox' => '⪹',
+ 'precneqq' => '⪵',
+ 'precnsim' => '⋨',
+ 'precsim' => '≾',
+ 'Prime' => '″',
+ 'prime' => '′',
+ 'primes' => 'ℙ',
+ 'prnap' => '⪹',
+ 'prnE' => '⪵',
+ 'prnsim' => '⋨',
+ 'prod' => '∏',
+ 'Product' => '∏',
+ 'profalar' => '⌮',
+ 'profline' => '⌒',
+ 'profsurf' => '⌓',
+ 'prop' => '∝',
+ 'Proportion' => '∷',
+ 'Proportional' => '∝',
+ 'propto' => '∝',
+ 'prsim' => '≾',
+ 'prurel' => '⊰',
+ 'Pscr' => '𝒫',
+ 'pscr' => '𝓅',
+ 'Psi' => 'Ψ',
+ 'psi' => 'ψ',
+ 'puncsp' => ' ',
+ 'Qfr' => '𝔔',
+ 'qfr' => '𝔮',
+ 'qint' => '⨌',
+ 'Qopf' => 'ℚ',
+ 'qopf' => '𝕢',
+ 'qprime' => '⁗',
+ 'Qscr' => '𝒬',
+ 'qscr' => '𝓆',
+ 'quaternions' => 'ℍ',
+ 'quatint' => '⨖',
+ 'quest' => '?',
+ 'questeq' => '≟',
+ 'QUOT' => '"',
+ 'QUO' => '"',
+ 'quot' => '"',
+ 'quo' => '"',
+ 'rAarr' => '⇛',
+ 'race' => '∽̱',
+ 'Racute' => 'Ŕ',
+ 'racute' => 'ŕ',
+ 'radic' => '√',
+ 'raemptyv' => '⦳',
+ 'Rang' => '⟫',
+ 'rang' => '⟩',
+ 'rangd' => '⦒',
+ 'range' => '⦥',
+ 'rangle' => '⟩',
+ 'raquo' => '»',
+ 'raqu' => '»',
+ 'Rarr' => '↠',
+ 'rArr' => '⇒',
+ 'rarr' => '→',
+ 'rarrap' => '⥵',
+ 'rarrb' => '⇥',
+ 'rarrbfs' => '⤠',
+ 'rarrc' => '⤳',
+ 'rarrfs' => '⤞',
+ 'rarrhk' => '↪',
+ 'rarrlp' => '↬',
+ 'rarrpl' => '⥅',
+ 'rarrsim' => '⥴',
+ 'Rarrtl' => '⤖',
+ 'rarrtl' => '↣',
+ 'rarrw' => '↝',
+ 'rAtail' => '⤜',
+ 'ratail' => '⤚',
+ 'ratio' => '∶',
+ 'rationals' => 'ℚ',
+ 'RBarr' => '⤐',
+ 'rBarr' => '⤏',
+ 'rbarr' => '⤍',
+ 'rbbrk' => '❳',
+ 'rbrace' => '}',
+ 'rbrack' => ']',
+ 'rbrke' => '⦌',
+ 'rbrksld' => '⦎',
+ 'rbrkslu' => '⦐',
+ 'Rcaron' => 'Ř',
+ 'rcaron' => 'ř',
+ 'Rcedil' => 'Ŗ',
+ 'rcedil' => 'ŗ',
+ 'rceil' => '⌉',
+ 'rcub' => '}',
+ 'Rcy' => 'Р',
+ 'rcy' => 'р',
+ 'rdca' => '⤷',
+ 'rdldhar' => '⥩',
+ 'rdquo' => '”',
+ 'rdquor' => '”',
+ 'rdsh' => '↳',
+ 'Re' => 'ℜ',
+ 'real' => 'ℜ',
+ 'realine' => 'ℛ',
+ 'realpart' => 'ℜ',
+ 'reals' => 'ℝ',
+ 'rect' => '▭',
+ 'REG' => '®',
+ 'RE' => '®',
+ 'reg' => '®',
+ 're' => '®',
+ 'ReverseElement' => '∋',
+ 'ReverseEquilibrium' => '⇋',
+ 'ReverseUpEquilibrium' => '⥯',
+ 'rfisht' => '⥽',
+ 'rfloor' => '⌋',
+ 'Rfr' => 'ℜ',
+ 'rfr' => '𝔯',
+ 'rHar' => '⥤',
+ 'rhard' => '⇁',
+ 'rharu' => '⇀',
+ 'rharul' => '⥬',
+ 'Rho' => 'Ρ',
+ 'rho' => 'ρ',
+ 'rhov' => 'ϱ',
+ 'RightAngleBracket' => '⟩',
+ 'RightArrow' => '→',
+ 'Rightarrow' => '⇒',
+ 'rightarrow' => '→',
+ 'RightArrowBar' => '⇥',
+ 'RightArrowLeftArrow' => '⇄',
+ 'rightarrowtail' => '↣',
+ 'RightCeiling' => '⌉',
+ 'RightDoubleBracket' => '⟧',
+ 'RightDownTeeVector' => '⥝',
+ 'RightDownVector' => '⇂',
+ 'RightDownVectorBar' => '⥕',
+ 'RightFloor' => '⌋',
+ 'rightharpoondown' => '⇁',
+ 'rightharpoonup' => '⇀',
+ 'rightleftarrows' => '⇄',
+ 'rightleftharpoons' => '⇌',
+ 'rightrightarrows' => '⇉',
+ 'rightsquigarrow' => '↝',
+ 'RightTee' => '⊢',
+ 'RightTeeArrow' => '↦',
+ 'RightTeeVector' => '⥛',
+ 'rightthreetimes' => '⋌',
+ 'RightTriangle' => '⊳',
+ 'RightTriangleBar' => '⧐',
+ 'RightTriangleEqual' => '⊵',
+ 'RightUpDownVector' => '⥏',
+ 'RightUpTeeVector' => '⥜',
+ 'RightUpVector' => '↾',
+ 'RightUpVectorBar' => '⥔',
+ 'RightVector' => '⇀',
+ 'RightVectorBar' => '⥓',
+ 'ring' => '˚',
+ 'risingdotseq' => '≓',
+ 'rlarr' => '⇄',
+ 'rlhar' => '⇌',
+ 'rlm' => '‏',
+ 'rmoust' => '⎱',
+ 'rmoustache' => '⎱',
+ 'rnmid' => '⫮',
+ 'roang' => '⟭',
+ 'roarr' => '⇾',
+ 'robrk' => '⟧',
+ 'ropar' => '⦆',
+ 'Ropf' => 'ℝ',
+ 'ropf' => '𝕣',
+ 'roplus' => '⨮',
+ 'rotimes' => '⨵',
+ 'RoundImplies' => '⥰',
+ 'rpar' => ')',
+ 'rpargt' => '⦔',
+ 'rppolint' => '⨒',
+ 'rrarr' => '⇉',
+ 'Rrightarrow' => '⇛',
+ 'rsaquo' => '›',
+ 'Rscr' => 'ℛ',
+ 'rscr' => '𝓇',
+ 'Rsh' => '↱',
+ 'rsh' => '↱',
+ 'rsqb' => ']',
+ 'rsquo' => '’',
+ 'rsquor' => '’',
+ 'rthree' => '⋌',
+ 'rtimes' => '⋊',
+ 'rtri' => '▹',
+ 'rtrie' => '⊵',
+ 'rtrif' => '▸',
+ 'rtriltri' => '⧎',
+ 'RuleDelayed' => '⧴',
+ 'ruluhar' => '⥨',
+ 'rx' => '℞',
+ 'Sacute' => 'Ś',
+ 'sacute' => 'ś',
+ 'sbquo' => '‚',
+ 'Sc' => '⪼',
+ 'sc' => '≻',
+ 'scap' => '⪸',
+ 'Scaron' => 'Š',
+ 'scaron' => 'š',
+ 'sccue' => '≽',
+ 'scE' => '⪴',
+ 'sce' => '⪰',
+ 'Scedil' => 'Ş',
+ 'scedil' => 'ş',
+ 'Scirc' => 'Ŝ',
+ 'scirc' => 'ŝ',
+ 'scnap' => '⪺',
+ 'scnE' => '⪶',
+ 'scnsim' => '⋩',
+ 'scpolint' => '⨓',
+ 'scsim' => '≿',
+ 'Scy' => 'С',
+ 'scy' => 'с',
+ 'sdot' => '⋅',
+ 'sdotb' => '⊡',
+ 'sdote' => '⩦',
+ 'searhk' => '⤥',
+ 'seArr' => '⇘',
+ 'searr' => '↘',
+ 'searrow' => '↘',
+ 'sect' => '§',
+ 'sec' => '§',
+ 'semi' => ';',
+ 'seswar' => '⤩',
+ 'setminus' => '∖',
+ 'setmn' => '∖',
+ 'sext' => '✶',
+ 'Sfr' => '𝔖',
+ 'sfr' => '𝔰',
+ 'sfrown' => '⌢',
+ 'sharp' => '♯',
+ 'SHCHcy' => 'Щ',
+ 'shchcy' => 'щ',
+ 'SHcy' => 'Ш',
+ 'shcy' => 'ш',
+ 'ShortDownArrow' => '↓',
+ 'ShortLeftArrow' => '←',
+ 'shortmid' => '∣',
+ 'shortparallel' => '∥',
+ 'ShortRightArrow' => '→',
+ 'ShortUpArrow' => '↑',
+ 'shy' => '­',
+ 'sh' => '­',
+ 'Sigma' => 'Σ',
+ 'sigma' => 'σ',
+ 'sigmaf' => 'ς',
+ 'sigmav' => 'ς',
+ 'sim' => '∼',
+ 'simdot' => '⩪',
+ 'sime' => '≃',
+ 'simeq' => '≃',
+ 'simg' => '⪞',
+ 'simgE' => '⪠',
+ 'siml' => '⪝',
+ 'simlE' => '⪟',
+ 'simne' => '≆',
+ 'simplus' => '⨤',
+ 'simrarr' => '⥲',
+ 'slarr' => '←',
+ 'SmallCircle' => '∘',
+ 'smallsetminus' => '∖',
+ 'smashp' => '⨳',
+ 'smeparsl' => '⧤',
+ 'smid' => '∣',
+ 'smile' => '⌣',
+ 'smt' => '⪪',
+ 'smte' => '⪬',
+ 'smtes' => '⪬︀',
+ 'SOFTcy' => 'Ь',
+ 'softcy' => 'ь',
+ 'sol' => '/',
+ 'solb' => '⧄',
+ 'solbar' => '⌿',
+ 'Sopf' => '𝕊',
+ 'sopf' => '𝕤',
+ 'spades' => '♠',
+ 'spadesuit' => '♠',
+ 'spar' => '∥',
+ 'sqcap' => '⊓',
+ 'sqcaps' => '⊓︀',
+ 'sqcup' => '⊔',
+ 'sqcups' => '⊔︀',
+ 'Sqrt' => '√',
+ 'sqsub' => '⊏',
+ 'sqsube' => '⊑',
+ 'sqsubset' => '⊏',
+ 'sqsubseteq' => '⊑',
+ 'sqsup' => '⊐',
+ 'sqsupe' => '⊒',
+ 'sqsupset' => '⊐',
+ 'sqsupseteq' => '⊒',
+ 'squ' => '□',
+ 'Square' => '□',
+ 'square' => '□',
+ 'SquareIntersection' => '⊓',
+ 'SquareSubset' => '⊏',
+ 'SquareSubsetEqual' => '⊑',
+ 'SquareSuperset' => '⊐',
+ 'SquareSupersetEqual' => '⊒',
+ 'SquareUnion' => '⊔',
+ 'squarf' => '▪',
+ 'squf' => '▪',
+ 'srarr' => '→',
+ 'Sscr' => '𝒮',
+ 'sscr' => '𝓈',
+ 'ssetmn' => '∖',
+ 'ssmile' => '⌣',
+ 'sstarf' => '⋆',
+ 'Star' => '⋆',
+ 'star' => '☆',
+ 'starf' => '★',
+ 'straightepsilon' => 'ϵ',
+ 'straightphi' => 'ϕ',
+ 'strns' => '¯',
+ 'Sub' => '⋐',
+ 'sub' => '⊂',
+ 'subdot' => '⪽',
+ 'subE' => '⫅',
+ 'sube' => '⊆',
+ 'subedot' => '⫃',
+ 'submult' => '⫁',
+ 'subnE' => '⫋',
+ 'subne' => '⊊',
+ 'subplus' => '⪿',
+ 'subrarr' => '⥹',
+ 'Subset' => '⋐',
+ 'subset' => '⊂',
+ 'subseteq' => '⊆',
+ 'subseteqq' => '⫅',
+ 'SubsetEqual' => '⊆',
+ 'subsetneq' => '⊊',
+ 'subsetneqq' => '⫋',
+ 'subsim' => '⫇',
+ 'subsub' => '⫕',
+ 'subsup' => '⫓',
+ 'succ' => '≻',
+ 'succapprox' => '⪸',
+ 'succcurlyeq' => '≽',
+ 'Succeeds' => '≻',
+ 'SucceedsEqual' => '⪰',
+ 'SucceedsSlantEqual' => '≽',
+ 'SucceedsTilde' => '≿',
+ 'succeq' => '⪰',
+ 'succnapprox' => '⪺',
+ 'succneqq' => '⪶',
+ 'succnsim' => '⋩',
+ 'succsim' => '≿',
+ 'SuchThat' => '∋',
+ 'Sum' => '∑',
+ 'sum' => '∑',
+ 'sung' => '♪',
+ 'Sup' => '⋑',
+ 'sup' => '³',
+ 'sup1' => '¹',
+ 'sup2' => '²',
+ 'sup3' => '³',
+ 'supdot' => '⪾',
+ 'supdsub' => '⫘',
+ 'supE' => '⫆',
+ 'supe' => '⊇',
+ 'supedot' => '⫄',
+ 'Superset' => '⊃',
+ 'SupersetEqual' => '⊇',
+ 'suphsol' => '⟉',
+ 'suphsub' => '⫗',
+ 'suplarr' => '⥻',
+ 'supmult' => '⫂',
+ 'supnE' => '⫌',
+ 'supne' => '⊋',
+ 'supplus' => '⫀',
+ 'Supset' => '⋑',
+ 'supset' => '⊃',
+ 'supseteq' => '⊇',
+ 'supseteqq' => '⫆',
+ 'supsetneq' => '⊋',
+ 'supsetneqq' => '⫌',
+ 'supsim' => '⫈',
+ 'supsub' => '⫔',
+ 'supsup' => '⫖',
+ 'swarhk' => '⤦',
+ 'swArr' => '⇙',
+ 'swarr' => '↙',
+ 'swarrow' => '↙',
+ 'swnwar' => '⤪',
+ 'szlig' => 'ß',
+ 'szli' => 'ß',
+ 'Tab' => ' ',
+ 'target' => '⌖',
+ 'Tau' => 'Τ',
+ 'tau' => 'τ',
+ 'tbrk' => '⎴',
+ 'Tcaron' => 'Ť',
+ 'tcaron' => 'ť',
+ 'Tcedil' => 'Ţ',
+ 'tcedil' => 'ţ',
+ 'Tcy' => 'Т',
+ 'tcy' => 'т',
+ 'tdot' => '⃛',
+ 'telrec' => '⌕',
+ 'Tfr' => '𝔗',
+ 'tfr' => '𝔱',
+ 'there4' => '∴',
+ 'Therefore' => '∴',
+ 'therefore' => '∴',
+ 'Theta' => 'Θ',
+ 'theta' => 'θ',
+ 'thetasym' => 'ϑ',
+ 'thetav' => 'ϑ',
+ 'thickapprox' => '≈',
+ 'thicksim' => '∼',
+ 'ThickSpace' => '  ',
+ 'thinsp' => ' ',
+ 'ThinSpace' => ' ',
+ 'thkap' => '≈',
+ 'thksim' => '∼',
+ 'THORN' => 'Þ',
+ 'THOR' => 'Þ',
+ 'thorn' => 'þ',
+ 'thor' => 'þ',
+ 'Tilde' => '∼',
+ 'tilde' => '˜',
+ 'TildeEqual' => '≃',
+ 'TildeFullEqual' => '≅',
+ 'TildeTilde' => '≈',
+ 'times' => '×',
+ 'time' => '×',
+ 'timesb' => '⊠',
+ 'timesbar' => '⨱',
+ 'timesd' => '⨰',
+ 'tint' => '∭',
+ 'toea' => '⤨',
+ 'top' => '⊤',
+ 'topbot' => '⌶',
+ 'topcir' => '⫱',
+ 'Topf' => '𝕋',
+ 'topf' => '𝕥',
+ 'topfork' => '⫚',
+ 'tosa' => '⤩',
+ 'tprime' => '‴',
+ 'TRADE' => '™',
+ 'trade' => '™',
+ 'triangle' => '▵',
+ 'triangledown' => '▿',
+ 'triangleleft' => '◃',
+ 'trianglelefteq' => '⊴',
+ 'triangleq' => '≜',
+ 'triangleright' => '▹',
+ 'trianglerighteq' => '⊵',
+ 'tridot' => '◬',
+ 'trie' => '≜',
+ 'triminus' => '⨺',
+ 'TripleDot' => '⃛',
+ 'triplus' => '⨹',
+ 'trisb' => '⧍',
+ 'tritime' => '⨻',
+ 'trpezium' => '⏢',
+ 'Tscr' => '𝒯',
+ 'tscr' => '𝓉',
+ 'TScy' => 'Ц',
+ 'tscy' => 'ц',
+ 'TSHcy' => 'Ћ',
+ 'tshcy' => 'ћ',
+ 'Tstrok' => 'Ŧ',
+ 'tstrok' => 'ŧ',
+ 'twixt' => '≬',
+ 'twoheadleftarrow' => '↞',
+ 'twoheadrightarrow' => '↠',
+ 'Uacute' => 'Ú',
+ 'Uacut' => 'Ú',
+ 'uacute' => 'ú',
+ 'uacut' => 'ú',
+ 'Uarr' => '↟',
+ 'uArr' => '⇑',
+ 'uarr' => '↑',
+ 'Uarrocir' => '⥉',
+ 'Ubrcy' => 'Ў',
+ 'ubrcy' => 'ў',
+ 'Ubreve' => 'Ŭ',
+ 'ubreve' => 'ŭ',
+ 'Ucirc' => 'Û',
+ 'Ucir' => 'Û',
+ 'ucirc' => 'û',
+ 'ucir' => 'û',
+ 'Ucy' => 'У',
+ 'ucy' => 'у',
+ 'udarr' => '⇅',
+ 'Udblac' => 'Ű',
+ 'udblac' => 'ű',
+ 'udhar' => '⥮',
+ 'ufisht' => '⥾',
+ 'Ufr' => '𝔘',
+ 'ufr' => '𝔲',
+ 'Ugrave' => 'Ù',
+ 'Ugrav' => 'Ù',
+ 'ugrave' => 'ù',
+ 'ugrav' => 'ù',
+ 'uHar' => '⥣',
+ 'uharl' => '↿',
+ 'uharr' => '↾',
+ 'uhblk' => '▀',
+ 'ulcorn' => '⌜',
+ 'ulcorner' => '⌜',
+ 'ulcrop' => '⌏',
+ 'ultri' => '◸',
+ 'Umacr' => 'Ū',
+ 'umacr' => 'ū',
+ 'uml' => '¨',
+ 'um' => '¨',
+ 'UnderBar' => '_',
+ 'UnderBrace' => '⏟',
+ 'UnderBracket' => '⎵',
+ 'UnderParenthesis' => '⏝',
+ 'Union' => '⋃',
+ 'UnionPlus' => '⊎',
+ 'Uogon' => 'Ų',
+ 'uogon' => 'ų',
+ 'Uopf' => '𝕌',
+ 'uopf' => '𝕦',
+ 'UpArrow' => '↑',
+ 'Uparrow' => '⇑',
+ 'uparrow' => '↑',
+ 'UpArrowBar' => '⤒',
+ 'UpArrowDownArrow' => '⇅',
+ 'UpDownArrow' => '↕',
+ 'Updownarrow' => '⇕',
+ 'updownarrow' => '↕',
+ 'UpEquilibrium' => '⥮',
+ 'upharpoonleft' => '↿',
+ 'upharpoonright' => '↾',
+ 'uplus' => '⊎',
+ 'UpperLeftArrow' => '↖',
+ 'UpperRightArrow' => '↗',
+ 'Upsi' => 'ϒ',
+ 'upsi' => 'υ',
+ 'upsih' => 'ϒ',
+ 'Upsilon' => 'Υ',
+ 'upsilon' => 'υ',
+ 'UpTee' => '⊥',
+ 'UpTeeArrow' => '↥',
+ 'upuparrows' => '⇈',
+ 'urcorn' => '⌝',
+ 'urcorner' => '⌝',
+ 'urcrop' => '⌎',
+ 'Uring' => 'Ů',
+ 'uring' => 'ů',
+ 'urtri' => '◹',
+ 'Uscr' => '𝒰',
+ 'uscr' => '𝓊',
+ 'utdot' => '⋰',
+ 'Utilde' => 'Ũ',
+ 'utilde' => 'ũ',
+ 'utri' => '▵',
+ 'utrif' => '▴',
+ 'uuarr' => '⇈',
+ 'Uuml' => 'Ü',
+ 'Uum' => 'Ü',
+ 'uuml' => 'ü',
+ 'uum' => 'ü',
+ 'uwangle' => '⦧',
+ 'vangrt' => '⦜',
+ 'varepsilon' => 'ϵ',
+ 'varkappa' => 'ϰ',
+ 'varnothing' => '∅',
+ 'varphi' => 'ϕ',
+ 'varpi' => 'ϖ',
+ 'varpropto' => '∝',
+ 'vArr' => '⇕',
+ 'varr' => '↕',
+ 'varrho' => 'ϱ',
+ 'varsigma' => 'ς',
+ 'varsubsetneq' => '⊊︀',
+ 'varsubsetneqq' => '⫋︀',
+ 'varsupsetneq' => '⊋︀',
+ 'varsupsetneqq' => '⫌︀',
+ 'vartheta' => 'ϑ',
+ 'vartriangleleft' => '⊲',
+ 'vartriangleright' => '⊳',
+ 'Vbar' => '⫫',
+ 'vBar' => '⫨',
+ 'vBarv' => '⫩',
+ 'Vcy' => 'В',
+ 'vcy' => 'в',
+ 'VDash' => '⊫',
+ 'Vdash' => '⊩',
+ 'vDash' => '⊨',
+ 'vdash' => '⊢',
+ 'Vdashl' => '⫦',
+ 'Vee' => '⋁',
+ 'vee' => '∨',
+ 'veebar' => '⊻',
+ 'veeeq' => '≚',
+ 'vellip' => '⋮',
+ 'Verbar' => '‖',
+ 'verbar' => '|',
+ 'Vert' => '‖',
+ 'vert' => '|',
+ 'VerticalBar' => '∣',
+ 'VerticalLine' => '|',
+ 'VerticalSeparator' => '❘',
+ 'VerticalTilde' => '≀',
+ 'VeryThinSpace' => ' ',
+ 'Vfr' => '𝔙',
+ 'vfr' => '𝔳',
+ 'vltri' => '⊲',
+ 'vnsub' => '⊂⃒',
+ 'vnsup' => '⊃⃒',
+ 'Vopf' => '𝕍',
+ 'vopf' => '𝕧',
+ 'vprop' => '∝',
+ 'vrtri' => '⊳',
+ 'Vscr' => '𝒱',
+ 'vscr' => '𝓋',
+ 'vsubnE' => '⫋︀',
+ 'vsubne' => '⊊︀',
+ 'vsupnE' => '⫌︀',
+ 'vsupne' => '⊋︀',
+ 'Vvdash' => '⊪',
+ 'vzigzag' => '⦚',
+ 'Wcirc' => 'Ŵ',
+ 'wcirc' => 'ŵ',
+ 'wedbar' => '⩟',
+ 'Wedge' => '⋀',
+ 'wedge' => '∧',
+ 'wedgeq' => '≙',
+ 'weierp' => '℘',
+ 'Wfr' => '𝔚',
+ 'wfr' => '𝔴',
+ 'Wopf' => '𝕎',
+ 'wopf' => '𝕨',
+ 'wp' => '℘',
+ 'wr' => '≀',
+ 'wreath' => '≀',
+ 'Wscr' => '𝒲',
+ 'wscr' => '𝓌',
+ 'xcap' => '⋂',
+ 'xcirc' => '◯',
+ 'xcup' => '⋃',
+ 'xdtri' => '▽',
+ 'Xfr' => '𝔛',
+ 'xfr' => '𝔵',
+ 'xhArr' => '⟺',
+ 'xharr' => '⟷',
+ 'Xi' => 'Ξ',
+ 'xi' => 'ξ',
+ 'xlArr' => '⟸',
+ 'xlarr' => '⟵',
+ 'xmap' => '⟼',
+ 'xnis' => '⋻',
+ 'xodot' => '⨀',
+ 'Xopf' => '𝕏',
+ 'xopf' => '𝕩',
+ 'xoplus' => '⨁',
+ 'xotime' => '⨂',
+ 'xrArr' => '⟹',
+ 'xrarr' => '⟶',
+ 'Xscr' => '𝒳',
+ 'xscr' => '𝓍',
+ 'xsqcup' => '⨆',
+ 'xuplus' => '⨄',
+ 'xutri' => '△',
+ 'xvee' => '⋁',
+ 'xwedge' => '⋀',
+ 'Yacute' => 'Ý',
+ 'Yacut' => 'Ý',
+ 'yacute' => 'ý',
+ 'yacut' => 'ý',
+ 'YAcy' => 'Я',
+ 'yacy' => 'я',
+ 'Ycirc' => 'Ŷ',
+ 'ycirc' => 'ŷ',
+ 'Ycy' => 'Ы',
+ 'ycy' => 'ы',
+ 'yen' => '¥',
+ 'ye' => '¥',
+ 'Yfr' => '𝔜',
+ 'yfr' => '𝔶',
+ 'YIcy' => 'Ї',
+ 'yicy' => 'ї',
+ 'Yopf' => '𝕐',
+ 'yopf' => '𝕪',
+ 'Yscr' => '𝒴',
+ 'yscr' => '𝓎',
+ 'YUcy' => 'Ю',
+ 'yucy' => 'ю',
+ 'Yuml' => 'Ÿ',
+ 'yuml' => 'ÿ',
+ 'yum' => 'ÿ',
+ 'Zacute' => 'Ź',
+ 'zacute' => 'ź',
+ 'Zcaron' => 'Ž',
+ 'zcaron' => 'ž',
+ 'Zcy' => 'З',
+ 'zcy' => 'з',
+ 'Zdot' => 'Ż',
+ 'zdot' => 'ż',
+ 'zeetrf' => 'ℨ',
+ 'ZeroWidthSpace' => '​',
+ 'Zeta' => 'Ζ',
+ 'zeta' => 'ζ',
+ 'Zfr' => 'ℨ',
+ 'zfr' => '𝔷',
+ 'ZHcy' => 'Ж',
+ 'zhcy' => 'ж',
+ 'zigrarr' => '⇝',
+ 'Zopf' => 'ℤ',
+ 'zopf' => '𝕫',
+ 'Zscr' => '𝒵',
+ 'zscr' => '𝓏',
+ 'zwj' => '‍',
+ 'zwnj' => '‌'
+ );
}
diff --git a/src/HTML5/Exception.php b/src/HTML5/Exception.php
index 9885ae5..8f33126 100644
--- a/src/HTML5/Exception.php
+++ b/src/HTML5/Exception.php
@@ -4,5 +4,6 @@ namespace Masterminds\HTML5;
/**
* The base exception for the HTML5 project.
*/
-class Exception extends \Exception {
+class Exception extends \Exception
+{
}
diff --git a/src/HTML5/InstructionProcessor.php b/src/HTML5/InstructionProcessor.php
index d84ba42..ac6a23c 100644
--- a/src/HTML5/InstructionProcessor.php
+++ b/src/HTML5/InstructionProcessor.php
@@ -16,28 +16,28 @@ namespace Masterminds\HTML5;
* One could, for example, use this mechanism to execute well-formed PHP
* code embedded inside of an HTML5 document.
*/
-interface InstructionProcessor {
+interface InstructionProcessor
+{
- /**
- * Process an individual processing instruction.
- *
- * The process() function is responsible for doing the following:
- * - Determining whether $name is an instruction type it can handle.
- * - Determining what to do with the data passed in.
- * - Making any subsequent modifications to the DOM by modifying the
- * DOMElement or its attached DOM tree.
- *
- * @param DOMElement $element
- * The parent element for the current processing instruction.
- * @param string $name
- * The instruction's name. E.g. `&lt;?php` has the name `php`.
- * @param string $data
- * All of the data between the opening and closing PI marks.
- * @return DOMElement
- * The element that should be considered "Current". This may just be
- * the element passed in, but if the processor added more elements,
- * it may choose to reset the current element to one of the elements
- * it created. (When in doubt, return the element passed in.)
- */
- public function process(\DOMElement $element, $name, $data);
+ /**
+ * Process an individual processing instruction.
+ *
+ * The process() function is responsible for doing the following:
+ * - Determining whether $name is an instruction type it can handle.
+ * - Determining what to do with the data passed in.
+ * - Making any subsequent modifications to the DOM by modifying the
+ * DOMElement or its attached DOM tree.
+ *
+ * @param DOMElement $element
+ * The parent element for the current processing instruction.
+ * @param string $name
+ * The instruction's name. E.g. `&lt;?php` has the name `php`.
+ * @param string $data
+ * All of the data between the opening and closing PI marks.
+ * @return DOMElement The element that should be considered "Current". This may just be
+ * the element passed in, but if the processor added more elements,
+ * it may choose to reset the current element to one of the elements
+ * it created. (When in doubt, return the element passed in.)
+ */
+ public function process(\DOMElement $element, $name, $data);
}
diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php
index 2ead49e..24cc687 100644
--- a/src/HTML5/Parser/CharacterReference.php
+++ b/src/HTML5/Parser/CharacterReference.php
@@ -9,48 +9,55 @@ use Masterminds\HTML5\Entities;
* This is a simple resolver for HTML5 character reference entitites.
* See \Masterminds\HTML5\Entities for the list of supported entities.
*/
-class CharacterReference {
+class CharacterReference
+{
- protected static $numeric_mask = array(0x0, 0x2FFFF, 0, 0xFFFF);
+ protected static $numeric_mask = array(
+ 0x0,
+ 0x2FFFF,
+ 0,
+ 0xFFFF
+ );
- /**
- * Given a name (e.g. 'amp'), lookup the UTF-8 character ('&')
- *
- * @param string $name
- * The name to look up.
- * @return string
- * The character sequence. In UTF-8 this may be more than one byte.
- */
- public static function lookupName($name) {
- // Do we really want to return NULL here? or FFFD
- return isset(Entities::$byName[$name]) ? Entities::$byName[$name] : NULL;
- }
+ /**
+ * Given a name (e.g.
+ * 'amp'), lookup the UTF-8 character ('&')
+ *
+ * @param string $name
+ * The name to look up.
+ * @return string The character sequence. In UTF-8 this may be more than one byte.
+ */
+ public static function lookupName($name)
+ {
+ // Do we really want to return NULL here? or FFFD
+ return isset(Entities::$byName[$name]) ? Entities::$byName[$name] : NULL;
+ }
- /**
- * Given a Unicode codepoint, return the UTF-8 character.
- *
- * (NOT USED ANYWHERE)
- */
- /*
- public static function lookupCode($codePoint) {
- return 'POINT';
- }
- */
+ /**
+ * Given a Unicode codepoint, return the UTF-8 character.
+ *
+ * (NOT USED ANYWHERE)
+ */
+ /*
+ * public static function lookupCode($codePoint) { return 'POINT'; }
+ */
- /**
- * Given a decimal number, return the UTF-8 character.
- */
- public static function lookupDecimal($int) {
- $entity = '&#' . $int . ';';
- // UNTESTED: This may fail on some planes. Couldn't find full documentation
- // on the value of the mask array.
- return mb_decode_numericentity($entity, static::$numeric_mask, 'utf-8');
- }
+ /**
+ * Given a decimal number, return the UTF-8 character.
+ */
+ public static function lookupDecimal($int)
+ {
+ $entity = '&#' . $int . ';';
+ // UNTESTED: This may fail on some planes. Couldn't find full documentation
+ // on the value of the mask array.
+ return mb_decode_numericentity($entity, static::$numeric_mask, 'utf-8');
+ }
- /**
- * Given a hexidecimal number, return the UTF-8 character.
- */
- public static function lookupHex($hexdec) {
- return static::lookupDecimal(hexdec($hexdec));
- }
+ /**
+ * Given a hexidecimal number, return the UTF-8 character.
+ */
+ public static function lookupHex($hexdec)
+ {
+ return static::lookupDecimal(hexdec($hexdec));
+ }
}
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 3bf5713..9e77312 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -2,6 +2,7 @@
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Elements;
+
/**
* Create an HTML5 DOM tree from events.
*
@@ -20,456 +21,493 @@ use Masterminds\HTML5\Elements;
* re-written to accomodate this. See, for example, the Go language HTML5
* parser.
*/
-class DOMTreeBuilder implements EventHandler {
-
-
- /**
- * Defined in 8.2.5.
- */
- const IM_INITIAL = 0;
- const IM_BEFORE_HTML = 1;
- const IM_BEFORE_HEAD = 2;
- const IM_IN_HEAD = 3;
- const IM_IN_HEAD_NOSCRIPT = 4;
- const IM_AFTER_HEAD = 5;
- const IM_IN_BODY = 6;
- const IM_TEXT = 7;
- const IM_IN_TABLE = 8;
- const IM_IN_TABLE_TEXT = 9;
- const IM_IN_CAPTION = 10;
- const IM_IN_COLUMN_GROUP = 11;
- const IM_IN_TABLE_BODY = 12;
- const IM_IN_ROW = 13;
- const IM_IN_CELL = 14;
- const IM_IN_SELECT = 15;
- const IM_IN_SELECT_IN_TABLE = 16;
- const IM_AFTER_BODY = 17;
- const IM_IN_FRAMESET = 18;
- const IM_AFTER_FRAMESET = 19;
- const IM_AFTER_AFTER_BODY = 20;
- const IM_AFTER_AFTER_FRAMESET = 21;
-
- const IM_IN_SVG = 22;
- const IM_IN_MATHML = 23;
-
- protected $stack = array();
- protected $current; // Pointer in the tag hierarchy.
- protected $doc;
-
- protected $processor;
-
- protected $insertMode = 0;
-
- /**
- * Quirks mode is enabled by default. Any document that is missing the
- * DT will be considered to be in quirks mode.
- */
- protected $quirks = TRUE;
-
- public $isFragment = FALSE;
-
- public function __construct($isFragment = FALSE) {
- $impl = new \DOMImplementation();
- // XXX:
- // Create the doctype. For now, we are always creating HTML5
- // documents, and attempting to up-convert any older DTDs to HTML5.
- $dt = $impl->createDocumentType('html');
- //$this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
- $this->doc = $impl->createDocument(NULL, NULL, $dt);
- $this->doc->errors = array();
-
- // $this->current = $this->doc->documentElement;
- $this->current = $this->doc; //->documentElement;
-
- // Create a rules engine for tags.
- $this->rules = new TreeBuildingRules($this->doc);
-
- if ($isFragment) {
- $this->isFragment = TRUE;
- $this->insertMode = static::IM_IN_BODY;
- $ele = $this->doc->createElement('html');
- $this->doc->appendChild($ele);
- $this->current = $ele;
- }
- }
-
- /**
- * Get the document.
- */
- public function document() {
- return $this->doc;
- }
-
- /**
- * Get the DOM fragment for the body.
- *
- * This returns a DOMNodeList because a fragment may have zero or more
- * DOMNodes at its root.
- *
- * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context
- *
- * @return \DOMFragmentDocumentFragment
- */
- public function fragment() {
- $append = $this->doc->documentElement->childNodes;
- $frag = $this->doc->createDocumentFragment();
-
- // appendChild() modifies the DOMNodeList, so we
- // have to buffer up the items first, then use the
- // array buffer and loop twice.
- $buffer = array();
- foreach ($append as $node) {
- $buffer[] = $node;
- }
+class DOMTreeBuilder implements EventHandler
+{
- foreach ($buffer as $node) {
- $frag->appendChild($node);
- }
+ /**
+ * Defined in 8.2.5.
+ */
+ const IM_INITIAL = 0;
- $frag->errors = $this->doc->errors;
- return $frag;
- }
-
- /**
- * Provide an instruction processor.
- *
- * This is used for handling Processor Instructions as they are
- * inserted. If omitted, PI's are inserted directly into the DOM tree.
- */
- public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc) {
- $this->processor = $proc;
- }
-
- public function doctype($name, $idType = 0, $id = NULL, $quirks = FALSE) {
- // This is used solely for setting quirks mode. Currently we don't
- // try to preserve the inbound DT. We convert it to HTML5.
- $this->quirks = $quirks;
-
- if ($this->insertMode > static::IM_INITIAL) {
- $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
- return;
- }
+ const IM_BEFORE_HTML = 1;
- $this->insertMode = static::IM_BEFORE_HTML;
- }
-
- /**
- * Process the start tag.
- *
- * @todo
- * - XMLNS namespace handling (we need to parse, even if it's not valid)
- * - XLink, MathML and SVG namespace handling
- * - Omission rules: 8.1.2.4 Optional tags
- */
- public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
- // fprintf(STDOUT, $name);
- $lname = $this->normalizeTagName($name);
-
- // Make sure we have an html element.
- if (!$this->doc->documentElement && $name !== 'html') {
- $this->startTag('html');
- }
+ const IM_BEFORE_HEAD = 2;
- // Set quirks mode if we're at IM_INITIAL with no doctype.
- if ($this->insertMode == static::IM_INITIAL) {
- $this->quirks = TRUE;
- $this->parseError("No DOCTYPE specified.");
- }
+ const IM_IN_HEAD = 3;
- // SPECIAL TAG HANDLING:
- // Spec says do this, and "don't ask."
- if ($name == 'image') {
- $name = 'img';
- }
+ const IM_IN_HEAD_NOSCRIPT = 4;
+ const IM_AFTER_HEAD = 5;
- // Autoclose p tags where appropriate.
- if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) {
- $this->autoclose('p');
- }
+ const IM_IN_BODY = 6;
- // Set insert mode:
- switch ($name) {
- case 'html':
- $this->insertMode = static::IM_BEFORE_HEAD;
- break;
- case 'head':
- if ($this->insertMode > static::IM_BEFORE_HEAD) {
- $this->parseError("Unexpected head tag outside of head context.");
- }
- else {
- $this->insertMode = static::IM_IN_HEAD;
- }
- break;
- case 'body':
- $this->insertMode = static::IM_IN_BODY;
- break;
- case 'svg':
- $this->insertMode = static::IM_IN_SVG;
- break;
- case 'math':
- $this->insertMode = static::IM_IN_MATHML;
- break;
- case 'noscript':
- if ($this->insertMode == static::IM_IN_HEAD) {
- $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
- }
- break;
+ const IM_TEXT = 7;
- }
+ const IM_IN_TABLE = 8;
- // Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
- $lname = Elements::normalizeSvgElement($lname);
- }
+ const IM_IN_TABLE_TEXT = 9;
- try {
- $ele = $this->doc->createElement($lname);
- }
- catch(\DOMException $e) {
- $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
- $ele = $this->doc->createElement('invalid');
- }
+ const IM_IN_CAPTION = 10;
+
+ const IM_IN_COLUMN_GROUP = 11;
+
+ const IM_IN_TABLE_BODY = 12;
+
+ const IM_IN_ROW = 13;
+
+ const IM_IN_CELL = 14;
+
+ const IM_IN_SELECT = 15;
+
+ const IM_IN_SELECT_IN_TABLE = 16;
+
+ const IM_AFTER_BODY = 17;
+
+ const IM_IN_FRAMESET = 18;
+
+ const IM_AFTER_FRAMESET = 19;
+
+ const IM_AFTER_AFTER_BODY = 20;
+
+ const IM_AFTER_AFTER_FRAMESET = 21;
+
+ const IM_IN_SVG = 22;
+
+ const IM_IN_MATHML = 23;
+
+ protected $stack = array();
- foreach ($attributes as $aName => $aVal) {
-
- if ($this->insertMode == static::IM_IN_SVG) {
- $aName = Elements::normalizeSvgAttribute($aName);
- }
- elseif ($this->insertMode == static::IM_IN_MATHML) {
- $aName = Elements::normalizeMathMlAttribute($aName);
- }
-
- try {
- $ele->setAttribute($aName, $aVal);
- }
- catch(\DOMException $e) {
- $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName");
- continue;
- }
-
- // This is necessary on a non-DTD schema, like HTML5.
- if ($aName == 'id') {
- $ele->setIdAttribute('id', TRUE);
- }
+ protected $current; // Pointer in the tag hierarchy.
+ protected $doc;
+
+ protected $processor;
+
+ protected $insertMode = 0;
+
+ /**
+ * Quirks mode is enabled by default.
+ * Any document that is missing the
+ * DT will be considered to be in quirks mode.
+ */
+ protected $quirks = TRUE;
+
+ public $isFragment = FALSE;
+
+ public function __construct($isFragment = FALSE)
+ {
+ $impl = new \DOMImplementation();
+ // XXX:
+ // Create the doctype. For now, we are always creating HTML5
+ // documents, and attempting to up-convert any older DTDs to HTML5.
+ $dt = $impl->createDocumentType('html');
+ // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
+ $this->doc = $impl->createDocument(NULL, NULL, $dt);
+ $this->doc->errors = array();
+
+ // $this->current = $this->doc->documentElement;
+ $this->current = $this->doc; // ->documentElement;
+
+ // Create a rules engine for tags.
+ $this->rules = new TreeBuildingRules($this->doc);
+
+ if ($isFragment) {
+ $this->isFragment = TRUE;
+ $this->insertMode = static::IM_IN_BODY;
+ $ele = $this->doc->createElement('html');
+ $this->doc->appendChild($ele);
+ $this->current = $ele;
+ }
}
- // Some elements have special processing rules. Handle those separately.
- if ($this->rules->hasRules($name)) {
- $this->current = $this->rules->evaluate($ele, $this->current);
+ /**
+ * Get the document.
+ */
+ public function document()
+ {
+ return $this->doc;
}
- // Otherwise, it's a standard element.
- else {
- $this->current->appendChild($ele);
-
- // XXX: Need to handle self-closing tags and unary tags.
- if (!Elements::isA($name, Elements::VOID_TAG)) {
- $this->current = $ele;
- }
+
+ /**
+ * Get the DOM fragment for the body.
+ *
+ * This returns a DOMNodeList because a fragment may have zero or more
+ * DOMNodes at its root.
+ *
+ * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context
+ *
+ * @return \DOMFragmentDocumentFragment
+ */
+ public function fragment()
+ {
+ $append = $this->doc->documentElement->childNodes;
+ $frag = $this->doc->createDocumentFragment();
+
+ // appendChild() modifies the DOMNodeList, so we
+ // have to buffer up the items first, then use the
+ // array buffer and loop twice.
+ $buffer = array();
+ foreach ($append as $node) {
+ $buffer[] = $node;
+ }
+
+ foreach ($buffer as $node) {
+ $frag->appendChild($node);
+ }
+
+ $frag->errors = $this->doc->errors;
+
+ return $frag;
}
- // This is sort of a last-ditch attempt to correct for cases where no head/body
- // elements are provided.
- if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
- $this->insertMode = static::IM_IN_BODY;
+ /**
+ * Provide an instruction processor.
+ *
+ * This is used for handling Processor Instructions as they are
+ * inserted. If omitted, PI's are inserted directly into the DOM tree.
+ */
+ public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc)
+ {
+ $this->processor = $proc;
}
- // Return the element mask, which the tokenizer can then use to set
- // various processing rules.
- return Elements::element($name);
- }
+ public function doctype($name, $idType = 0, $id = NULL, $quirks = FALSE)
+ {
+ // This is used solely for setting quirks mode. Currently we don't
+ // try to preserve the inbound DT. We convert it to HTML5.
+ $this->quirks = $quirks;
- public function endTag($name) {
- $lname = $this->normalizeTagName($name);
+ if ($this->insertMode > static::IM_INITIAL) {
+ $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
- // Ignore closing tags for unary elements.
- if (Elements::isA($name, Elements::VOID_TAG)) {
- return;
- }
+ return;
+ }
- if ($this->insertMode <= static::IM_BEFORE_HTML) {
- // 8.2.5.4.2
- if (in_array($name, array('html', 'br', 'head', 'title'))) {
- $this->startTag('html');
- $this->endTag($name);
- $this->insertMode = static::IM_BEFORE_HEAD;
- return;
- }
-
- // Ignore the tag.
- $this->parseError("Illegal closing tag at global scope.");
- return;
+ $this->insertMode = static::IM_BEFORE_HTML;
}
- // Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
- $lname = Elements::normalizeSvgElement($lname);
+ /**
+ * Process the start tag.
+ *
+ * @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
+ * - XLink, MathML and SVG namespace handling
+ * - Omission rules: 8.1.2.4 Optional tags
+ */
+ public function startTag($name, $attributes = array(), $selfClosing = FALSE)
+ {
+ // fprintf(STDOUT, $name);
+ $lname = $this->normalizeTagName($name);
+
+ // Make sure we have an html element.
+ if (! $this->doc->documentElement && $name !== 'html') {
+ $this->startTag('html');
+ }
+
+ // Set quirks mode if we're at IM_INITIAL with no doctype.
+ if ($this->insertMode == static::IM_INITIAL) {
+ $this->quirks = TRUE;
+ $this->parseError("No DOCTYPE specified.");
+ }
+
+ // SPECIAL TAG HANDLING:
+ // Spec says do this, and "don't ask."
+ if ($name == 'image') {
+ $name = 'img';
+ }
+
+ // Autoclose p tags where appropriate.
+ if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) {
+ $this->autoclose('p');
+ }
+
+ // Set insert mode:
+ switch ($name) {
+ case 'html':
+ $this->insertMode = static::IM_BEFORE_HEAD;
+ break;
+ case 'head':
+ if ($this->insertMode > static::IM_BEFORE_HEAD) {
+ $this->parseError("Unexpected head tag outside of head context.");
+ } else {
+ $this->insertMode = static::IM_IN_HEAD;
+ }
+ break;
+ case 'body':
+ $this->insertMode = static::IM_IN_BODY;
+ break;
+ case 'svg':
+ $this->insertMode = static::IM_IN_SVG;
+ break;
+ case 'math':
+ $this->insertMode = static::IM_IN_MATHML;
+ break;
+ case 'noscript':
+ if ($this->insertMode == static::IM_IN_HEAD) {
+ $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
+ }
+ break;
+ }
+
+ // Special case handling for SVG.
+ if ($this->insertMode == static::IM_IN_SVG) {
+ $lname = Elements::normalizeSvgElement($lname);
+ }
+
+ try {
+ $ele = $this->doc->createElement($lname);
+ } catch (\DOMException $e) {
+ $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
+ $ele = $this->doc->createElement('invalid');
+ }
+
+ foreach ($attributes as $aName => $aVal) {
+
+ if ($this->insertMode == static::IM_IN_SVG) {
+ $aName = Elements::normalizeSvgAttribute($aName);
+ } elseif ($this->insertMode == static::IM_IN_MATHML) {
+ $aName = Elements::normalizeMathMlAttribute($aName);
+ }
+
+ try {
+ $ele->setAttribute($aName, $aVal);
+ } catch (\DOMException $e) {
+ $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName");
+ continue;
+ }
+
+ // This is necessary on a non-DTD schema, like HTML5.
+ if ($aName == 'id') {
+ $ele->setIdAttribute('id', TRUE);
+ }
+ }
+
+ // Some elements have special processing rules. Handle those separately.
+ if ($this->rules->hasRules($name)) {
+ $this->current = $this->rules->evaluate($ele, $this->current);
+ } // Otherwise, it's a standard element.
+ else {
+ $this->current->appendChild($ele);
+
+ // XXX: Need to handle self-closing tags and unary tags.
+ if (! Elements::isA($name, Elements::VOID_TAG)) {
+ $this->current = $ele;
+ }
+ }
+
+ // This is sort of a last-ditch attempt to correct for cases where no head/body
+ // elements are provided.
+ if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
+ $this->insertMode = static::IM_IN_BODY;
+ }
+
+ // Return the element mask, which the tokenizer can then use to set
+ // various processing rules.
+ return Elements::element($name);
}
- // XXX: Not sure whether we need this anymore.
- // if ($name != $lname) {
- // return $this->quirksTreeResolver($lname);
- //}
-
- // XXX: HTML has no parent. What do we do, though,
- // if this element appears in the wrong place?
- if ($lname == 'html') {
- return;
+ public function endTag($name)
+ {
+ $lname = $this->normalizeTagName($name);
+
+ // Ignore closing tags for unary elements.
+ if (Elements::isA($name, Elements::VOID_TAG)) {
+ return;
+ }
+
+ if ($this->insertMode <= static::IM_BEFORE_HTML) {
+ // 8.2.5.4.2
+ if (in_array($name, array(
+ 'html',
+ 'br',
+ 'head',
+ 'title'
+ ))) {
+ $this->startTag('html');
+ $this->endTag($name);
+ $this->insertMode = static::IM_BEFORE_HEAD;
+
+ return;
+ }
+
+ // Ignore the tag.
+ $this->parseError("Illegal closing tag at global scope.");
+
+ return;
+ }
+
+ // Special case handling for SVG.
+ if ($this->insertMode == static::IM_IN_SVG) {
+ $lname = Elements::normalizeSvgElement($lname);
+ }
+
+ // XXX: Not sure whether we need this anymore.
+ // if ($name != $lname) {
+ // return $this->quirksTreeResolver($lname);
+ // }
+
+ // XXX: HTML has no parent. What do we do, though,
+ // if this element appears in the wrong place?
+ if ($lname == 'html') {
+ return;
+ }
+
+ // $this->current = $this->current->parentNode;
+ if (! $this->autoclose($lname)) {
+ $this->parseError('Could not find closing tag for ' . $lname);
+ }
+
+ // switch ($this->insertMode) {
+ switch ($lname) {
+ case "head":
+ $this->insertMode = static::IM_AFTER_HEAD;
+ break;
+ case "body":
+ $this->insertMode = static::IM_AFTER_BODY;
+ break;
+ case "svg":
+ case "mathml":
+ $this->insertMode = static::IM_IN_BODY;
+ break;
+ }
}
- //$this->current = $this->current->parentNode;
- if (!$this->autoclose($lname)) {
- $this->parseError('Could not find closing tag for ' . $lname);
+ public function comment($cdata)
+ {
+ // TODO: Need to handle case where comment appears outside of the HTML tag.
+ $node = $this->doc->createComment($cdata);
+ $this->current->appendChild($node);
}
- //switch ($this->insertMode) {
- switch ($lname) {
- case "head":
- $this->insertMode = static::IM_AFTER_HEAD;
- break;
- case "body":
- $this->insertMode = static::IM_AFTER_BODY;
- break;
- case "svg":
- case "mathml":
- $this->insertMode = static::IM_IN_BODY;
- break;
+ public function text($data)
+ {
+ // XXX: Hmmm.... should we really be this strict?
+ if ($this->insertMode < static::IM_IN_HEAD) {
+ // Per '8.2.5.4.3 The "before head" insertion mode' the characters
+ // " \t\n\r\f" should be ignored but no mention of a parse error. This is
+ // practical as most documents contain these characters. Other text is not
+ // expected here so recording a parse error is necessary.
+ $dataTmp = trim($data, " \t\n\r\f");
+ if (! empty($dataTmp)) {
+ // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
+ $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
+ }
+
+ return;
+ }
+ // fprintf(STDOUT, "Appending text %s.", $data);
+ $node = $this->doc->createTextNode($data);
+ $this->current->appendChild($node);
}
- }
-
- public function comment($cdata) {
- // TODO: Need to handle case where comment appears outside of the HTML tag.
- $node = $this->doc->createComment($cdata);
- $this->current->appendChild($node);
- }
-
- public function text($data) {
- // XXX: Hmmm.... should we really be this strict?
- if ($this->insertMode < static::IM_IN_HEAD) {
- // Per '8.2.5.4.3 The "before head" insertion mode' the characters
- // " \t\n\r\f" should be ignored but no mention of a parse error. This is
- // practical as most documents contain these characters. Other text is not
- // expected here so recording a parse error is necessary.
- $dataTmp = trim($data, " \t\n\r\f");
- if (!empty($dataTmp)) {
- //fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
- $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
- }
- return;
+
+ public function eof()
+ {
+ // If the $current isn't the $root, do we need to do anything?
}
- //fprintf(STDOUT, "Appending text %s.", $data);
- $node = $this->doc->createTextNode($data);
- $this->current->appendChild($node);
- }
-
- public function eof() {
- // If the $current isn't the $root, do we need to do anything?
- }
-
- public function parseError($msg, $line = 0, $col = 0) {
- $this->doc->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
- }
-
- public function cdata($data) {
- $node = $this->doc->createCDATASection($data);
- $this->current->appendChild($node);
- }
-
- public function processingInstruction($name, $data = NULL) {
- // XXX: Ignore initial XML declaration, per the spec.
- if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
- return;
+
+ public function parseError($msg, $line = 0, $col = 0)
+ {
+ $this->doc->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
}
- // Important: The processor may modify the current DOM tree however
- // it sees fit.
- if (isset($this->processor)) {
- $res = $this->processor->process($this->current, $name, $data);
- if (!empty($res)) {
- $this->current = $res;
- }
- return;
+ public function cdata($data)
+ {
+ $node = $this->doc->createCDATASection($data);
+ $this->current->appendChild($node);
}
- // Otherwise, this is just a dumb PI element.
- $node = $this->doc->createProcessingInstruction($name, $data);
-
- $this->current->appendChild($node);
- }
-
- // ==========================================================================
- // UTILITIES
- // ==========================================================================
-
- /**
- * Apply normalization rules to a tag name.
- *
- * See sections 2.9 and 8.1.2.
- *
- * @param string $name
- * The tag name.
- * @return string
- * The normalized tag name.
- */
- protected function normalizeTagName($name) {
- /* Section 2.9 suggests that we should not do this.
- if (strpos($name, ':') !== FALSE) {
- // We know from the grammar that there must be at least one other
- // char besides :, since : is not a legal tag start.
- $parts = explode(':', $name);
- return array_pop($parts);
+ public function processingInstruction($name, $data = NULL)
+ {
+ // XXX: Ignore initial XML declaration, per the spec.
+ if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
+ return;
+ }
+
+ // Important: The processor may modify the current DOM tree however
+ // it sees fit.
+ if (isset($this->processor)) {
+ $res = $this->processor->process($this->current, $name, $data);
+ if (! empty($res)) {
+ $this->current = $res;
+ }
+
+ return;
+ }
+
+ // Otherwise, this is just a dumb PI element.
+ $node = $this->doc->createProcessingInstruction($name, $data);
+
+ $this->current->appendChild($node);
}
- */
- return $name;
- }
+ // ==========================================================================
+ // UTILITIES
+ // ==========================================================================
+
+ /**
+ * Apply normalization rules to a tag name.
+ *
+ * See sections 2.9 and 8.1.2.
+ *
+ * @param string $name
+ * The tag name.
+ * @return string The normalized tag name.
+ */
+ protected function normalizeTagName($name)
+ {
+ /*
+ * Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== FALSE) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); }
+ */
+ return $name;
+ }
- protected function quirksTreeResolver($name) {
- throw new \Exception("Not implemented.");
+ protected function quirksTreeResolver($name)
+ {
+ throw new \Exception("Not implemented.");
+ }
- }
+ /**
+ * Automatically climb the tree and close the closest node with the matching $tag.
+ */
+ protected function autoclose($tag)
+ {
+ $working = $this->current;
+ do {
+ if ($working->nodeType != XML_ELEMENT_NODE) {
+ return FALSE;
+ }
+ if ($working->tagName == $tag) {
+ $this->current = $working->parentNode;
+
+ return TRUE;
+ }
+ } while ($working = $working->parentNode);
- /**
- * Automatically climb the tree and close the closest node with the matching $tag.
- */
- protected function autoclose($tag) {
- $working = $this->current;
- do {
- if ($working->nodeType != XML_ELEMENT_NODE) {
return FALSE;
- }
- if ($working->tagName == $tag) {
- $this->current = $working->parentNode;
- return TRUE;
- }
- } while ($working = $working->parentNode);
- return FALSE;
-
- }
-
- /**
- * Checks if the given tagname is an ancestor of the present candidate.
- *
- * If $this->current or anything above $this->current matches the given tag
- * name, this returns TRUE.
- */
- protected function isAncestor($tagname) {
- $candidate = $this->current;
- while ($candidate->nodeType === XML_ELEMENT_NODE) {
- if ($candidate->tagName == $tagname) {
- return TRUE;
- }
- $candidate = $candidate->parentNode;
}
- return FALSE;
- }
- /**
- * Returns TRUE if the immediate parent element is of the given tagname.
- */
- protected function isParent($tagname) {
- return $this->current->tagName == $tagname;
- }
+ /**
+ * Checks if the given tagname is an ancestor of the present candidate.
+ *
+ * If $this->current or anything above $this->current matches the given tag
+ * name, this returns TRUE.
+ */
+ protected function isAncestor($tagname)
+ {
+ $candidate = $this->current;
+ while ($candidate->nodeType === XML_ELEMENT_NODE) {
+ if ($candidate->tagName == $tagname) {
+ return TRUE;
+ }
+ $candidate = $candidate->parentNode;
+ }
+ return FALSE;
+ }
+ /**
+ * Returns TRUE if the immediate parent element is of the given tagname.
+ */
+ protected function isParent($tagname)
+ {
+ return $this->current->tagName == $tagname;
+ }
}
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php
index 4a99aa6..ef80909 100644
--- a/src/HTML5/Parser/EventHandler.php
+++ b/src/HTML5/Parser/EventHandler.php
@@ -4,108 +4,119 @@ namespace Masterminds\HTML5\Parser;
/**
* Standard events for HTML5.
*
- * This is roughly analogous to a SAX2 or expat-style interface.
- * However, it is tuned specifically for HTML5, according to section 8
+ * This is roughly analogous to a SAX2 or expat-style interface.
+ * However, it is tuned specifically for HTML5, according to section 8
* of the HTML5 specification.
*
- * An event handler receives parser events. For a concrete
+ * An event handler receives parser events. For a concrete
* implementation, see DOMTreeBuilder.
*
- * Quirks support in the parser is limited to close-in syntax (malformed
- * tags or attributes). Higher order syntax and semantic issues with a
- * document (e.g. mismatched tags, illegal nesting, etc.) are the
+ * Quirks support in the parser is limited to close-in syntax (malformed
+ * tags or attributes). Higher order syntax and semantic issues with a
+ * document (e.g. mismatched tags, illegal nesting, etc.) are the
* responsibility of the event handler implementation.
*
* See HTML5 spec section 8.2.4
*/
-interface EventHandler {
- const DOCTYPE_NONE = 0;
- const DOCTYPE_PUBLIC = 1;
- const DOCTYPE_SYSTEM = 2;
- /**
- * A doctype declaration.
- *
- * @param string $name
- * The name of the root element.
- * @param int $idType
- * One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM.
- * @param string $id
- * The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
- * then this is a system ID.
- * @param boolean $quirks
- * Indicates whether the builder should enter quirks mode.
- */
- public function doctype($name, $idType = 0, $id = NULL, $quirks = FALSE);
- /**
- * A start tag.
- *
- * IMPORTANT: The parser watches the return value of this event. If this returns
- * an integer, the parser will switch TEXTMODE patters according to the int.
- *
- * This is how the Tree Builder can tell the Tokenizer when a certain tag should
- * cause the parser to go into RAW text mode.
- *
- * The HTML5 standard requires that the builder is the one that initiates this
- * step, and this is the only way short of a circular reference that we can
- * do that.
- *
- * Example: if a startTag even for a `script` name is fired, and the startTag()
- * implementation returns Tokenizer::TEXTMODE_RAW, then the tokenizer will
- * switch into RAW text mode and consume data until it reaches a closing
- * `script` tag.
- *
- * The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
- * closing tag is encounter. **This behavior may change.**
- *
- * @param string $name
- * The tag name.
- * @param array $attributes
- * An array with all of the tag's attributes.
- * @param boolean $selfClosing
- * An indicator of whether or not this tag is self-closing (<foo/>)
- * @return numeric
- * One of the Tokenizer::TEXTMODE_* constants.
- */
- public function startTag($name, $attributes = array(), $selfClosing = FALSE);
- /**
- * An end-tag.
- */
- public function endTag($name);
- /**
- * A comment section (unparsed character data).
- */
- public function comment($cdata);
- /**
- * A unit of parsed character data.
- *
- * Entities in this text are *already decoded*.
- */
- public function text($cdata);
- /**
- * Indicates that the document has been entirely processed.
- */
- public function eof();
- /**
- * Emitted when the parser encounters an error condition.
- */
- public function parseError($msg, $line, $col);
+interface EventHandler
+{
- /**
- * A CDATA section.
- *
- * @param string $data
- * The unparsed character data.
- */
- public function cdata($data);
- /**
- * This is a holdover from the XML spec.
- *
- * While user agents don't get PIs, server-side does.
- *
- * @param string $name
- * The name of the processor (e.g. 'php').
- * @param string $data
- * The unparsed data.
- */
- public function processingInstruction($name, $data = NULL);
+ const DOCTYPE_NONE = 0;
+
+ const DOCTYPE_PUBLIC = 1;
+
+ const DOCTYPE_SYSTEM = 2;
+
+ /**
+ * A doctype declaration.
+ *
+ * @param string $name
+ * The name of the root element.
+ * @param int $idType
+ * One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM.
+ * @param string $id
+ * The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
+ * then this is a system ID.
+ * @param boolean $quirks
+ * Indicates whether the builder should enter quirks mode.
+ */
+ public function doctype($name, $idType = 0, $id = NULL, $quirks = FALSE);
+
+ /**
+ * A start tag.
+ *
+ * IMPORTANT: The parser watches the return value of this event. If this returns
+ * an integer, the parser will switch TEXTMODE patterns according to the int.
+ *
+ * This is how the Tree Builder can tell the Tokenizer when a certain tag should
+ * cause the parser to go into RAW text mode.
+ *
+ * The HTML5 standard requires that the builder is the one that initiates this
+ * step, and this is the only way short of a circular reference that we can
+ * do that.
+ *
+ * Example: if a startTag event for a `script` name is fired, and the startTag()
+ * implementation returns Tokenizer::TEXTMODE_RAW, then the tokenizer will
+ * switch into RAW text mode and consume data until it reaches a closing
+ * `script` tag.
+ *
+ * The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
+ * closing tag is encountered. **This behavior may change.**
+ *
+ * @param string $name
+ * The tag name.
+ * @param array $attributes
+ * An array with all of the tag's attributes.
+ * @param boolean $selfClosing
+ * An indicator of whether or not this tag is self-closing (<foo/>)
+ * @return numeric One of the Tokenizer::TEXTMODE_* constants.
+ */
+ public function startTag($name, $attributes = array(), $selfClosing = FALSE);
+
+ /**
+ * An end-tag.
+ */
+ public function endTag($name);
+
+ /**
+ * A comment section (unparsed character data).
+ */
+ public function comment($cdata);
+
+ /**
+ * A unit of parsed character data.
+ *
+ * Entities in this text are *already decoded*.
+ */
+ public function text($cdata);
+
+ /**
+ * Indicates that the document has been entirely processed.
+ */
+ public function eof();
+
+ /**
+ * Emitted when the parser encounters an error condition.
+ */
+ public function parseError($msg, $line, $col);
+
+ /**
+ * A CDATA section.
+ *
+ * @param string $data
+ * The unparsed character data.
+ */
+ public function cdata($data);
+
+ /**
+ * This is a holdover from the XML spec.
+ *
+ * While user agents don't get PIs, server-side does.
+ *
+ * @param string $name
+ * The name of the processor (e.g. 'php').
+ * @param string $data
+ * The unparsed data.
+ */
+ public function processingInstruction($name, $data = NULL);
}
diff --git a/src/HTML5/Parser/FileInputStream.php b/src/HTML5/Parser/FileInputStream.php
index ce16fb2..e58006a 100644
--- a/src/HTML5/Parser/FileInputStream.php
+++ b/src/HTML5/Parser/FileInputStream.php
@@ -4,32 +4,29 @@ namespace Masterminds\HTML5\Parser;
/**
* The FileInputStream loads a file to be parsed.
*
+ * So right now we read files into strings and then process the
+ * string. We chose to do this largely for the sake of expediency of
+ * development, and also because we could optimize toward processing
+ * arbitrarily large chunks of the input. But in the future, we'd
+ * really like to rewrite this class to efficiently handle lower level
+ * stream reads (and thus efficiently handle large documents).
+ *
* @todo A buffered input stream would be useful.
*/
-class FileInputStream extends StringInputStream implements InputStream {
-
- /*
- * So right now we read files into strings and then process the
- * string. We chose to do this largely for the sake of expediency of
- * development, and also because we could optimize toward processing
- * arbitrarily large chunks of the input. But in the future, we'd
- * really like to rewrite this class to efficiently handle lower level
- * stream reads (and thus efficiently handle large documents).
- */
-
- /**
- * Load a file input stream.
- *
- * @param string $data
- * The file or url path to load.
- */
- function __construct($data, $encoding = 'UTF-8', $debug = '') {
-
- // Get the contents of the file.
- $content = file_get_contents($data);
-
- parent::__construct($content, $encoding, $debug);
+class FileInputStream extends StringInputStream implements InputStream
+{
- }
+ /**
+ * Load a file input stream.
+ *
+ * @param string $data
+ * The file or url path to load.
+ */
+ public function __construct($data, $encoding = 'UTF-8', $debug = '')
+ {
+ // Get the contents of the file.
+ $content = file_get_contents($data);
+ parent::__construct($content, $encoding, $debug);
+ }
}
diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php
index 8180909..7113d6c 100644
--- a/src/HTML5/Parser/InputStream.php
+++ b/src/HTML5/Parser/InputStream.php
@@ -4,85 +4,84 @@ namespace Masterminds\HTML5\Parser;
/**
* Interface for stream readers.
*
- * The parser only reads from streams. Various input sources can write
+ * The parser only reads from streams. Various input sources can write
* an adapater to this InputStream.
*
- * Currently provided InputStream implementations include
+ * Currently provided InputStream implementations include
* FileInputStream and StringInputStream.
*/
-interface InputStream extends \Iterator {
+interface InputStream extends \Iterator
+{
- /**
- * Returns the current line that is being consumed.
- *
- * TODO: Move this to the scanner.
- */
- public function currentLine();
+ /**
+ * Returns the current line that is being consumed.
+ *
+ * TODO: Move this to the scanner.
+ */
+ public function currentLine();
- /**
- * Returns the current column of the current line that the tokenizer is at.
- *
- * Newlines are column 0. The first char after a newline is column 1.
- *
- * @TODO Move this to the scanner.
- *
- * @return int
- * The column number.
- */
- public function columnOffset();
+ /**
+ * Returns the current column of the current line that the tokenizer is at.
+ *
+ * Newlines are column 0. The first char after a newline is column 1.
+ *
+ * @TODO Move this to the scanner.
+ *
+ * @return int The column number.
+ */
+ public function columnOffset();
- /**
- * Get all characters until EOF.
- *
- * This consumes characters until the EOF.
- */
- public function remainingChars();
+ /**
+ * Get all characters until EOF.
+ *
+ * This consumes characters until the EOF.
+ */
+ public function remainingChars();
- /**
- * Read to a particular match (or until $max bytes are consumed).
- *
- * This operates on byte sequences, not characters.
- *
- * Matches as far as possible until we reach a certain set of bytes
- * and returns the matched substring.
- *
- * @see strcspn
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
- * @return mixed
- * Index or FALSE if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
- */
- public function charsUntil($bytes, $max = null);
+ /**
+ * Read to a particular match (or until $max bytes are consumed).
+ *
+ * This operates on byte sequences, not characters.
+ *
+ * Matches as far as possible until we reach a certain set of bytes
+ * and returns the matched substring.
+ *
+ * @see strcspn
+ * @param string $bytes
+ * Bytes to match.
+ * @param int $max
+ * Maximum number of bytes to scan.
+ * @return mixed Index or FALSE if no match is found. You should use strong
+ * equality when checking the result, since index could be 0.
+ */
+ public function charsUntil($bytes, $max = null);
- /**
- * Returns the string so long as $bytes matches.
- *
- * Matches as far as possible with a certain set of bytes
- * and returns the matched substring.
- *
- * @see strspn
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
- */
- public function charsWhile($bytes, $max = null);
+ /**
+ * Returns the string so long as $bytes matches.
+ *
+ * Matches as far as possible with a certain set of bytes
+ * and returns the matched substring.
+ *
+ * @see strspn
+ * @param string $bytes
+ * A mask of bytes to match. If ANY byte in this mask matches the
+ * current char, the pointer advances and the char is part of the
+ * substring.
+ * @param int $max
+ * The max number of chars to read.
+ */
+ public function charsWhile($bytes, $max = null);
- /**
- * Unconsume one character.
- *
- * @param int $howMany
- * The number of characters to move the pointer back.
- */
- public function unconsume($howMany = 1);
+ /**
+ * Unconsume one character.
+ *
+ * @param int $howMany
+ * The number of characters to move the pointer back.
+ */
+ public function unconsume($howMany = 1);
- /**
- * Retrieve the next character without advancing the pointer.
- */
- public function peek();
+ /**
+ * Retrieve the next character without advancing the pointer.
+ */
+ public function peek();
}
diff --git a/src/HTML5/Parser/ParseError.php b/src/HTML5/Parser/ParseError.php
index 53a0844..86498a1 100644
--- a/src/HTML5/Parser/ParseError.php
+++ b/src/HTML5/Parser/ParseError.php
@@ -4,5 +4,6 @@ namespace Masterminds\HTML5\Parser;
/**
* Emit when the parser has an error.
*/
-class ParseError extends \Exception {
+class ParseError extends \Exception
+{
}
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index 18ed821..a262004 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -6,202 +6,217 @@ namespace Masterminds\HTML5\Parser;
*
* This scans over an input stream.
*/
-class Scanner {
- const CHARS_HEX = 'abcdefABCDEF01234567890';
- const CHARS_ALNUM = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';
- const CHARS_ALPHA = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ';
-
- protected $is;
-
- // Flipping this to TRUE will give minisculely more debugging info.
- public $debug = FALSE;
-
- /**
- * Create a new Scanner.
- *
- * @param \Masterminds\HTML5\Parser\InputStream $input
- * An InputStream to be scanned.
- */
- public function __construct($input) {
- $this->is = $input;
- }
-
- /**
- * Get the current position.
- *
- * @return int
- * The current intiger byte position.
- */
- public function position() {
- return $this->is->key();
- }
-
- /**
- * Take a peek at the next character in the data.
- *
- * @return string
- * The next character.
- */
- public function peek() {
- return $this->is->peek();
- }
-
- /**
- * Get the next character.
- *
- * Note: This advances the pointer.
- *
- * @return string
- * The next character.
- */
- public function next() {
- $this->is->next();
- if ($this->is->valid()) {
- if ($this->debug) fprintf(STDOUT, "> %s\n", $this->is->current());
- return $this->is->current();
- }
- return FALSE;
- }
-
- /**
- * Get the current character.
- *
- * Note, this does not advance the pointer.
- *
- * @return string
- * The current character.
- */
- public function current() {
- if ($this->is->valid()) {
- return $this->is->current();
- }
- return FALSE;
- }
-
- /**
- * Silently consume N chars.
- */
- public function consume($count = 1) {
- for ($i = 0; $i < $count; ++$i) {
- $this->next();
- }
- }
-
- /**
- * Unconsume some of the data. This moves the data pointer backwards.
- *
- * @param int $howMany
- * The number of characters to move the pointer back.
- */
- public function unconsume($howMany = 1) {
- $this->is->unconsume($howMany);
- }
-
- /**
- * Get the next group of that contains hex characters.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group that is hex characters.
- */
- public function getHex() {
- return $this->is->charsWhile(static::CHARS_HEX);
- }
-
- /**
- * Get the next group of characters that are ASCII Alpha characters.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group of ASCII alpha characters.
- */
- public function getAsciiAlpha() {
- return $this->is->charsWhile(static::CHARS_ALPHA);
- }
-
- /**
- * Get the next group of characters that are ASCII Alpha characters and numbers.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group of ASCII alpha characters and numbers.
- */
- public function getAsciiAlphaNum() {
- return $this->is->charsWhile(static::CHARS_ALNUM);
- }
-
- /**
- * Get the next group of numbers.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group of numbers.
- */
- public function getNumeric() {
- return $this->is->charsWhile('0123456789');
- }
-
- /**
- * Consume whitespace.
- *
- * Whitespace in HTML5 is: formfeed, tab, newline, space.
- */
- public function whitespace() {
- return $this->is->charsWhile("\n\t\f ");
- }
-
- /**
- * Returns the current line that is being consumed.
- *
- * @return int
- * The current line number.
- */
- public function currentLine() {
- return $this->is->currentLine();
- }
-
- /**
- * Read chars until something in the mask is encountered.
- */
- public function charsUntil($mask) {
- return $this->is->charsUntil($mask);
- }
- /**
- * Read chars as long as the mask matches.
- */
- public function charsWhile($mask) {
- return $this->is->charsWhile($mask);
- }
-
- /**
- * Returns the current column of the current line that the tokenizer is at.
- *
- * Newlines are column 0. The first char after a newline is column 1.
- *
- * @return int
- * The column number.
- */
- public function columnOffset() {
- return $this->is->columnOffset();
- }
-
- /**
- * Get all characters until EOF.
- *
- * This consumes characters until the EOF.
- *
- * @return int
- * The number of characters remaining.
- */
- public function remainingChars() {
- return $this->is->remainingChars();
- }
+class Scanner
+{
+
+ const CHARS_HEX = 'abcdefABCDEF01234567890';
+
+ const CHARS_ALNUM = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';
+
+ const CHARS_ALPHA = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ';
+
+ protected $is;
+
+ // Flipping this to TRUE will give slightly more debugging info.
+ public $debug = FALSE;
+
+ /**
+ * Create a new Scanner.
+ *
+ * @param \Masterminds\HTML5\Parser\InputStream $input
+ * An InputStream to be scanned.
+ */
+ public function __construct($input)
+ {
+ $this->is = $input;
+ }
+
+ /**
+ * Get the current position.
+ *
+ * @return int The current integer byte position.
+ */
+ public function position()
+ {
+ return $this->is->key();
+ }
+
+ /**
+ * Take a peek at the next character in the data.
+ *
+ * @return string The next character.
+ */
+ public function peek()
+ {
+ return $this->is->peek();
+ }
+
+ /**
+ * Get the next character.
+ *
+ * Note: This advances the pointer.
+ *
+ * @return string The next character.
+ */
+ public function next()
+ {
+ $this->is->next();
+ if ($this->is->valid()) {
+ if ($this->debug)
+ fprintf(STDOUT, "> %s\n", $this->is->current());
+ return $this->is->current();
+ }
+
+ return FALSE;
+ }
+
+ /**
+ * Get the current character.
+ *
+ * Note, this does not advance the pointer.
+ *
+ * @return string The current character.
+ */
+ public function current()
+ {
+ if ($this->is->valid()) {
+ return $this->is->current();
+ }
+
+ return FALSE;
+ }
+
+ /**
+ * Silently consume N chars.
+ */
+ public function consume($count = 1)
+ {
+ for ($i = 0; $i < $count; ++ $i) {
+ $this->next();
+ }
+ }
+
+ /**
+ * Unconsume some of the data.
+ * This moves the data pointer backwards.
+ *
+ * @param int $howMany
+ * The number of characters to move the pointer back.
+ */
+ public function unconsume($howMany = 1)
+ {
+ $this->is->unconsume($howMany);
+ }
+
+ /**
+ * Get the next group that contains hex characters.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @return string The next group that is hex characters.
+ */
+ public function getHex()
+ {
+ return $this->is->charsWhile(static::CHARS_HEX);
+ }
+
+ /**
+ * Get the next group of characters that are ASCII Alpha characters.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @return string The next group of ASCII alpha characters.
+ */
+ public function getAsciiAlpha()
+ {
+ return $this->is->charsWhile(static::CHARS_ALPHA);
+ }
+
+ /**
+ * Get the next group of characters that are ASCII Alpha characters and numbers.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @return string The next group of ASCII alpha characters and numbers.
+ */
+ public function getAsciiAlphaNum()
+ {
+ return $this->is->charsWhile(static::CHARS_ALNUM);
+ }
+
+ /**
+ * Get the next group of numbers.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @return string The next group of numbers.
+ */
+ public function getNumeric()
+ {
+ return $this->is->charsWhile('0123456789');
+ }
+
+ /**
+ * Consume whitespace.
+ *
+ * Whitespace in HTML5 is: formfeed, tab, newline, space.
+ */
+ public function whitespace()
+ {
+ return $this->is->charsWhile("\n\t\f ");
+ }
+
+ /**
+ * Returns the current line that is being consumed.
+ *
+ * @return int The current line number.
+ */
+ public function currentLine()
+ {
+ return $this->is->currentLine();
+ }
+
+ /**
+ * Read chars until something in the mask is encountered.
+ */
+ public function charsUntil($mask)
+ {
+ return $this->is->charsUntil($mask);
+ }
+
+ /**
+ * Read chars as long as the mask matches.
+ */
+ public function charsWhile($mask)
+ {
+ return $this->is->charsWhile($mask);
+ }
+
+ /**
+ * Returns the current column of the current line that the tokenizer is at.
+ *
+ * Newlines are column 0. The first char after a newline is column 1.
+ *
+ * @return int The column number.
+ */
+ public function columnOffset()
+ {
+ return $this->is->columnOffset();
+ }
+
+ /**
+ * Get all characters until EOF.
+ *
+ * This consumes characters until the EOF.
+ *
+ * @return int The number of characters remaining.
+ */
+ public function remainingChars()
+ {
+ return $this->is->remainingChars();
+ }
}
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index ee1e1ba..9648bce 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -6,13 +6,13 @@ namespace Masterminds\HTML5\Parser;
/*
*
- * Based on code from html5lib:
+* Based on code from html5lib:
Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
+ "Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
@@ -33,283 +33,299 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// Some conventions:
// - /* */ indicates verbatim text from the HTML 5 specification
-// MPB: Not sure which version of the spec. Moving from HTML5lib to
+// MPB: Not sure which version of the spec. Moving from HTML5lib to
// HTML5-PHP, I have been using this version:
// http://www.w3.org/TR/2012/CR-html5-20121217/Overview.html#contents
//
// - // indicates regular comments
-class StringInputStream implements InputStream {
- /**
- * The string data we're parsing.
- */
- private $data;
-
- /**
- * The current integer byte position we are in $data
- */
- private $char;
-
- /**
- * Length of $data; when $char === $data, we are at the end-of-file.
- */
- private $EOF;
-
- /**
- * Parse errors.
- */
- public $errors = array();
-
- /**
- * Create a new InputStream wrapper.
- *
- * @param $data Data to parse
- */
- public function __construct($data, $encoding = 'UTF-8', $debug = '') {
-
- $data = UTF8Utils::convertToUTF8($data, $encoding);
- if ($debug) fprintf(STDOUT, $debug, $data, strlen($data));
-
- // There is good reason to question whether it makes sense to
- // do this here, since most of these checks are done during
- // parsing, and since this check doesn't actually *do* anything.
- $this->errors = UTF8Utils::checkForIllegalCodepoints($data);
- //if (!empty($e)) {
- // throw new ParseError("UTF-8 encoding issues: " . implode(', ', $e));
- //}
-
- $data = $this->replaceLinefeeds($data);
-
- $this->data = $data;
- $this->char = 0;
- $this->EOF = strlen($data);
- }
-
- /**
- * Replace linefeed characters according to the spec.
- */
- protected function replaceLinefeeds($data) {
- /* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED
- (LF) characters are treated specially. Any CR characters
- that are followed by LF characters must be removed, and any
- CR characters not followed by LF characters must be converted
- to LF characters. Thus, newlines in HTML DOMs are represented
- by LF characters, and there are never any CR characters in the
- input to the tokenization stage. */
- $crlfTable = array(
- "\0" => "\xEF\xBF\xBD",
- "\r\n" => "\n",
- "\r" => "\n",
- );
- return strtr($data, $crlfTable);
- }
-
- /**
- * Returns the current line that the tokenizer is at.
- */
- public function currentLine() {
- if (empty($this->EOF) || $this->char == 0) {
- return 1;
+class StringInputStream implements InputStream
+{
+
+ /**
+ * The string data we're parsing.
+ */
+ private $data;
+
+ /**
+ * The current integer byte position we are in $data
+ */
+ private $char;
+
+ /**
+ * Length of $data; when $char === $EOF, we are at the end-of-file.
+ */
+ private $EOF;
+
+ /**
+ * Parse errors.
+ */
+ public $errors = array();
+
+    /**
+     * Create a new InputStream wrapper.
+     *
+     * The input is converted to UTF-8, passed through
+     * UTF8Utils::checkForIllegalCodepoints() (the result is only stored in
+     * $this->errors; nothing is thrown), and then has its NUL, CR and CRLF
+     * sequences normalized before being stored.
+     *
+     * @param string $data
+     *            Data to parse.
+     * @param string $encoding
+     *            Encoding name handed to UTF8Utils::convertToUTF8().
+     * @param string $debug
+     *            Optional fprintf() format string. When non-empty it is written
+     *            to STDOUT with the converted data and its byte length as
+     *            arguments.
+     */
+    public function __construct($data, $encoding = 'UTF-8', $debug = '')
+    {
+        $utf8 = UTF8Utils::convertToUTF8($data, $encoding);
+        if ($debug) {
+            fprintf(STDOUT, $debug, $utf8, strlen($utf8));
+        }
+
+        // The codepoint check runs on the converted text *before* line endings
+        // are normalized; its findings are recorded, not acted upon.
+        $this->errors = UTF8Utils::checkForIllegalCodepoints($utf8);
+
+        $normalized = $this->replaceLinefeeds($utf8);
+
+        $this->data = $normalized;
+        $this->char = 0;
+        $this->EOF = strlen($normalized);
     }
- // Add one to $this->char because we want the number for the next
- // byte to be processed.
- return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
- }
-
- /**
- * @deprecated
- */
- public function getCurrentLine() {
- return currentLine();
- }
-
- /**
- * Returns the current column of the current line that the tokenizer is at.
- *
- * Newlines are column 0. The first char after a newline is column 1.
- *
- * @return int
- * The column number.
- */
- public function columnOffset() {
-
- // Short circuit for the first char.
- if ($this->char == 0) {
- return 0;
+
+    /**
+     * Normalize NUL bytes and line endings before tokenization.
+     *
+     * Following the HTML5 input-stream rules: a CR that is followed by LF is
+     * removed, and a CR that is not followed by LF becomes LF, so the
+     * tokenizer never sees a CR. In addition, every NUL byte is replaced
+     * with the UTF-8 encoding of U+FFFD.
+     *
+     * @param string $data
+     *            The text to normalize.
+     * @return string The normalized text.
+     */
+    protected function replaceLinefeeds($data)
+    {
+        // strtr() with an array tries the longest key first, so "\r\n"
+        // collapses to one "\n" instead of being rewritten as two.
+        return strtr($data, array(
+            "\0" => "\xEF\xBF\xBD",
+            "\r\n" => "\n",
+            "\r" => "\n"
+        ));
     }
- // strrpos is weird, and the offset needs to be negative for what we
- // want (i.e., the last \n before $this->char). This needs to not have
- // one (to make it point to the next character, the one we want the
- // position of) added to it because strrpos's behaviour includes the
- // final offset byte.
- $backwardFrom = $this->char - 1 - strlen($this->data);
- $lastLine = strrpos($this->data, "\n", $backwardFrom);
-
- // However, for here we want the length up until the next byte to be
- // processed, so add one to the current byte ($this->char).
- if ($lastLine !== FALSE) {
- $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
+
+    /**
+     * Report the 1-based line number of the cursor position.
+     *
+     * @return int 1 for empty input or an unmoved cursor; otherwise one more
+     *             than the number of "\n" bytes that lie before the cursor.
+     */
+    public function currentLine()
+    {
+        if (empty($this->EOF) || $this->char == 0) {
+            return 1;
+        }
+
+        // Clamp to the data length so a cursor that ran past the end cannot
+        // push substr_count() beyond the string.
+        $consumed = min($this->char, $this->EOF);
+
+        return substr_count($this->data, "\n", 0, $consumed) + 1;
     }
- else {
- // After a newline.
- $findLengthOf = substr($this->data, 0, $this->char);
+
+    /**
+     * Returns the current line that the tokenizer is at.
+     *
+     * @deprecated Use currentLine() instead; this is only an alias for it.
+     *
+     * @return int The line number, as reported by currentLine().
+     */
+    public function getCurrentLine()
+    {
+        // Must be invoked on $this: a bare currentLine() is resolved as a
+        // (non-existent) global function and aborts with a fatal error.
+        return $this->currentLine();
     }
- return UTF8Utils::countChars($findLengthOf);
- }
-
- /**
- * @deprecated
- */
- public function getColumnOffset() {
- return $this->columnOffset();
- }
-
- /**
- * Get the current character.
- *
- * @return string
- * The current character.
- */
- public function current() {
- return $this->data[$this->char];
- }
-
- /**
- * Advance the pointer. This is part of the Iterator interface.
- */
- public function next() {
- $this->char++;
- }
-
- /**
- * Rewind to the start of the string.
- */
- public function rewind() {
- $this->char = 0;
- }
-
- /**
- * Is the current pointer location valid.
- *
- * @return bool
- * Is the current pointer location valid.
- */
- public function valid() {
- if ($this->char < $this->EOF) {
- return TRUE;
+ /**
+ * Returns the current column of the current line that the tokenizer is at.
+ *
+ * Newlines are column 0. The first char after a newline is column 1.
+ *
+ * The column is counted with UTF8Utils::countChars() over the bytes that
+ * lie between the last consumed newline (or the start of the data) and
+ * the cursor.
+ *
+ * @return int The column number.
+ */
+ public function columnOffset()
+ {
+ // Short circuit for the first char.
+ if ($this->char == 0) {
+ return 0;
+ }
+ // A negative strrpos() offset limits the backwards search to bytes at or
+ // before that position counted from the end of the string. With
+ // $this->char - 1 - strlen($this->data) the search therefore ends at byte
+ // $this->char - 1, i.e. it finds the last "\n" among the bytes already
+ // consumed (including the byte just before the cursor).
+ $backwardFrom = $this->char - 1 - strlen($this->data);
+ $lastLine = strrpos($this->data, "\n", $backwardFrom);
+
+ // A newline was found: measure the bytes between that newline and the
+ // cursor (an empty slice when the newline is the byte just consumed).
+ if ($lastLine !== FALSE) {
+ $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
+ } else {
+ // No newline consumed yet: still on the first line, measure from the start.
+ $findLengthOf = substr($this->data, 0, $this->char);
+ }
+
+ return UTF8Utils::countChars($findLengthOf);
}
- return FALSE;
- }
-
- /**
- * Get all characters until EOF.
- *
- * This reads to the end of the file, and sets the read marker at the
- * end of the file.
- *
- * @note This performs bounds checking
- *
- * @return string
- * Returns the remaining text. If called when the InputStream is
- * already exhausted, it returns an empty string.
- */
- public function remainingChars() {
- if ($this->char < $this->EOF) {
- $data = substr($this->data, $this->char);
- $this->char = $this->EOF;
- return $data;
+ /**
+ * Alias of columnOffset().
+ *
+ * @deprecated Call columnOffset() directly.
+ *
+ * @return int The column number, as returned by columnOffset().
+ */
+ public function getColumnOffset()
+ {
+ return $this->columnOffset();
}
- return '';//FALSE;
- }
-
- /**
- * Read to a particular match (or until $max bytes are consumed).
- *
- * This operates on byte sequences, not characters.
- *
- * Matches as far as possible until we reach a certain set of bytes
- * and returns the matched substring.
- *
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
- * @return mixed
- * Index or FALSE if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
- */
- public function charsUntil($bytes, $max = null) {
- if ($this->char >= $this->EOF) {
- return FALSE;
+
+ /**
+ * Get the current character.
+ *
+ * Reads the single byte of $data at the cursor position. No bounds check
+ * is done here. NOTE(review): callers presumably check valid() first --
+ * confirm, since reading at or past the end is an out-of-range offset.
+ *
+ * @return string The current character.
+ */
+ public function current()
+ {
+ return $this->data[$this->char];
}
- if ($max === 0 || $max) {
- $len = strcspn($this->data, $bytes, $this->char, $max);
+ /**
+ * Advance the pointer.
+ * This is part of the Iterator interface.
+ *
+ * Moves the byte cursor forward by one. No upper bound is enforced here,
+ * and nothing is returned.
+ */
+ public function next()
+ {
+ $this->char ++;
}
- else {
- $len = strcspn($this->data, $bytes, $this->char);
+
+ /**
+ * Rewind to the start of the string.
+ *
+ * Resets the byte cursor to 0; the data itself is left untouched.
+ */
+ public function rewind()
+ {
+ $this->char = 0;
}
- $string = (string) substr($this->data, $this->char, $len);
- $this->char += $len;
- return $string;
- }
-
- /**
- * Returns the string so long as $bytes matches.
- *
- * Matches as far as possible with a certain set of bytes
- * and returns the matched substring.
- *
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
- */
- public function charsWhile($bytes, $max = null) {
- if ($this->char >= $this->EOF) {
- return FALSE;
+    /**
+     * Tell whether the cursor still points inside the data.
+     *
+     * @return bool TRUE while the byte position is below the data length,
+     *              FALSE once the end has been reached.
+     */
+    public function valid()
+    {
+        return $this->char < $this->EOF;
     }
- if ($max === 0 || $max) {
- $len = strspn($this->data, $bytes, $this->char, $max);
+    /**
+     * Get all characters until EOF.
+     *
+     * Hands back everything from the cursor to the end of the data and
+     * leaves the cursor at the end.
+     *
+     * @note This performs bounds checking
+     *
+     * @return string The remaining text, or an empty string when the stream
+     *                is already exhausted.
+     */
+    public function remainingChars()
+    {
+        // Nothing left to hand out once the cursor sits at (or past) the end.
+        if ($this->char >= $this->EOF) {
+            return '';
+        }
+
+        $rest = substr($this->data, $this->char);
+        $this->char = $this->EOF;
+
+        return $rest;
     }
- else {
- $len = strspn($this->data, $bytes, $this->char);
+
+    /**
+     * Read up to (but not including) the first byte found in $bytes.
+     *
+     * This operates on byte sequences, not characters. The cursor is moved
+     * past whatever was read.
+     *
+     * @param string $bytes
+     *            The set of bytes that stops the read.
+     * @param int $max
+     *            Maximum number of bytes to scan; null means no limit.
+     * @return mixed The substring that was read (possibly empty), or FALSE
+     *               when the stream is already at the end. Use strict
+     *               comparison to tell the two apart.
+     */
+    public function charsUntil($bytes, $max = null)
+    {
+        if ($this->char >= $this->EOF) {
+            return FALSE;
+        }
+
+        // An explicit 0 is a real limit; null or another falsy value means
+        // "no limit".
+        $limited = ($max === 0 || $max);
+        $span = $limited
+            ? strcspn($this->data, $bytes, $this->char, $max)
+            : strcspn($this->data, $bytes, $this->char);
+
+        $matched = (string) substr($this->data, $this->char, $span);
+        $this->char += $span;
+
+        return $matched;
     }
- $string = (string) substr($this->data, $this->char, $len);
- $this->char += $len;
- return $string;
- }
-
- /**
- * Unconsume characters.
- *
- * @param int $howMany
- * The number of characters to unconsume.
- */
- public function unconsume($howMany = 1) {
- if (($this->char - $howMany) >= 0) {
- $this->char = $this->char - $howMany;
+
+    /**
+     * Read for as long as the bytes at the cursor are found in $bytes.
+     *
+     * The cursor is moved past whatever was read.
+     *
+     * @param string $bytes
+     *            A mask of bytes to match. If ANY byte in this mask matches
+     *            the current byte, the pointer advances and the byte becomes
+     *            part of the result.
+     * @param int $max
+     *            The max number of bytes to read; null means no limit.
+     * @return mixed The substring that was read (possibly empty), or FALSE
+     *               when the stream is already at the end.
+     */
+    public function charsWhile($bytes, $max = null)
+    {
+        if ($this->char >= $this->EOF) {
+            return FALSE;
+        }
+
+        // An explicit 0 is a real limit; null or another falsy value means
+        // "no limit".
+        $limited = ($max === 0 || $max);
+        $span = $limited
+            ? strspn($this->data, $bytes, $this->char, $max)
+            : strspn($this->data, $bytes, $this->char);
+
+        $matched = (string) substr($this->data, $this->char, $span);
+        $this->char += $span;
+
+        return $matched;
     }
- }
-
- /**
- * Look ahead without moving cursor.
- */
- public function peek() {
- if (($this->char + 1) <= $this->EOF) {
- return $this->data[$this->char + 1];
+
+    /**
+     * Unconsume characters.
+     *
+     * Moves the byte cursor back by $howMany. The request is ignored when
+     * it would place the cursor before the start of the data.
+     *
+     * @param int $howMany
+     *            The number of characters to unconsume.
+     */
+    public function unconsume($howMany = 1)
+    {
+        $target = $this->char - $howMany;
+        if ($target < 0) {
+            // Would step in front of the first byte: leave the cursor alone.
+            return;
+        }
+        $this->char = $target;
     }
- return FALSE;
- }
+    /**
+     * Look ahead without moving cursor.
+     *
+     * @return mixed The byte immediately after the current one, or FALSE
+     *               when there is no such byte.
+     */
+    public function peek()
+    {
+        // Valid offsets are 0 .. EOF - 1, so the next byte only exists while
+        // char + 1 is strictly below EOF ("<=" would read one past the end).
+        if (($this->char + 1) < $this->EOF) {
+            return $this->data[$this->char + 1];
+        }
- public function key() {
- return $this->char;
- }
+        return FALSE;
+    }
+
+ public function key()
+ {
+ return $this->char; // the current byte position within $data
+ }
}
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index c70d92f..0db9ee1 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -23,1062 +23,1061 @@ use Masterminds\HTML5\Elements;
*
* @see http://www.w3.org/TR/2012/CR-html5-20121217/
*/
-class Tokenizer {
- protected $scanner;
- protected $events;
- protected $tok;
-
- /**
- * Buffer for text.
- */
- protected $text = '';
-
- // When this goes to false, the parser stops.
- protected $carryOn = TRUE;
-
- protected $textMode = 0; // TEXTMODE_NORMAL;
- protected $untilTag = NULL;
-
- const WHITE="\t\n\f ";
-
- /**
- * Create a new tokenizer.
- *
- * Typically, parsing a document involves creating a new tokenizer, giving
- * it a scanner (input) and an event handler (output), and then calling
- * the Tokenizer::parse() method.`
- *
- * @param \Masterminds\HTML5\Parser\Scanner $scanner
- * A scanner initialized with an input stream.
- * @param \Masterminds\HTML5\Parser\EventHandler $eventHandler
- * An event handler, initialized and ready to receive
- * events.
- */
- public function __construct($scanner, $eventHandler) {
- $this->scanner = $scanner;
- $this->events = $eventHandler;
- }
-
- /**
- * Begin parsing.
- *
- * This will begin scanning the document, tokenizing as it goes.
- * Tokens are emitted into the event handler.
- *
- * Tokenizing will continue until the document is completely
- * read. Errors are emitted into the event handler, but
- * the parser will attempt to continue parsing until the
- * entire input stream is read.
- */
- public function parse() {
- $p = 0;
- do {
- $p = $this->scanner->position();
- $this->consumeData();
-
- // FIXME: Add infinite loop protection.
- }
- while ($this->carryOn);
- }
-
- /**
- * Set the text mode for the character data reader.
- *
- * HTML5 defines three different modes for reading text:
- * - Normal: Read until a tag is encountered.
- * - RCDATA: Read until a tag is encountered, but skip a few otherwise-
- * special characters.
- * - Raw: Read until a special closing tag is encountered (viz. pre, script)
- *
- * This allows those modes to be set.
- *
- * Normally, setting is done by the event handler via a special return code on
- * startTag(), but it can also be set manually using this function.
- *
- * @param integer $textmode
- * One of Elements::TEXT_*
- * @param string $untilTag
- * The tag that should stop RAW or RCDATA mode. Normal mode does not
- * use this indicator.
- */
- public function setTextMode($textmode, $untilTag = NULL) {
- $this->textMode = $textmode & (Elements::TEXT_RAW | Elements::TEXT_RCDATA);
- $this->untilTag = $untilTag;
- }
-
- /**
- * Consume a character and make a move.
- * HTML5 8.2.4.1
- */
- protected function consumeData() {
- // Character Ref
- /*
- $this->characterReference() ||
- $this->tagOpen() ||
- $this->eof() ||
- $this->characterData();
- */
+class Tokenizer
+{
- $this->characterReference();
- $this->tagOpen();
- $this->eof();
- $this->characterData();
-
-
- return $this->carryOn;
- }
-
- /**
- * Parse anything that looks like character data.
- *
- * Different rules apply based on the current text mode.
- *
- * @see Elements::TEXT_RAW Elements::TEXT_RCDATA.
- */
- protected function characterData() {
- if ($this->scanner->current() === FALSE) {
- return FALSE;
- }
- switch ($this->textMode) {
- case Elements::TEXT_RAW:
- return $this->rawText();
- case Elements::TEXT_RCDATA:
- return $this->rcdata();
- default:
- $tok = $this->scanner->current();
- if (strspn($tok, "<&")) {
- return FALSE;
- }
- return $this->text();
- }
- }
+ protected $scanner;
- /**
- * This buffers the current token as character data.
- */
- protected function text() {
- $tok = $this->scanner->current();
+ protected $events;
- // This should never happen...
- if ($tok === FALSE) {
- return FALSE;
- }
- // Null
- if ($tok === "\00") {
- $this->parseError("Received NULL character.");
- }
- // fprintf(STDOUT, "Writing '%s'", $tok);
- $this->buffer($tok);
- $this->scanner->next();
- return TRUE;
- }
-
- /**
- * Read text in RAW mode.
- */
- protected function rawText() {
- if (is_null($this->untilTag)) {
- return $this->text();
- }
- $sequence = '</' . $this->untilTag . '>';
- $txt = $this->readUntilSequence($sequence);
- $this->events->text($txt);
- $this->setTextMode(0);
- return $this->endTag();
- }
-
- /**
- * Read text in RCDATA mode.
- */
- protected function rcdata() {
- if (is_null($this->untilTag)) {
- return $this->text();
- }
- $sequence = '</' . $this->untilTag . '>';
- $txt = '';
- $tok = $this->scanner->current();
- while ($tok !== FALSE && !($tok == '<' && ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))))) {
- if ($tok == '&') {
- $txt .= $this->decodeCharacterReference();
- $tok = $this->scanner->current();
- }
- else {
- $txt .= $tok;
- $tok = $this->scanner->next();
- }
- }
- $this->events->text($txt);
- $this->setTextMode(0);
- return $this->endTag();
- }
-
- /**
- * If the document is read, emit an EOF event.
- */
- protected function eof() {
- if ($this->scanner->current() === FALSE) {
- //fprintf(STDOUT, "EOF");
- $this->flushBuffer();
- $this->events->eof();
- $this->carryOn = FALSE;
- return TRUE;
- }
- return FALSE;
- }
-
- /**
- * Handle character references (aka entities).
- *
- * This version is specific to PCDATA, as it buffers data into the
- * text buffer. For a generic version, see decodeCharacterReference().
- *
- * HTML5 8.2.4.2
- */
- protected function characterReference() {
- $ref = $this->decodeCharacterReference();
- if ($ref !== FALSE) {
- $this->buffer($ref);
- return TRUE;
- }
- return FALSE;
- }
-
-
- /**
- * Emit a tagStart event on encountering a tag.
- *
- * 8.2.4.8
- */
- protected function tagOpen() {
- if ($this->scanner->current() != '<') {
- return FALSE;
+ protected $tok;
+
+ /**
+ * Buffer for text.
+ */
+ protected $text = '';
+
+ // When this goes to false, the parser stops.
+ protected $carryOn = TRUE;
+
+ protected $textMode = 0; // TEXTMODE_NORMAL;
+ protected $untilTag = NULL;
+
+ const WHITE = "\t\n\f ";
+
+ /**
+ * Create a new tokenizer.
+ *
+ * Typically, parsing a document involves creating a new tokenizer, giving
+ * it a scanner (input) and an event handler (output), and then calling
+ * the Tokenizer::parse() method.
+ *
+ * @param \Masterminds\HTML5\Parser\Scanner $scanner
+ * A scanner initialized with an input stream.
+ * @param \Masterminds\HTML5\Parser\EventHandler $eventHandler
+ * An event handler, initialized and ready to receive
+ * events.
+ */
+ public function __construct($scanner, $eventHandler)
+ {
+ $this->scanner = $scanner;
+ $this->events = $eventHandler;
+ }
+
+ /**
+ * Begin parsing.
+ *
+ * This will begin scanning the document, tokenizing as it goes.
+ * Tokens are emitted into the event handler.
+ *
+ * Tokenizing will continue until the document is completely
+ * read. Errors are emitted into the event handler, but
+ * the parser will attempt to continue parsing until the
+ * entire input stream is read.
+ *
+ * Concretely, consumeData() is called repeatedly until $this->carryOn
+ * becomes FALSE. NOTE(review): $p records the scanner position before
+ * each step but is never read; it looks like groundwork for the loop
+ * protection mentioned in the FIXME below.
+ */
+ public function parse()
+ {
+ $p = 0;
+ do {
+ $p = $this->scanner->position();
+ $this->consumeData();
+
+ // FIXME: Add infinite loop protection.
+ } while ($this->carryOn);
+ }
+
+ /**
+ * Set the text mode for the character data reader.
+ *
+ * HTML5 defines three different modes for reading text:
+ * - Normal: Read until a tag is encountered.
+ * - RCDATA: Read until a tag is encountered, but skip a few otherwise-
+ * special characters.
+ * - Raw: Read until a special closing tag is encountered (viz. pre, script)
+ *
+ * This allows those modes to be set.
+ *
+ * Normally, setting is done by the event handler via a special return code on
+ * startTag(), but it can also be set manually using this function.
+ *
+ * The value is masked with (Elements::TEXT_RAW | Elements::TEXT_RCDATA), so
+ * any other bits in $textmode are dropped; passing 0 selects normal mode.
+ *
+ * @param integer $textmode
+ * One of Elements::TEXT_*
+ * @param string $untilTag
+ * The tag that should stop RAW or RCDATA mode. Normal mode does not
+ * use this indicator.
+ */
+ public function setTextMode($textmode, $untilTag = NULL)
+ {
+ $this->textMode = $textmode & (Elements::TEXT_RAW | Elements::TEXT_RCDATA);
+ $this->untilTag = $untilTag;
}
- // Any buffered text data can go out now.
- $this->flushBuffer();
-
- $this->scanner->next();
-
- return $this->markupDeclaration() ||
- $this->endTag() ||
- $this->processingInstruction() ||
- $this->tagName() ||
- // This always returns false.
- $this->parseError("Illegal tag opening") ||
- $this->characterData();
- }
-
- /**
- * Look for markup.
- */
- protected function markupDeclaration() {
- if ($this->scanner->current() != '!') {
- return FALSE;
+ /**
+ * Consume a character and make a move.
+ * HTML5 8.2.4.1
+ *
+ * Calls characterReference(), tagOpen(), eof() and characterData() in that
+ * order, unconditionally and ignoring their return values.
+ *
+ * @return bool The carryOn flag; eof() sets it to FALSE at the end of input.
+ */
+ protected function consumeData()
+ {
+ // Earlier short-circuit form, kept for reference:
+ /*
+ * $this->characterReference() || $this->tagOpen() || $this->eof() || $this->characterData();
+ */
+ $this->characterReference();
+ $this->tagOpen();
+ $this->eof();
+ $this->characterData();
+
+ return $this->carryOn;
+ }
+
+    /**
+     * Parse anything that looks like character data.
+     *
+     * Raw and RCDATA text modes are handed to their dedicated readers. In
+     * any other mode a single token is buffered via text(), unless it is
+     * '<' or '&'.
+     *
+     * @see Elements::TEXT_RAW Elements::TEXT_RCDATA.
+     *
+     * @return mixed FALSE at end of input or when the current token is '<'
+     *               or '&' in normal mode; otherwise the result of the
+     *               reader that handled the data.
+     */
+    protected function characterData()
+    {
+        if ($this->scanner->current() === FALSE) {
+            return FALSE;
+        }
+
+        // Loose comparison on purpose: it mirrors what a switch statement does.
+        if ($this->textMode == Elements::TEXT_RAW) {
+            return $this->rawText();
+        }
+        if ($this->textMode == Elements::TEXT_RCDATA) {
+            return $this->rcdata();
+        }
+
+        $tok = $this->scanner->current();
+
+        return strspn($tok, "<&") ? FALSE : $this->text();
     }
- $tok = $this->scanner->next();
+ /**
+ * This buffers the current token as character data.
+ *
+ * @return bool FALSE when the scanner has no current token; TRUE after the
+ * token has been buffered and the scanner advanced.
+ */
+ protected function text()
+ {
+ $tok = $this->scanner->current();
- // Comment:
- if ($tok == '-' && $this->scanner->peek() == '-') {
- $this->scanner->next(); // Consume the other '-'
- $this->scanner->next(); // Next char.
- return $this->comment();
+ // This should never happen...
+ if ($tok === FALSE) {
+ return FALSE;
+ }
+ // A NUL byte is reported as a parse error but is still buffered below.
+ if ($tok === "\00") {
+ $this->parseError("Received NULL character.");
+ }
+ // fprintf(STDOUT, "Writing '%s'", $tok);
+ $this->buffer($tok);
+ $this->scanner->next();
+ return TRUE;
}
- // Doctype
- elseif($tok == 'D' || $tok == 'd') {
- return $this->doctype('');
+
+    /**
+     * Read text in RAW mode.
+     *
+     * Without a stop tag this behaves like text(). Otherwise everything up
+     * to the closing sequence "</untilTag>" is emitted as one text event,
+     * the text mode is reset to 0, and the end tag is handed to endTag().
+     *
+     * @return mixed The result of text() or endTag().
+     */
+    protected function rawText()
+    {
+        if ($this->untilTag === NULL) {
+            return $this->text();
+        }
+
+        $closer = '</' . $this->untilTag . '>';
+        $this->events->text($this->readUntilSequence($closer));
+        $this->setTextMode(0);
+
+        return $this->endTag();
     }
- // CDATA section
- elseif($tok == '[') {
- return $this->cdataSection();
+
+ /**
+ * Read text in RCDATA mode.
+ *
+ * Without a stop tag this behaves like text(). Otherwise tokens are
+ * accumulated until the input ends or a '<' starts the closing sequence
+ * "</untilTag>", which is tried both as written and upper-cased. A '&'
+ * is passed through decodeCharacterReference() instead of being copied.
+ * The collected text is emitted as one text event, the text mode is reset
+ * to 0, and the end tag is handed to endTag().
+ *
+ * NOTE(review): unless sequenceMatches() is itself case-insensitive, a
+ * mixed-case closing tag (e.g. "</Title>") would match neither form --
+ * confirm.
+ *
+ * @return mixed The result of text() or endTag().
+ */
+ protected function rcdata()
+ {
+ if (is_null($this->untilTag)) {
+ return $this->text();
+ }
+ $sequence = '</' . $this->untilTag . '>';
+ $txt = '';
+ $tok = $this->scanner->current();
+ while ($tok !== FALSE && ! ($tok == '<' && ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))))) {
+ if ($tok == '&') {
+ $txt .= $this->decodeCharacterReference();
+ $tok = $this->scanner->current();
+ } else {
+ $txt .= $tok;
+ $tok = $this->scanner->next();
+ }
+ }
+ $this->events->text($txt);
+ $this->setTextMode(0);
+ return $this->endTag();
}
- // FINISH
- $this->parseError("Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s", $tok);
- $this->bogusComment('<!');
- return TRUE;
- }
-
- /**
- * Consume an end tag.
- * 8.2.4.9
- */
- protected function endTag() {
- if ($this->scanner->current() != '/') {
- return FALSE;
+    /**
+     * If the document is read, emit an EOF event.
+     *
+     * At end of input the text buffer is flushed, the handler's eof() is
+     * called, and the carryOn flag is cleared so that parsing stops.
+     *
+     * @return bool TRUE when the end of input was reached, FALSE otherwise.
+     */
+    protected function eof()
+    {
+        if ($this->scanner->current() !== FALSE) {
+            // Still data to read: nothing to do here.
+            return FALSE;
+        }
+
+        $this->flushBuffer();
+        $this->events->eof();
+        $this->carryOn = FALSE;
+
+        return TRUE;
     }
- $tok = $this->scanner->next();
-
- // a-zA-Z -> tagname
- // > -> parse error
- // EOF -> parse error
- // -> parse error
- if (!ctype_alpha($tok)) {
- $this->parseError("Expected tag name, got '%s'", $tok);
- if ($tok == "\0" || $tok === FALSE) {
+
+ /**
+ * Handle character references (aka entities).
+ *
+ * This version is specific to PCDATA, as it buffers data into the
+ * text buffer. For a generic version, see decodeCharacterReference().
+ *
+ * HTML5 8.2.4.2
+ *
+ * @return bool TRUE when decodeCharacterReference() produced something
+ * other than FALSE (which is then buffered), FALSE otherwise.
+ */
+ protected function characterReference()
+ {
+ $ref = $this->decodeCharacterReference();
+ if ($ref !== FALSE) {
+ $this->buffer($ref);
+ return TRUE;
+ }
return FALSE;
- }
- return $this->bogusComment('</');
}
- $name = strtolower($this->scanner->charsUntil("\n\f \t>"));
- // Trash whitespace.
- $this->scanner->whitespace();
+ /**
+ * Dispatch on a '<': try each kind of tag in turn.
+ *
+ * When the current token is '<', the buffered text is flushed, the '<' is
+ * skipped, and the handlers are tried in order: markup declaration, end
+ * tag, processing instruction, start tag. If none of them accepts the
+ * input, a parse error is recorded and characterData() is tried instead.
+ *
+ * 8.2.4.8
+ *
+ * @return bool FALSE when the current token is not '<'; otherwise whether
+ * any handler in the chain accepted the input.
+ */
+ protected function tagOpen()
+ {
+ if ($this->scanner->current() != '<') {
+ return FALSE;
+ }
+
+ // Any buffered text data can go out now.
+ $this->flushBuffer();
- if ($this->scanner->current() != '>') {
- $this->parseError("Expected >, got '%s'", $this->scanner->current());
- // We just trash stuff until we get to the next tag close.
- $this->scanner->charsUntil('>');
+ $this->scanner->next();
+
+ return $this->markupDeclaration() || $this->endTag() || $this->processingInstruction() || $this->tagName() ||
+ /* This always returns false. */
+ $this->parseError("Illegal tag opening") || $this->characterData();
}
- $this->events->endTag($name);
- $this->scanner->next();
- return TRUE;
+ /**
+ * Look for markup.
+ */
+ protected function markupDeclaration()
+ {
+ if ($this->scanner->current() != '!') {
+ return FALSE;
+ }
- }
+ $tok = $this->scanner->next();
- /**
- * Consume a tag name and body.
- * 8.2.4.10
- */
- protected function tagName() {
- $tok = $this->scanner->current();
- if (!ctype_alpha($tok)) {
- return FALSE;
- }
+ // Comment:
+ if ($tok == '-' && $this->scanner->peek() == '-') {
+ $this->scanner->next(); // Consume the other '-'
+ $this->scanner->next(); // Next char.
+ return $this->comment();
+ }
- // We know this is at least one char.
- $name = strtolower($this->scanner->charsWhile(
- ":0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
- ));
- $attributes = array();
- $selfClose = FALSE;
-
- // Handle attribute parse exceptions here so that we can
- // react by trying to build a sensible parse tree.
- try {
- do {
- $this->scanner->whitespace();
- $this->attribute($attributes);
- }
- while (!$this->isTagEnd($selfClose));
- }
- catch (ParseError $e) {
- $selfClose = FALSE;
- }
+ elseif ($tok == 'D' || $tok == 'd') { // Doctype
+ return $this->doctype('');
+ }
- $mode = $this->events->startTag($name, $attributes, $selfClose);
- // Should we do this? What does this buy that selfClose doesn't?
- if ($selfClose) {
- $this->events->endTag($name);
- }
- elseif (is_int($mode)) {
- //fprintf(STDOUT, "Event response says move into mode %d for tag %s", $mode, $name);
- $this->setTextMode($mode, $name);
- }
+ elseif ($tok == '[') { // CDATA section
+ return $this->cdataSection();
+ }
- $this->scanner->next();
-
- return TRUE;
- }
-
- /**
- * Check if the scanner has reached the end of a tag.
- */
- protected function isTagEnd(&$selfClose) {
- $tok = $this->scanner->current();
- if ($tok == '/') {
- $this->scanner->next();
- $this->scanner->whitespace();
- if ($this->scanner->current() == '>') {
- $selfClose = TRUE;
- return TRUE;
- }
- if ($this->scanner->current() === FALSE) {
- $this->parseError("Unexpected EOF inside of tag.");
+ // FINISH
+ $this->parseError("Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s", $tok);
+ $this->bogusComment('<!');
return TRUE;
- }
- // Basically, we skip the / token and go on.
- // See 8.2.4.43.
- $this->parseError("Unexpected '%s' inside of a tag.", $this->scanner->current());
- return FALSE;
}
- if ($this->scanner->current() == '>') {
- return TRUE;
- }
- if ($this->scanner->current() === FALSE) {
- $this->parseError("Unexpected EOF inside of tag.");
- return TRUE;
- }
+ /**
+ * Consume an end tag.
+ * 8.2.4.9
+ */
+ protected function endTag()
+ {
+ if ($this->scanner->current() != '/') {
+ return FALSE;
+ }
+ $tok = $this->scanner->next();
- return FALSE;
- }
+ // a-zA-Z -> tagname
+ // > -> parse error
+ // EOF -> parse error
+ // -> parse error
+ if (! ctype_alpha($tok)) {
+ $this->parseError("Expected tag name, got '%s'", $tok);
+ if ($tok == "\0" || $tok === FALSE) {
+ return FALSE;
+ }
+ return $this->bogusComment('</');
+ }
+ $name = strtolower($this->scanner->charsUntil("\n\f \t>"));
+ // Trash whitespace.
+ $this->scanner->whitespace();
- /**
- * Parse attributes from inside of a tag.
- */
- protected function attribute(&$attributes) {
- $tok = $this->scanner->current();
- if ($tok == '/' || $tok == '>' || $tok === FALSE) {
- return FALSE;
- }
+ if ($this->scanner->current() != '>') {
+ $this->parseError("Expected >, got '%s'", $this->scanner->current());
+ // We just trash stuff until we get to the next tag close.
+ $this->scanner->charsUntil('>');
+ }
- if ($tok == '<') {
- $this->parseError("Unexepcted '<' inside of attributes list.");
- // Push the < back onto the stack.
- $this->scanner->unconsume();
- // Let the caller figure out how to handle this.
- throw new ParseError("Start tag inside of attribute.");
+ $this->events->endTag($name);
+ $this->scanner->next();
+ return TRUE;
}
- $name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));
+ /**
+ * Consume a tag name and body.
+ * 8.2.4.10
+ */
+ protected function tagName()
+ {
+ $tok = $this->scanner->current();
+ if (! ctype_alpha($tok)) {
+ return FALSE;
+ }
- if (strlen($name) == 0) {
- $this->parseError("Expected an attribute name, got %s.", $this->scanner->current());
- // Really, only '=' can be the char here. Everything else gets absorbed
- // under one rule or another.
- $name = $this->scanner->current();
- $this->scanner->next();
- }
+ // We know this is at least one char.
+ $name = strtolower($this->scanner->charsWhile(":0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"));
+ $attributes = array();
+ $selfClose = FALSE;
+
+ // Handle attribute parse exceptions here so that we can
+ // react by trying to build a sensible parse tree.
+ try {
+ do {
+ $this->scanner->whitespace();
+ $this->attribute($attributes);
+ } while (! $this->isTagEnd($selfClose));
+ } catch (ParseError $e) {
+ $selfClose = FALSE;
+ }
- $isValidAttribute = TRUE;
- // Attribute names can contain most Unicode characters for HTML5.
- // But method "DOMElement::setAttribute" is throwing exception
- // because of it's own internal restriction so these have to be filtered.
- // see issue #23: https://github.com/Masterminds/html5-php/issues/23
- // and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
- if (preg_match("/[\x1-\x2C\\/\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/u", $name)) {
- $this->parseError("Unexpected characters in attribute name: %s", $name);
- $isValidAttribute = FALSE;
- }
- // There is no limitation for 1st character in HTML5.
- // But method "DOMElement::setAttribute" is throwing exception for the
- // characters below so they have to be filtered.
- // see issue #23: https://github.com/Masterminds/html5-php/issues/23
- // and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
- else if (preg_match("/^[0-9.-]/u", $name)) {
- $this->parseError("Unexpected character at the begining of attribute name: %s", $name);
- $isValidAttribute = FALSE;
- }
- // 8.1.2.3
- $this->scanner->whitespace();
+ $mode = $this->events->startTag($name, $attributes, $selfClose);
+ // Should we do this? What does this buy that selfClose doesn't?
+ if ($selfClose) {
+ $this->events->endTag($name);
+ } elseif (is_int($mode)) {
+ // fprintf(STDOUT, "Event response says move into mode %d for tag %s", $mode, $name);
+ $this->setTextMode($mode, $name);
+ }
- $val = $this->attributeValue();
- if($isValidAttribute) {
- $attributes[$name] = $val;
- }
- return TRUE;
- }
-
- /**
- * Consume an attribute value.
- * 8.2.4.37 and after.
- */
- protected function attributeValue() {
- if ($this->scanner->current() != '=') {
- return NULL;
- }
- $this->scanner->next();
- // 8.1.2.3
- $this->scanner->whitespace();
-
- $tok = $this->scanner->current();
- switch ($tok) {
- case "\n":
- case "\f":
- case " ":
- case "\t":
- // Whitespace here indicates an empty value.
- return NULL;
- case '"':
- case "'":
- $this->scanner->next();
- return $this->quotedAttributeValue($tok);
- case '>':
- // case '/': // 8.2.4.37 seems to allow foo=/ as a valid attr.
- $this->parseError("Expected attribute value, got tag end.");
- return NULL;
- case '=':
- case '`':
- $this->parseError("Expecting quotes, got %s.", $tok);
- return $this->unquotedAttributeValue();
- default:
- return $this->unquotedAttributeValue();
- }
- }
-
- /**
- * Get an attribute value string.
- *
- * @param string $quote
- * IMPORTANT: This is a series of chars! Any one of which will be considered
- * termination of an attribute's value. E.g. "\"'" will stop at either
- * ' or ".
- * @return string
- * The attribute value.
- */
- protected function quotedAttributeValue($quote) {
- $stoplist = "\f" . $quote;
- $val = '';
- $tok = $this->scanner->current();
- while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
- if ($tok == '&') {
- $val .= $this->decodeCharacterReference(TRUE);
- $tok = $this->scanner->current();
- }
- else {
- $val .= $tok;
- $tok = $this->scanner->next();
- }
+ $this->scanner->next();
+
+ return TRUE;
}
- $this->scanner->next();
- return $val;
- }
- protected function unquotedAttributeValue() {
- $stoplist = "\t\n\f >";
- $val = '';
- $tok = $this->scanner->current();
- while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
- if ($tok == '&') {
- $val .= $this->decodeCharacterReference(TRUE);
+
+ /**
+ * Check if the scanner has reached the end of a tag.
+ */
+ protected function isTagEnd(&$selfClose)
+ {
$tok = $this->scanner->current();
- }
- else {
- if(strspn($tok, "\"'<=`") > 0) {
- $this->parseError("Unexpected chars in unquoted attribute value %s", $tok);
+ if ($tok == '/') {
+ $this->scanner->next();
+ $this->scanner->whitespace();
+ if ($this->scanner->current() == '>') {
+ $selfClose = TRUE;
+ return TRUE;
+ }
+ if ($this->scanner->current() === FALSE) {
+ $this->parseError("Unexpected EOF inside of tag.");
+ return TRUE;
+ }
+ // Basically, we skip the / token and go on.
+ // See 8.2.4.43.
+ $this->parseError("Unexpected '%s' inside of a tag.", $this->scanner->current());
+ return FALSE;
}
- $val .= $tok;
- $tok = $this->scanner->next();
- }
- }
- return $val;
- }
-
-
- /**
- * Consume malformed markup as if it were a comment.
- * 8.2.4.44
- *
- * The spec requires that the ENTIRE tag-like thing be enclosed inside of
- * the comment. So this will generate comments like:
- *
- * &lt;!--&lt/+foo&gt;--&gt;
- *
- * @param string $leading
- * Prepend any leading characters. This essentially
- * negates the need to backtrack, but it's sort of
- * a hack.
- */
- protected function bogusComment($leading = '') {
-
- // TODO: This can be done more efficiently when the
- // scanner exposes a readUntil() method.
- $comment = $leading;
- $tok = $this->scanner->current();
- do {
- $comment .= $tok;
- $tok = $this->scanner->next();
- } while ($tok !== FALSE && $tok != '>');
-
- $this->flushBuffer();
- $this->events->comment($comment . $tok);
- $this->scanner->next();
-
- return TRUE;
- }
-
- /**
- * Read a comment.
- *
- * Expects the first tok to be inside of the comment.
- */
- protected function comment() {
- $tok = $this->scanner->current();
- $comment = '';
-
- // <!-->. Emit an empty comment because 8.2.4.46 says to.
- if ($tok == '>') {
- // Parse error. Emit the comment token.
- $this->parseError("Expected comment data, got '>'");
- $this->events->comment('');
- $this->scanner->next();
- return TRUE;
- }
- // Replace NULL with the replacement char.
- if ($tok == "\0") {
- $tok = UTF8Utils::FFFD;
- }
- while (!$this->isCommentEnd()) {
- $comment .= $tok;
- $tok = $this->scanner->next();
- }
+ if ($this->scanner->current() == '>') {
+ return TRUE;
+ }
+ if ($this->scanner->current() === FALSE) {
+ $this->parseError("Unexpected EOF inside of tag.");
+ return TRUE;
+ }
- $this->events->comment($comment);
- $this->scanner->next();
- return TRUE;
- }
-
- /**
- * Check if the scanner has reached the end of a comment.
- */
- protected function isCommentEnd() {
- // EOF
- if($this->scanner->current() === FALSE) {
- // Hit the end.
- $this->parseError("Unexpected EOF in a comment.");
- return TRUE;
+ return FALSE;
}
- // If it doesn't start with -, not the end.
- if($this->scanner->current() != '-') {
- return FALSE;
- }
+ /**
+ * Parse attributes from inside of a tag.
+ */
+ protected function attribute(&$attributes)
+ {
+ $tok = $this->scanner->current();
+ if ($tok == '/' || $tok == '>' || $tok === FALSE) {
+ return FALSE;
+ }
+ if ($tok == '<') {
+ $this->parseError("Unexepcted '<' inside of attributes list.");
+ // Push the < back onto the stack.
+ $this->scanner->unconsume();
+ // Let the caller figure out how to handle this.
+ throw new ParseError("Start tag inside of attribute.");
+ }
- // Advance one, and test for '->'
- if ($this->scanner->next() == '-'
- && $this->scanner->peek() == '>') {
- $this->scanner->next(); // Consume the last '>'
- return TRUE;
- }
- // Unread '-';
- $this->scanner->unconsume(1);
- return FALSE;
- }
-
- /**
- * Parse a DOCTYPE.
- *
- * Parse a DOCTYPE declaration. This method has strong bearing on whether or
- * not Quirksmode is enabled on the event handler.
- *
- * @todo This method is a little long. Should probably refactor.
- */
- protected function doctype() {
- if (strcasecmp($this->scanner->current(), 'D')) {
- return FALSE;
- }
- // Check that string is DOCTYPE.
- $chars = $this->scanner->charsWhile("DOCTYPEdoctype");
- if (strcasecmp($chars, 'DOCTYPE')) {
- $this->parseError('Expected DOCTYPE, got %s', $chars);
- return $this->bogusComment('<!' . $chars);
- }
+ $name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));
- $this->scanner->whitespace();
- $tok = $this->scanner->current();
+ if (strlen($name) == 0) {
+ $this->parseError("Expected an attribute name, got %s.", $this->scanner->current());
+ // Really, only '=' can be the char here. Everything else gets absorbed
+ // under one rule or another.
+ $name = $this->scanner->current();
+ $this->scanner->next();
+ }
+
+ $isValidAttribute = TRUE;
+ // Attribute names can contain most Unicode characters for HTML5.
+ // But method "DOMElement::setAttribute" is throwing exception
+ // because of it's own internal restriction so these have to be filtered.
+ // see issue #23: https://github.com/Masterminds/html5-php/issues/23
+ // and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
+ if (preg_match("/[\x1-\x2C\\/\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/u", $name)) {
+ $this->parseError("Unexpected characters in attribute name: %s", $name);
+ $isValidAttribute = FALSE;
+ } // There is no limitation for 1st character in HTML5.
+ // But method "DOMElement::setAttribute" is throwing exception for the
+ // characters below so they have to be filtered.
+ // see issue #23: https://github.com/Masterminds/html5-php/issues/23
+ // and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
+ else
+ if (preg_match("/^[0-9.-]/u", $name)) {
+ $this->parseError("Unexpected character at the begining of attribute name: %s", $name);
+ $isValidAttribute = FALSE;
+ }
+ // 8.1.2.3
+ $this->scanner->whitespace();
- // EOF: die.
- if ($tok === FALSE) {
- $this->events->doctype('html5',EventHandler::DOCTYPE_NONE,'', TRUE);
- return $this->eof();
+ $val = $this->attributeValue();
+ if ($isValidAttribute) {
+ $attributes[$name] = $val;
+ }
+ return TRUE;
}
- $doctypeName = '';
+ /**
+ * Consume an attribute value.
+ * 8.2.4.37 and after.
+ */
+ protected function attributeValue()
+ {
+ if ($this->scanner->current() != '=') {
+ return NULL;
+ }
+ $this->scanner->next();
+ // 8.1.2.3
+ $this->scanner->whitespace();
- // NULL char: convert.
- if ($tok === "\0") {
- $this->parseError("Unexpected NULL character in DOCTYPE.");
- $doctypeName .= UTF8::FFFD;
- $tok = $this->scanner->next();
+ $tok = $this->scanner->current();
+ switch ($tok) {
+ case "\n":
+ case "\f":
+ case " ":
+ case "\t":
+ // Whitespace here indicates an empty value.
+ return NULL;
+ case '"':
+ case "'":
+ $this->scanner->next();
+ return $this->quotedAttributeValue($tok);
+ case '>':
+ // case '/': // 8.2.4.37 seems to allow foo=/ as a valid attr.
+ $this->parseError("Expected attribute value, got tag end.");
+ return NULL;
+ case '=':
+ case '`':
+ $this->parseError("Expecting quotes, got %s.", $tok);
+ return $this->unquotedAttributeValue();
+ default:
+ return $this->unquotedAttributeValue();
+ }
}
- $stop = " \n\f>";
- $doctypeName = $this->scanner->charsUntil($stop);
- // Lowercase ASCII, replace \0 with FFFD
- $doctypeName = strtolower(strtr($doctypeName, "\0", UTF8Utils::FFFD));
+ /**
+ * Get an attribute value string.
+ *
+ * @param string $quote
+ * IMPORTANT: This is a series of chars! Any one of which will be considered
+ * termination of an attribute's value. E.g. "\"'" will stop at either
+ * ' or ".
+ * @return string The attribute value.
+ */
+ protected function quotedAttributeValue($quote)
+ {
+ $stoplist = "\f" . $quote;
+ $val = '';
+ $tok = $this->scanner->current();
+ while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
+ if ($tok == '&') {
+ $val .= $this->decodeCharacterReference(TRUE);
+ $tok = $this->scanner->current();
+ } else {
+ $val .= $tok;
+ $tok = $this->scanner->next();
+ }
+ }
+ $this->scanner->next();
+ return $val;
+ }
- $tok = $this->scanner->current();
+ protected function unquotedAttributeValue()
+ {
+ $stoplist = "\t\n\f >";
+ $val = '';
+ $tok = $this->scanner->current();
+ while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
+ if ($tok == '&') {
+ $val .= $this->decodeCharacterReference(TRUE);
+ $tok = $this->scanner->current();
+ } else {
+ if (strspn($tok, "\"'<=`") > 0) {
+ $this->parseError("Unexpected chars in unquoted attribute value %s", $tok);
+ }
+ $val .= $tok;
+ $tok = $this->scanner->next();
+ }
+ }
+ return $val;
+ }
+
+ /**
+ * Consume malformed markup as if it were a comment.
+ * 8.2.4.44
+ *
+ * The spec requires that the ENTIRE tag-like thing be enclosed inside of
+ * the comment. So this will generate comments like:
+ *
+ * &lt;!--&lt/+foo&gt;--&gt;
+ *
+ * @param string $leading
+ * Prepend any leading characters. This essentially
+ * negates the need to backtrack, but it's sort of
+ * a hack.
+ */
+ protected function bogusComment($leading = '')
+ {
- // If FALSE, emit a parse error, DOCTYPE, and return.
- if ($tok === FALSE) {
- $this->parseError('Unexpected EOF in DOCTYPE declaration.');
- $this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, NULL, TRUE);
- return TRUE;
- }
+ // TODO: This can be done more efficiently when the
+ // scanner exposes a readUntil() method.
+ $comment = $leading;
+ $tok = $this->scanner->current();
+ do {
+ $comment .= $tok;
+ $tok = $this->scanner->next();
+ } while ($tok !== FALSE && $tok != '>');
- // Short DOCTYPE, like <!DOCTYPE html>
- if ($tok == '>') {
- // DOCTYPE without a name.
- if (strlen($doctypeName) == 0) {
- $this->parseError("Expected a DOCTYPE name. Got nothing.");
- $this->events->doctype($doctypeName, 0, NULL, TRUE);
+ $this->flushBuffer();
+ $this->events->comment($comment . $tok);
$this->scanner->next();
+
return TRUE;
- }
- $this->events->doctype($doctypeName);
- $this->scanner->next();
- return TRUE;
}
- $this->scanner->whitespace();
-
- $pub = strtoupper($this->scanner->getAsciiAlpha());
- $white = strlen($this->scanner->whitespace());
- $tok = $this->scanner->current();
-
- // Get ID, and flag it as pub or system.
- if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
- // Get the sys ID.
- $type = $pub == 'PUBLIC' ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
- $id = $this->quotedString("\0>");
- if ($id === FALSE) {
- $this->events->doctype($doctypeName, $type, $pub, FALSE);
- return FALSE;
- }
- // Premature EOF.
- if ($this->scanner->current() === FALSE) {
- $this->parseError("Unexpected EOF in DOCTYPE");
- $this->events->doctype($doctypeName, $type, $id, TRUE);
- return TRUE;
- }
+ /**
+ * Read a comment.
+ *
+ * Expects the first tok to be inside of the comment.
+ */
+ protected function comment()
+ {
+ $tok = $this->scanner->current();
+ $comment = '';
+
+ // <!-->. Emit an empty comment because 8.2.4.46 says to.
+ if ($tok == '>') {
+ // Parse error. Emit the comment token.
+ $this->parseError("Expected comment data, got '>'");
+ $this->events->comment('');
+ $this->scanner->next();
+ return TRUE;
+ }
+
+ // Replace NULL with the replacement char.
+ if ($tok == "\0") {
+ $tok = UTF8Utils::FFFD;
+ }
+ while (! $this->isCommentEnd()) {
+ $comment .= $tok;
+ $tok = $this->scanner->next();
+ }
- // Well-formed complete DOCTYPE.
- $this->scanner->whitespace();
- if ($this->scanner->current() == '>') {
- $this->events->doctype($doctypeName, $type, $id, FALSE);
+ $this->events->comment($comment);
$this->scanner->next();
return TRUE;
- }
-
- // If we get here, we have <!DOCTYPE foo PUBLIC "bar" SOME_JUNK
- // Throw away the junk, parse error, quirks mode, return TRUE.
- $this->scanner->charsUntil(">");
- $this->parseError("Malformed DOCTYPE.");
- $this->events->doctype($doctypeName, $type, $id, TRUE);
- $this->scanner->next();
- return TRUE;
}
- // Else it's a bogus DOCTYPE.
- // Consume to > and trash.
- $this->scanner->charsUntil('>');
-
- $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub);
- $this->events->doctype($doctypeName, 0, NULL, TRUE);
- $this->scanner->next();
- return TRUE;
-
- }
-
- /**
- * Utility for reading a quoted string.
- *
- * @param string $stopchars
- * Characters (in addition to a close-quote) that should stop the string.
- * E.g. sometimes '>' is higher precedence than '"' or "'".
- * @return mixed
- * String if one is found (quotations omitted)
- */
- protected function quotedString($stopchars) {
- $tok = $this->scanner->current();
- if ($tok == '"' || $tok == "'") {
- $this->scanner->next();
- $ret = $this->scanner->charsUntil($tok . $stopchars);
- if ($this->scanner->current() == $tok) {
- $this->scanner->next();
- }
- else {
- // Parse error because no close quote.
- $this->parseError("Expected %s, got %s", $tok, $this->scanner->current());
- }
- return $ret;
- }
- return FALSE;
- }
+ /**
+ * Check if the scanner has reached the end of a comment.
+ */
+ protected function isCommentEnd()
+ {
+ // EOF
+ if ($this->scanner->current() === FALSE) {
+ // Hit the end.
+ $this->parseError("Unexpected EOF in a comment.");
+ return TRUE;
+ }
+ // If it doesn't start with -, not the end.
+ if ($this->scanner->current() != '-') {
+ return FALSE;
+ }
- /**
- * Handle a CDATA section.
- */
- protected function cdataSection() {
- if ($this->scanner->current() != '[') {
- return FALSE;
+ // Advance one, and test for '->'
+ if ($this->scanner->next() == '-' && $this->scanner->peek() == '>') {
+ $this->scanner->next(); // Consume the last '>'
+ return TRUE;
+ }
+ // Unread '-';
+ $this->scanner->unconsume(1);
+ return FALSE;
}
- $cdata = '';
- $this->scanner->next();
- $chars = $this->scanner->charsWhile('CDAT');
- if ($chars != 'CDATA' || $this->scanner->current() != '[') {
- $this->parseError('Expected [CDATA[, got %s', $chars);
- return $this->bogusComment('<![' . $chars);
- }
+ /**
+ * Parse a DOCTYPE.
+ *
+ * Parse a DOCTYPE declaration. This method has strong bearing on whether or
+ * not Quirksmode is enabled on the event handler.
+ *
+ * @todo This method is a little long. Should probably refactor.
+ */
+ protected function doctype()
+ {
+ if (strcasecmp($this->scanner->current(), 'D')) {
+ return FALSE;
+ }
+ // Check that string is DOCTYPE.
+ $chars = $this->scanner->charsWhile("DOCTYPEdoctype");
+ if (strcasecmp($chars, 'DOCTYPE')) {
+ $this->parseError('Expected DOCTYPE, got %s', $chars);
+ return $this->bogusComment('<!' . $chars);
+ }
- $tok = $this->scanner->next();
- do {
- if ($tok === FALSE) {
- $this->parseError('Unexpected EOF inside CDATA.');
- $this->bogusComment('<![CDATA[' . $cdata);
- return TRUE;
- }
- $cdata .= $tok;
- $tok = $this->scanner->next();
- }
- while (!$this->sequenceMatches(']]>'));
-
- // Consume ]]>
- $this->scanner->consume(3);
-
- $this->events->cdata($cdata);
- return TRUE;
-
- }
-
- // ================================================================
- // Non-HTML5
- // ================================================================
- /**
- * Handle a processing instruction.
- *
- * XML processing instructions are supposed to be ignored in HTML5,
- * treated as "bogus comments". However, since we're not a user
- * agent, we allow them. We consume until ?> and then issue a
- * EventListener::processingInstruction() event.
- */
- protected function processingInstruction() {
- if ($this->scanner->current() != '?') {
- return FALSE;
- }
+ $this->scanner->whitespace();
+ $tok = $this->scanner->current();
- $tok = $this->scanner->next();
- $procName = $this->scanner->getAsciiAlpha();
- $white = strlen($this->scanner->whitespace());
+ // EOF: die.
+ if ($tok === FALSE) {
+ $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', TRUE);
+ return $this->eof();
+ }
- // If not a PI, send to bogusComment.
- if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == FALSE) {
- $this->parseError("Expected processing instruction name, got $tok");
- $this->bogusComment('<?' . $tok . $procName);
- return TRUE;
- }
+ $doctypeName = '';
- $data = '';
- // As long as it's not the case that the next two chars are ? and >.
- while (!($this->scanner->current() == '?' && $this->scanner->peek() == '>')) {
- $data .= $this->scanner->current();
+ // NULL char: convert.
+ if ($tok === "\0") {
+ $this->parseError("Unexpected NULL character in DOCTYPE.");
+ $doctypeName .= UTF8::FFFD;
+ $tok = $this->scanner->next();
+ }
- $tok = $this->scanner->next();
- if ($tok === FALSE) {
- $this->parseError("Unexpected EOF in processing instruction.");
- $this->events->processingInstruction($procName, $data);
- return TRUE;
- }
+ $stop = " \n\f>";
+ $doctypeName = $this->scanner->charsUntil($stop);
+ // Lowercase ASCII, replace \0 with FFFD
+ $doctypeName = strtolower(strtr($doctypeName, "\0", UTF8Utils::FFFD));
- }
+ $tok = $this->scanner->current();
- $this->scanner->next(); // >
- $this->scanner->next(); // Next token.
- $this->events->processingInstruction($procName, $data);
- return TRUE;
- }
+ // If FALSE, emit a parse error, DOCTYPE, and return.
+ if ($tok === FALSE) {
+ $this->parseError('Unexpected EOF in DOCTYPE declaration.');
+ $this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, NULL, TRUE);
+ return TRUE;
+ }
+ // Short DOCTYPE, like <!DOCTYPE html>
+ if ($tok == '>') {
+ // DOCTYPE without a name.
+ if (strlen($doctypeName) == 0) {
+ $this->parseError("Expected a DOCTYPE name. Got nothing.");
+ $this->events->doctype($doctypeName, 0, NULL, TRUE);
+ $this->scanner->next();
+ return TRUE;
+ }
+ $this->events->doctype($doctypeName);
+ $this->scanner->next();
+ return TRUE;
+ }
+ $this->scanner->whitespace();
- // ================================================================
- // UTILITY FUNCTIONS
- // ================================================================
+ $pub = strtoupper($this->scanner->getAsciiAlpha());
+ $white = strlen($this->scanner->whitespace());
+ $tok = $this->scanner->current();
- /**
- * Read from the input stream until we get to the desired sequene
- * or hit the end of the input stream.
- */
- protected function readUntilSequence($sequence) {
- $buffer = '';
+ // Get ID, and flag it as pub or system.
+ if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
+ // Get the sys ID.
+ $type = $pub == 'PUBLIC' ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
+ $id = $this->quotedString("\0>");
+ if ($id === FALSE) {
+ $this->events->doctype($doctypeName, $type, $pub, FALSE);
+ return FALSE;
+ }
+
+ // Premature EOF.
+ if ($this->scanner->current() === FALSE) {
+ $this->parseError("Unexpected EOF in DOCTYPE");
+ $this->events->doctype($doctypeName, $type, $id, TRUE);
+ return TRUE;
+ }
+
+ // Well-formed complete DOCTYPE.
+ $this->scanner->whitespace();
+ if ($this->scanner->current() == '>') {
+ $this->events->doctype($doctypeName, $type, $id, FALSE);
+ $this->scanner->next();
+ return TRUE;
+ }
+
+ // If we get here, we have <!DOCTYPE foo PUBLIC "bar" SOME_JUNK
+ // Throw away the junk, parse error, quirks mode, return TRUE.
+ $this->scanner->charsUntil(">");
+ $this->parseError("Malformed DOCTYPE.");
+ $this->events->doctype($doctypeName, $type, $id, TRUE);
+ $this->scanner->next();
+ return TRUE;
+ }
- // Optimization for reading larger blocks faster.
- $first = substr($sequence, 0, 1);
- while ($this->scanner->current() !== FALSE) {
- $buffer .= $this->scanner->charsUntil($first);
+ // Else it's a bogus DOCTYPE.
+ // Consume to > and trash.
+ $this->scanner->charsUntil('>');
- // Stop as soon as we hit the stopping condition.
- if ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))) {
- return $buffer;
- }
- $buffer .= $this->scanner->current();
- $this->scanner->next();
+ $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub);
+ $this->events->doctype($doctypeName, 0, NULL, TRUE);
+ $this->scanner->next();
+ return TRUE;
}
- // If we get here, we hit the EOF.
- $this->parseError("Unexpected EOF during text read.");
- return $buffer;
- }
-
- /**
- * Check if upcomming chars match the given sequence.
- *
- * This will read the stream for the $sequence. If it's
- * found, this will return TRUE. If not, return FALSE.
- * Since this unconsumes any chars it reads, the caller
- * will still need to read the next sequence, even if
- * this returns TRUE.
- *
- * Example: $this->sequenceMatches('</script>') will
- * see if the input stream is at the start of a
- * '</script>' string.
- */
- protected function sequenceMatches($sequence) {
- $len = strlen($sequence);
- $buffer = '';
- for ($i = 0; $i < $len; ++$i) {
- $buffer .= $this->scanner->current();
-
- // EOF. Rewind and let the caller handle it.
- if ($this->scanner->current() === FALSE) {
- $this->scanner->unconsume($i);
+ /**
+ * Utility for reading a quoted string.
+ *
+ * @param string $stopchars
+ * Characters (in addition to a close-quote) that should stop the string.
+ * E.g. sometimes '>' is higher precedence than '"' or "'".
+ * @return mixed String if one is found (quotations omitted)
+ */
+ protected function quotedString($stopchars)
+ {
+ $tok = $this->scanner->current();
+ if ($tok == '"' || $tok == "'") {
+ $this->scanner->next();
+ $ret = $this->scanner->charsUntil($tok . $stopchars);
+ if ($this->scanner->current() == $tok) {
+ $this->scanner->next();
+ } else {
+ // Parse error because no close quote.
+ $this->parseError("Expected %s, got %s", $tok, $this->scanner->current());
+ }
+ return $ret;
+ }
return FALSE;
- }
- $this->scanner->next();
}
- $this->scanner->unconsume($len);
- return $buffer == $sequence;
-
- }
-
- /**
- * Send a TEXT event with the contents of the text buffer.
- *
- * This emits an EventHandler::text() event with the current contents of the
- * temporary text buffer. (The buffer is used to group as much PCDATA
- * as we can instead of emitting lots and lots of TEXT events.)
- */
- protected function flushBuffer() {
- if (empty($this->text)) {
- return;
- }
- $this->events->text($this->text);
- $this->text = '';
- }
-
- /**
- * Add text to the temporary buffer.
- *
- * @see flushBuffer()
- */
- protected function buffer($str) {
- $this->text .= $str;
- }
-
- /**
- * Emit a parse error.
- *
- * A parse error always returns FALSE because it never consumes any
- * characters.
- */
- protected function parseError($msg) {
- $args = func_get_args();
-
- if (count($args) > 1) {
- array_shift($args);
- $msg = vsprintf($msg, $args);
- }
+ /**
+ * Handle a CDATA section.
+ */
+ protected function cdataSection()
+ {
+ if ($this->scanner->current() != '[') {
+ return FALSE;
+ }
+ $cdata = '';
+ $this->scanner->next();
+
+ $chars = $this->scanner->charsWhile('CDAT');
+ if ($chars != 'CDATA' || $this->scanner->current() != '[') {
+ $this->parseError('Expected [CDATA[, got %s', $chars);
+ return $this->bogusComment('<![' . $chars);
+ }
- $line = $this->scanner->currentLine();
- $col = $this->scanner->columnOffset();
- $this->events->parseError($msg, $line, $col);
- return FALSE;
- }
-
- /**
- * Decode a character reference and return the string.
- *
- * Returns FALSE if the entity could not be found. If $inAttribute is set
- * to TRUE, a bare & will be returned as-is.
- *
- * @param boolean $inAttribute
- * Set to TRUE if the text is inside of an attribute value.
- * FALSE otherwise.
- */
- protected function decodeCharacterReference($inAttribute = FALSE) {
-
- // If it fails this, it's definitely not an entity.
- if ($this->scanner->current() != '&') {
- return FALSE;
+ $tok = $this->scanner->next();
+ do {
+ if ($tok === FALSE) {
+ $this->parseError('Unexpected EOF inside CDATA.');
+ $this->bogusComment('<![CDATA[' . $cdata);
+ return TRUE;
+ }
+ $cdata .= $tok;
+ $tok = $this->scanner->next();
+ } while (! $this->sequenceMatches(']]>'));
+
+ // Consume ]]>
+ $this->scanner->consume(3);
+
+ $this->events->cdata($cdata);
+ return TRUE;
}
- // Next char after &.
- $tok = $this->scanner->next();
- $entity = '';
- $start = $this->scanner->position();
+ // ================================================================
+ // Non-HTML5
+ // ================================================================
+ /**
+ * Handle a processing instruction.
+ *
+ * XML processing instructions are supposed to be ignored in HTML5,
+ * treated as "bogus comments". However, since we're not a user
+ * agent, we allow them. We consume until ?> and then issue a
+ * EventListener::processingInstruction() event.
+ */
+ protected function processingInstruction()
+ {
+ if ($this->scanner->current() != '?') {
+ return FALSE;
+ }
+
+ $tok = $this->scanner->next();
+ $procName = $this->scanner->getAsciiAlpha();
+ $white = strlen($this->scanner->whitespace());
+
+ // If not a PI, send to bogusComment.
+ if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == FALSE) {
+ $this->parseError("Expected processing instruction name, got $tok");
+ $this->bogusComment('<?' . $tok . $procName);
+ return TRUE;
+ }
- if ($tok == FALSE) {
- return '&';
+ $data = '';
+ // As long as it's not the case that the next two chars are ? and >.
+ while (! ($this->scanner->current() == '?' && $this->scanner->peek() == '>')) {
+ $data .= $this->scanner->current();
+
+ $tok = $this->scanner->next();
+ if ($tok === FALSE) {
+ $this->parseError("Unexpected EOF in processing instruction.");
+ $this->events->processingInstruction($procName, $data);
+ return TRUE;
+ }
+ }
+
+ $this->scanner->next(); // >
+ $this->scanner->next(); // Next token.
+ $this->events->processingInstruction($procName, $data);
+ return TRUE;
}
- // These indicate not an entity. We return just
- // the &.
- if (strspn($tok, static::WHITE . "&<") == 1) {
- //$this->scanner->next();
- return '&';
+ // ================================================================
+ // UTILITY FUNCTIONS
+ // ================================================================
+
+ /**
+ * Read from the input stream until we get to the desired sequene
+ * or hit the end of the input stream.
+ */
+ protected function readUntilSequence($sequence)
+ {
+ $buffer = '';
+
+ // Optimization for reading larger blocks faster.
+ $first = substr($sequence, 0, 1);
+ while ($this->scanner->current() !== FALSE) {
+ $buffer .= $this->scanner->charsUntil($first);
+
+ // Stop as soon as we hit the stopping condition.
+ if ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))) {
+ return $buffer;
+ }
+ $buffer .= $this->scanner->current();
+ $this->scanner->next();
+ }
+
+ // If we get here, we hit the EOF.
+ $this->parseError("Unexpected EOF during text read.");
+ return $buffer;
}
- // Numeric entity
- if ($tok == '#') {
- $tok = $this->scanner->next();
-
- // Hexidecimal encoding.
- // X[0-9a-fA-F]+;
- // x[0-9a-fA-F]+;
- if ($tok == 'x' || $tok == 'X') {
- $tok = $this->scanner->next(); // Consume x
-
- // Convert from hex code to char.
- $hex = $this->scanner->getHex();
- if (empty($hex)) {
- $this->parseError("Expected &#xHEX;, got &#x%s", $tok);
- // We unconsume because we don't know what parser rules might
- // be in effect for the remaining chars. For example. '&#>'
- // might result in a specific parsing rule inside of tag
- // contexts, while not inside of pcdata context.
- $this->scanner->unconsume(2);
- return '&';
+ /**
+ * Check if upcomming chars match the given sequence.
+ *
+ * This will read the stream for the $sequence. If it's
+ * found, this will return TRUE. If not, return FALSE.
+ * Since this unconsumes any chars it reads, the caller
+ * will still need to read the next sequence, even if
+ * this returns TRUE.
+ *
+ * Example: $this->sequenceMatches('</script>') will
+ * see if the input stream is at the start of a
+ * '</script>' string.
+ */
+ protected function sequenceMatches($sequence)
+ {
+ $len = strlen($sequence);
+ $buffer = '';
+ for ($i = 0; $i < $len; ++ $i) {
+ $buffer .= $this->scanner->current();
+
+ // EOF. Rewind and let the caller handle it.
+ if ($this->scanner->current() === FALSE) {
+ $this->scanner->unconsume($i);
+ return FALSE;
+ }
+ $this->scanner->next();
}
- $entity = CharacterReference::lookupHex($hex);
- }
- // Decimal encoding.
- // [0-9]+;
- else {
- // Convert from decimal to char.
- $numeric = $this->scanner->getNumeric();
- if ($numeric === FALSE) {
- $this->parseError("Expected &#DIGITS;, got &#%s", $tok);
- $this->scanner->unconsume(2);
- return '&';
+
+ $this->scanner->unconsume($len);
+ return $buffer == $sequence;
+ }
+
+ /**
+ * Send a TEXT event with the contents of the text buffer.
+ *
+ * This emits an EventHandler::text() event with the current contents of the
+ * temporary text buffer. (The buffer is used to group as much PCDATA
+ * as we can instead of emitting lots and lots of TEXT events.)
+ */
+ protected function flushBuffer()
+ {
+ if (empty($this->text)) {
+ return;
}
- $entity = CharacterReference::lookupDecimal($numeric);
- }
+ $this->events->text($this->text);
+ $this->text = '';
}
- // String entity.
- else {
- // Attempt to consume a string up to a ';'.
- // [a-zA-Z0-9]+;
- $cname = $this->scanner->getAsciiAlpha();
- $entity = CharacterReference::lookupName($cname);
- if ($entity == NULL) {
- $this->parseError("No match in entity table for '%s'", $entity);
- }
+
+ /**
+ * Add text to the temporary buffer.
+ *
+ * @see flushBuffer()
+ */
+ protected function buffer($str)
+ {
+ $this->text .= $str;
}
- // The scanner has advanced the cursor for us.
- $tok = $this->scanner->current();
+ /**
+ * Emit a parse error.
+ *
+ * A parse error always returns FALSE because it never consumes any
+ * characters.
+ */
+ protected function parseError($msg)
+ {
+ $args = func_get_args();
+
+ if (count($args) > 1) {
+ array_shift($args);
+ $msg = vsprintf($msg, $args);
+ }
- // We have an entity. We're done here.
- if ($tok == ';') {
- $this->scanner->next();
- return $entity;
+ $line = $this->scanner->currentLine();
+ $col = $this->scanner->columnOffset();
+ $this->events->parseError($msg, $line, $col);
+ return FALSE;
}
- // If in an attribute, then failing to match ; means unconsume the
- // entire string. Otherwise, failure to match is an error.
- if ($inAttribute) {
- $this->scanner->unconsume($this->scanner->position() - $start);
- return '&';
- }
+ /**
+ * Decode a character reference and return the string.
+ *
+ * Returns FALSE if the entity could not be found. If $inAttribute is set
+ * to TRUE, a bare & will be returned as-is.
+ *
+ * @param boolean $inAttribute
+ * Set to TRUE if the text is inside of an attribute value.
+ * FALSE otherwise.
+ */
+ protected function decodeCharacterReference($inAttribute = FALSE)
+ {
- $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok);
- return '&' . $entity;
+ // If it fails this, it's definitely not an entity.
+ if ($this->scanner->current() != '&') {
+ return FALSE;
+ }
- }
+ // Next char after &.
+ $tok = $this->scanner->next();
+ $entity = '';
+ $start = $this->scanner->position();
+ if ($tok == FALSE) {
+ return '&';
+ }
+
+ // These indicate not an entity. We return just
+ // the &.
+ if (strspn($tok, static::WHITE . "&<") == 1) {
+ // $this->scanner->next();
+ return '&';
+ }
+
+ // Numeric entity
+ if ($tok == '#') {
+ $tok = $this->scanner->next();
+
+ // Hexidecimal encoding.
+ // X[0-9a-fA-F]+;
+ // x[0-9a-fA-F]+;
+ if ($tok == 'x' || $tok == 'X') {
+ $tok = $this->scanner->next(); // Consume x
+
+ // Convert from hex code to char.
+ $hex = $this->scanner->getHex();
+ if (empty($hex)) {
+ $this->parseError("Expected &#xHEX;, got &#x%s", $tok);
+ // We unconsume because we don't know what parser rules might
+ // be in effect for the remaining chars. For example. '&#>'
+ // might result in a specific parsing rule inside of tag
+ // contexts, while not inside of pcdata context.
+ $this->scanner->unconsume(2);
+ return '&';
+ }
+ $entity = CharacterReference::lookupHex($hex);
+ } // Decimal encoding.
+ // [0-9]+;
+ else {
+ // Convert from decimal to char.
+ $numeric = $this->scanner->getNumeric();
+ if ($numeric === FALSE) {
+ $this->parseError("Expected &#DIGITS;, got &#%s", $tok);
+ $this->scanner->unconsume(2);
+ return '&';
+ }
+ $entity = CharacterReference::lookupDecimal($numeric);
+ }
+ } // String entity.
+ else {
+ // Attempt to consume a string up to a ';'.
+ // [a-zA-Z0-9]+;
+ $cname = $this->scanner->getAsciiAlpha();
+ $entity = CharacterReference::lookupName($cname);
+ if ($entity == NULL) {
+ $this->parseError("No match in entity table for '%s'", $entity);
+ }
+ }
+
+ // The scanner has advanced the cursor for us.
+ $tok = $this->scanner->current();
+
+ // We have an entity. We're done here.
+ if ($tok == ';') {
+ $this->scanner->next();
+ return $entity;
+ }
+
+ // If in an attribute, then failing to match ; means unconsume the
+ // entire string. Otherwise, failure to match is an error.
+ if ($inAttribute) {
+ $this->scanner->unconsume($this->scanner->position() - $start);
+ return '&';
+ }
+
+ $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok);
+ return '&' . $entity;
+ }
}
diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php
index 73b7fc4..4eb0a54 100644
--- a/src/HTML5/Parser/TreeBuildingRules.php
+++ b/src/HTML5/Parser/TreeBuildingRules.php
@@ -1,114 +1,140 @@
<?php
namespace Masterminds\HTML5\Parser;
-use Masterminds\HTML5\Elements;
-
/**
* Handles special-case rules for the DOM tree builder.
*
- * Many tags have special rules that need to be accomodated on an
+ * Many tags have special rules that need to be accommodated on an
* individual basis. This class handles those rules.
*
* See section 8.1.2.4 of the spec.
*
- * @todo
- * - colgroup and col special behaviors
- * - body and head special behaviors
+ * @todo - colgroup and col special behaviors
+ * - body and head special behaviors
*/
-class TreeBuildingRules {
-
- protected static $tags = array(
- 'li' => 1,
- 'dd' => 1,
- 'dt' => 1,
- 'rt' => 1,
- 'rp' => 1,
- 'tr' => 1,
- 'th' => 1,
- 'td' => 1,
- 'thead' => 1,
- 'tfoot' => 1,
- 'tbody' => 1,
- 'table' => 1,
- 'optgroup' => 1,
- 'option' => 1,
- );
-
- /**
- * Build a new rules engine.
- *
- * @param \DOMDocument $doc
- * The DOM document to use for evaluation and modification.
- */
- public function __construct($doc) {
- $this->doc = $doc;
- }
+class TreeBuildingRules
+{
- /**
- * Returns TRUE if the given tagname has special processing rules.
- */
- public function hasRules($tagname) {
- return isset(static::$tags[$tagname]);
- }
+ /**
+ * Tag names that have special insertion rules.
+ *
+ * Only the keys are consulted (hasRules() tests them with isset());
+ * the value 1 is just a placeholder.
+ */
+ protected static $tags = array(
+ 'li' => 1,
+ 'dd' => 1,
+ 'dt' => 1,
+ 'rt' => 1,
+ 'rp' => 1,
+ 'tr' => 1,
+ 'th' => 1,
+ 'td' => 1,
+ 'thead' => 1,
+ 'tfoot' => 1,
+ 'tbody' => 1,
+ 'table' => 1,
+ 'optgroup' => 1,
+ 'option' => 1
+ );
- /**
- * Evaluate the rule for the current tag name.
- *
- * This may modify the existing DOM.
- *
- * @return \DOMElement
- * The new Current DOM element.
- */
- public function evaluate($new, $current) {
+ /**
+ * Build a new rules engine.
+ *
+ * The document is stored on $this->doc; none of the methods of this
+ * class read it back.
+ *
+ * NOTE(review): no $doc property declaration is visible in this class,
+ * so this assignment creates a dynamic property (deprecated as of
+ * PHP 8.2) - confirm and declare the property if so.
+ *
+ * @param \DOMDocument $doc
+ * The DOM document to use for evaluation and modification.
+ */
+ public function __construct($doc)
+ {
+ $this->doc = $doc;
+ }
- switch($new->tagName) {
- case 'li':
- return $this->handleLI($new, $current);
- case 'dt':
- case 'dd':
- return $this->handleDT($new, $current);
- case 'rt':
- case 'rp':
- return $this->handleRT($new, $current);
- case 'optgroup':
- return $this->closeIfCurrentMatches($new, $current, array('optgroup'));
- case 'option':
- return $this->closeIfCurrentMatches($new, $current, array('option', 'optgroup'));
- case 'tr':
- return $this->closeIfCurrentMatches($new, $current, array('tr'));
- case 'td':
- case 'th':
- return $this->closeIfCurrentMatches($new, $current, array('th', 'td'));
- case 'tbody':
- case 'thead':
- case 'tfoot':
- case 'table': // Spec isn't explicit about this, but it's necessary.
- return $this->closeIfCurrentMatches($new, $current, array('thead', 'tfoot', 'tbody'));
+ /**
+ * Returns TRUE if the given tagname has special processing rules.
+ *
+ * The name is looked up as-is among the keys of static::$tags, so the
+ * match is case-sensitive (the keys are lowercase) and a subclass that
+ * redefines $tags changes the answer.
+ *
+ * @param string $tagname
+ * The tag name to look up.
+ * @return bool TRUE if $tagname is a key of static::$tags.
+ */
+ public function hasRules($tagname)
+ {
+ return isset(static::$tags[$tagname]);
}
- return $current;
- }
+ /**
+ * Evaluate the rule for the current tag name.
+ *
+ * Dispatches on $new->tagName. For every tag handled by the switch,
+ * $new is appended either to $current or to $current's parent (see
+ * closeIfCurrentMatches()), so this may modify the existing DOM.
+ * For any other tag nothing is appended.
+ *
+ * @param \DOMElement $new
+ * The element being inserted; only its tagName is read here.
+ * @param \DOMElement $current
+ * The element currently being built.
+ *
+ * @return \DOMElement The new Current DOM element: $new when a rule
+ * applied, otherwise $current unchanged.
+ */
+ public function evaluate($new, $current)
+ {
+ switch ($new->tagName) {
+ case 'li':
+ return $this->handleLI($new, $current);
+ case 'dt':
+ case 'dd':
+ return $this->handleDT($new, $current);
+ case 'rt':
+ case 'rp':
+ return $this->handleRT($new, $current);
+ case 'optgroup':
+ return $this->closeIfCurrentMatches($new, $current, array(
+ 'optgroup'
+ ));
+ case 'option':
+ return $this->closeIfCurrentMatches($new, $current, array(
+ 'option',
+ 'optgroup'
+ ));
+ case 'tr':
+ return $this->closeIfCurrentMatches($new, $current, array(
+ 'tr'
+ ));
+ case 'td':
+ case 'th':
+ return $this->closeIfCurrentMatches($new, $current, array(
+ 'th',
+ 'td'
+ ));
+ case 'tbody':
+ case 'thead':
+ case 'tfoot':
+ case 'table': // Spec isn't explicit about this, but it's necessary.
+
+ return $this->closeIfCurrentMatches($new, $current, array(
+ 'thead',
+ 'tfoot',
+ 'tbody'
+ ));
+ }
- protected function handleLI($ele, $current) {
- return $this->closeIfCurrentMatches($ele, $current, array('li'));
- }
+ // No special rule matched: nothing was appended, keep the current element.
+ return $current;
+ }
- protected function handleDT($ele, $current) {
- return $this->closeIfCurrentMatches($ele, $current, array('dt','dd'));
- }
- protected function handleRT($ele, $current) {
- return $this->closeIfCurrentMatches($ele, $current, array('rt','rp'));
- }
+ /**
+ * Insert an li element; an li that is the current element is closed first.
+ *
+ * @return \DOMElement The inserted element.
+ */
+ protected function handleLI($ele, $current)
+ {
+ $closedBy = array('li');
+
+ return $this->closeIfCurrentMatches($ele, $current, $closedBy);
+ }
- protected function closeIfCurrentMatches($ele, $current, $match) {
- $tname = $current->tagName;
- if (in_array($current->tagName, $match)) {
- $current->parentNode->appendChild($ele);
+ /**
+ * Insert a dt or dd element; a dt/dd that is the current element is closed first.
+ *
+ * @return \DOMElement The inserted element.
+ */
+ protected function handleDT($ele, $current)
+ {
+ $closedBy = array('dt', 'dd');
+
+ return $this->closeIfCurrentMatches($ele, $current, $closedBy);
}
- else {
- $current->appendChild($ele);
+
+ /**
+ * Insert an rt or rp element; an rt/rp that is the current element is closed first.
+ *
+ * @return \DOMElement The inserted element.
+ */
+ protected function handleRT($ele, $current)
+ {
+ $closedBy = array('rt', 'rp');
+
+ return $this->closeIfCurrentMatches($ele, $current, $closedBy);
}
- return $ele;
- }
+ /**
+ * Append $ele either next to or inside $current.
+ *
+ * If $current's tag name is one of $match, $current is treated as
+ * implicitly closed and $ele is appended to $current's parent.
+ * Otherwise $ele is appended to $current itself.
+ *
+ * @param \DOMElement $ele
+ * The element to insert.
+ * @param \DOMElement $current
+ * The element currently being built.
+ * @param string[] $match
+ * Tag names that $ele implicitly closes.
+ *
+ * @return \DOMElement Always $ele.
+ */
+ protected function closeIfCurrentMatches($ele, $current, $match)
+ {
+ // Tag names and the $match entries are strings, so compare strictly.
+ if (in_array($current->tagName, $match, true)) {
+ $current->parentNode->appendChild($ele);
+ } else {
+ $current->appendChild($ele);
+ }
+
+ return $ele;
+ }
}
diff --git a/src/HTML5/Parser/UTF8Utils.php b/src/HTML5/Parser/UTF8Utils.php
index 37e8856..ed35997 100644
--- a/src/HTML5/Parser/UTF8Utils.php
+++ b/src/HTML5/Parser/UTF8Utils.php
@@ -1,13 +1,14 @@
<?php
+namespace Masterminds\HTML5\Parser;
/*
*
- * Portions based on code from html5lib files with the following copyright:
+* Portions based on code from html5lib files with the following copyright:
Copyright 2009 Geoffrey Sneddon <http://gsnedders.com/>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
+ "Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
@@ -25,145 +26,130 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-namespace Masterminds\HTML5\Parser;
/**
* UTF-8 Utilities
*/
-class UTF8Utils {
- /**
- * The Unicode replacement character..
- */
- const FFFD = "\xEF\xBF\xBD";
- /**
- * Count the number of characters in a string.
- *
- * UTF-8 aware. This will try (in order) iconv,
- * MB, libxml, and finally a custom counter.
- *
- * @todo Move this to a general utility class.
- */
- public static function countChars($string) {
- // Get the length for the string we need.
- if(function_exists('iconv_strlen')) {
- return iconv_strlen($string, 'utf-8');
- }
- elseif(function_exists('mb_strlen')) {
- return mb_strlen($string, 'utf-8');
- }
- elseif(function_exists('utf8_decode')) {
- // MPB: Will this work? Won't certain decodes lead to two chars
- // extrapolated out of 2-byte chars?
- return strlen(utf8_decode($string));
- }
- $count = count_chars($string);
- // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
- // 0x33 = 0xF4 - 0x2C + 1 (one added to get inclusive range)
- return array_sum(array_slice($count, 0, 0x80)) +
- array_sum(array_slice($count, 0xC2, 0x33));
- }
-
- /**
- * Convert data from the given encoding to UTF-8.
- *
- * This has not yet been tested with charactersets other than UTF-8.
- * It should work with ISO-8859-1/-13 and standard Latin Win charsets.
- *
- * @param string $data
- * The data to convert.
- * @param string $encoding
- * A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
- */
- public static function convertToUTF8($data, $encoding = 'UTF-8') {
- /*
- * From the HTML5 spec:
- Given an encoding, the bytes in the input stream must be
- converted to Unicode characters for the tokeniser, as
- described by the rules for that encoding, except that the
- leading U+FEFF BYTE ORDER MARK character, if any, must not
- be stripped by the encoding layer (it is stripped by the rule below).
-
- Bytes or sequences of bytes in the original byte stream that
- could not be converted to Unicode characters must be converted
- to U+FFFD REPLACEMENT CHARACTER code points. */
-
- // mb_convert_encoding is chosen over iconv because of a bug. The best
- // details for the bug are on http://us1.php.net/manual/en/function.iconv.php#108643
- // which contains links to the actual but reports as well as work around
- // details.
- if (function_exists('mb_convert_encoding')) {
- // mb library has the following behaviors:
- // - UTF-16 surrogates result in FALSE.
- // - Overlongs and outside Plane 16 result in empty strings.
-
- // Before we run mb_convert_encoding we need to tell it what to do with
- // characters it does not know. This could be different than the parent
- // application executing this library so we store the value, change it
- // to our needs, and then change it back when we are done. This feels
- // a little excessive and it would be great if there was a better way.
- $save = ini_get('mbstring.substitute_character');
- ini_set('mbstring.substitute_character', "none");
- $data = mb_convert_encoding($data, 'UTF-8', $encoding);
- ini_set('mbstring.substitute_character', $save);
- }
- // @todo Get iconv running in at least some environments if that is possible.
- elseif (function_exists('iconv') && $encoding != 'auto') {
- // fprintf(STDOUT, "iconv found\n");
- // iconv has the following behaviors:
- // - Overlong representations are ignored.
- // - Beyond Plane 16 is replaced with a lower char.
- // - Incomplete sequences generate a warning.
- $data = @iconv($encoding, 'UTF-8//IGNORE', $data);
- }
- else {
- // we can make a conforming native implementation
- throw new Exception('Not implemented, please install mbstring or iconv');
- }
+class UTF8Utils
+{
- /* One leading U+FEFF BYTE ORDER MARK character must be
- ignored if any are present. */
- if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
- $data = substr($data, 3);
- }
+ /**
+ * The Unicode replacement character.
+ */
+ const FFFD = "\xEF\xBF\xBD";
- return $data;
- }
-
- /**
- * Checks for Unicode code points that are not valid in a document.
- *
- * @param string $data
- * A string to analyze.
- * @return array
- * An array of (string) error messages produced by the scanning.
- */
- public static function checkForIllegalCodepoints($data) {
- if (!function_exists('preg_match_all')) {
- throw \Exception('The PCRE library is not loaded or is not available.');
+ /**
+ * Count the number of characters in a string.
+ *
+ * UTF-8 aware. This will try (in order) iconv_strlen(), mb_strlen(),
+ * utf8_decode(), and finally a custom byte counter.
+ *
+ * @todo Move this to a general utility class.
+ *
+ * @param string $string
+ * The string to measure.
+ * @return int The number of characters counted by the first available method.
+ */
+ public static function countChars($string)
+ {
+ // Get the length for the string we need.
+ if (function_exists('iconv_strlen')) {
+ return iconv_strlen($string, 'utf-8');
+ } elseif (function_exists('mb_strlen')) {
+ return mb_strlen($string, 'utf-8');
+ } elseif (function_exists('utf8_decode')) {
+ // MPB: Will this work? Won't certain decodes lead to two chars
+ // extrapolated out of 2-byte chars?
+ return strlen(utf8_decode($string));
+ }
+ // Fallback: tally byte values and add up the counts for 0x00-0x7F and
+ // 0xC2-0xF4 (presumably one such byte per character - assumes
+ // well-formed UTF-8 input).
+ $count = count_chars($string);
+ // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range)
+ // 0x33 = 0xF4 - 0xC2 + 1 (one added to get inclusive range)
+ return array_sum(array_slice($count, 0, 0x80)) + array_sum(array_slice($count, 0xC2, 0x33));
}
- // Vestigal error handling.
- $errors = array();
+ /**
+ * Convert data from the given encoding to UTF-8.
+ *
+ * This has not yet been tested with character sets other than UTF-8.
+ * It should work with ISO-8859-1/-13 and standard Latin Win charsets.
+ *
+ * Uses mb_convert_encoding() when available, otherwise iconv() (unless
+ * $encoding is 'auto'). A single leading UTF-8 byte order mark is
+ * removed from the result.
+ *
+ * @param string $data
+ * The data to convert.
+ * @param string $encoding
+ * A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
+ * @return string The converted data, without a leading byte order mark.
+ * @throws \Masterminds\HTML5\Exception
+ * If neither mbstring nor a usable iconv is available.
+ */
+ public static function convertToUTF8($data, $encoding = 'UTF-8')
+ {
+ /*
+ * From the HTML5 spec: Given an encoding, the bytes in the input stream must be converted to Unicode characters for the tokeniser, as described by the rules for that encoding, except that the leading U+FEFF BYTE ORDER MARK character, if any, must not be stripped by the encoding layer (it is stripped by the rule below). Bytes or sequences of bytes in the original byte stream that could not be converted to Unicode characters must be converted to U+FFFD REPLACEMENT CHARACTER code points.
+ */
+
+ // mb_convert_encoding is chosen over iconv because of a bug. The best
+ // details for the bug are on http://us1.php.net/manual/en/function.iconv.php#108643
+ // which contains links to the actual bug reports as well as work around
+ // details.
+ if (function_exists('mb_convert_encoding')) {
+ // mb library has the following behaviors:
+ // - UTF-16 surrogates result in FALSE.
+ // - Overlongs and outside Plane 16 result in empty strings.
+
+ // Before we run mb_convert_encoding we need to tell it what to do with
+ // characters it does not know. This could be different than the parent
+ // application executing this library so we store the value, change it
+ // to our needs, and then change it back when we are done. This feels
+ // a little excessive and it would be great if there was a better way.
+ $save = ini_get('mbstring.substitute_character');
+ ini_set('mbstring.substitute_character', "none");
+ $data = mb_convert_encoding($data, 'UTF-8', $encoding);
+ ini_set('mbstring.substitute_character', $save);
+ } // @todo Get iconv running in at least some environments if that is possible.
+ elseif (function_exists('iconv') && $encoding != 'auto') {
+ // fprintf(STDOUT, "iconv found\n");
+ // iconv has the following behaviors:
+ // - Overlong representations are ignored.
+ // - Beyond Plane 16 is replaced with a lower char.
+ // - Incomplete sequences generate a warning.
+ $data = @iconv($encoding, 'UTF-8//IGNORE', $data);
+ } else {
+ // we can make a conforming native implementation
+ // Fully qualified on purpose: a bare "Exception" would be resolved
+ // inside the Masterminds\HTML5\Parser namespace, where no such class
+ // is defined.
+ throw new \Masterminds\HTML5\Exception('Not implemented, please install mbstring or iconv');
+ }
+
+ /*
+ * One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present.
+ */
+ if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
+ $data = substr($data, 3);
+ }
- /* All U+0000 NULL characters in the input must be replaced
- by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such
- characters is a parse error. */
- for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) {
- $errors[] = 'null-character';
+ return $data;
}
- /* Any occurrences of any characters in the ranges U+0001 to
- U+0008, U+000B, U+000E to U+001F, U+007F to U+009F,
- U+D800 to U+DFFF , U+FDD0 to U+FDEF, and
- characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF,
- U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE,
- U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF,
- U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE,
- U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and
- U+10FFFF are parse errors. (These are all control characters
- or permanently undefined Unicode characters.) */
- // Check PCRE is loaded.
- $count = preg_match_all(
- '/(?:
+ /**
+ * Checks for Unicode code points that are not valid in a document.
+ *
+ * This only reports problems; $data itself is not modified. One
+ * 'null-character' entry is produced per NUL byte and one
+ * 'invalid-codepoint' entry per match of the illegal-codepoint pattern.
+ *
+ * @param string $data
+ * A string to analyze.
+ * @return array An array of (string) error messages produced by the scanning.
+ * @throws \Exception
+ * If the PCRE functions are not available.
+ */
+ public static function checkForIllegalCodepoints($data)
+ {
+ if (! function_exists('preg_match_all')) {
+ // "new" is required here: without it PHP tries to call a function
+ // named \Exception() instead of throwing an exception object.
+ throw new \Exception('The PCRE library is not loaded or is not available.');
+ }
+
+ // Vestigial error handling.
+ $errors = array();
+
+ /*
+ * All U+0000 NULL characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such characters is a parse error.
+ */
+ for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i ++) {
+ $errors[] = 'null-character';
+ }
+
+ /*
+ * Any occurrences of any characters in the ranges U+0001 to U+0008, U+000B, U+000E to U+001F, U+007F to U+009F, U+D800 to U+DFFF , U+FDD0 to U+FDEF, and characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE, U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF, U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE, U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and U+10FFFF are parse errors. (These are all control characters or permanently undefined Unicode characters.)
+ */
+ // Check PCRE is loaded.
+ $count = preg_match_all(
+ '/(?:
[\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B, U+000E to U+001F and U+007F
|
\xC2[\x80-\x9F] # U+0080 to U+009F
@@ -175,13 +161,11 @@ class UTF8Utils {
\xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
|
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
- )/x',
- $data,
- $matches
- );
- for ($i = 0; $i < $count; $i++) {
- $errors[] = 'invalid-codepoint';
+ )/x', $data, $matches);
+ for ($i = 0; $i < $count; $i ++) {
+ $errors[] = 'invalid-codepoint';
+ }
+
+ return $errors;
}
- return $errors;
- }
}
diff --git a/src/HTML5/Serializer/HTML5Entities.php b/src/HTML5/Serializer/HTML5Entities.php
index 91322c9..4f90f84 100644
--- a/src/HTML5/Serializer/HTML5Entities.php
+++ b/src/HTML5/Serializer/HTML5Entities.php
@@ -11,1520 +11,1523 @@
namespace Masterminds\HTML5\Serializer;
/**
- * A mapping of entities to their html5 representation. Used for older PHP
+ * A mapping of entities to their html5 representation.
+ * Used for older PHP
* versions that don't have the mapping.
*/
-class HTML5Entities {
- public static $map = array (
- ' ' => '&Tab;',
- "\n" => '&NewLine;',
- '!' => '&excl;',
- '"' => '&quot;',
- '#' => '&num;',
- '$' => '&dollar;',
- '%' => '&percnt;',
- '&' => '&amp;',
- '\'' => '&apos;',
- '(' => '&lpar;',
- ')' => '&rpar;',
- '*' => '&ast;',
- '+' => '&plus;',
- ',' => '&comma;',
- '.' => '&period;',
- '/' => '&sol;',
- ':' => '&colon;',
- ';' => '&semi;',
- '<' => '&lt;',
- '<⃒' => '&nvlt',
- '=' => '&equals;',
- '=⃥' => '&bne',
- '>' => '&gt;',
- '>⃒' => '&nvgt',
- '?' => '&quest;',
- '@' => '&commat;',
- '[' => '&lbrack;',
- '\\' => '&bsol;',
- ']' => '&rsqb;',
- '^' => '&Hat;',
- '_' => '&lowbar;',
- '`' => '&grave;',
- 'fj' => '&fjlig',
- '{' => '&lbrace;',
- '|' => '&vert;',
- '}' => '&rcub;',
- ' ' => '&nbsp;',
- '¡' => '&iexcl;',
- '¢' => '&cent;',
- '£' => '&pound;',
- '¤' => '&curren;',
- '¥' => '&yen;',
- '¦' => '&brvbar;',
- '§' => '&sect;',
- '¨' => '&DoubleDot;',
- '©' => '&copy;',
- 'ª' => '&ordf;',
- '«' => '&laquo;',
- '¬' => '&not;',
- '­' => '&shy;',
- '®' => '&reg;',
- '¯' => '&macr;',
- '°' => '&deg;',
- '±' => '&plusmn;',
- '²' => '&sup2;',
- '³' => '&sup3;',
- '´' => '&DiacriticalAcute;',
- 'µ' => '&micro;',
- '¶' => '&para;',
- '·' => '&CenterDot;',
- '¸' => '&Cedilla;',
- '¹' => '&sup1;',
- 'º' => '&ordm;',
- '»' => '&raquo;',
- '¼' => '&frac14;',
- '½' => '&half;',
- '¾' => '&frac34;',
- '¿' => '&iquest;',
- 'À' => '&Agrave;',
- 'Á' => '&Aacute;',
- 'Â' => '&Acirc;',
- 'Ã' => '&Atilde;',
- 'Ä' => '&Auml;',
- 'Å' => '&Aring;',
- 'Æ' => '&AElig;',
- 'Ç' => '&Ccedil;',
- 'È' => '&Egrave;',
- 'É' => '&Eacute;',
- 'Ê' => '&Ecirc;',
- 'Ë' => '&Euml;',
- 'Ì' => '&Igrave;',
- 'Í' => '&Iacute;',
- 'Î' => '&Icirc;',
- 'Ï' => '&Iuml;',
- 'Ð' => '&ETH;',
- 'Ñ' => '&Ntilde;',
- 'Ò' => '&Ograve;',
- 'Ó' => '&Oacute;',
- 'Ô' => '&Ocirc;',
- 'Õ' => '&Otilde;',
- 'Ö' => '&Ouml;',
- '×' => '&times;',
- 'Ø' => '&Oslash;',
- 'Ù' => '&Ugrave;',
- 'Ú' => '&Uacute;',
- 'Û' => '&Ucirc;',
- 'Ü' => '&Uuml;',
- 'Ý' => '&Yacute;',
- 'Þ' => '&THORN;',
- 'ß' => '&szlig;',
- 'à' => '&agrave;',
- 'á' => '&aacute;',
- 'â' => '&acirc;',
- 'ã' => '&atilde;',
- 'ä' => '&auml;',
- 'å' => '&aring;',
- 'æ' => '&aelig;',
- 'ç' => '&ccedil;',
- 'è' => '&egrave;',
- 'é' => '&eacute;',
- 'ê' => '&ecirc;',
- 'ë' => '&euml;',
- 'ì' => '&igrave;',
- 'í' => '&iacute;',
- 'î' => '&icirc;',
- 'ï' => '&iuml;',
- 'ð' => '&eth;',
- 'ñ' => '&ntilde;',
- 'ò' => '&ograve;',
- 'ó' => '&oacute;',
- 'ô' => '&ocirc;',
- 'õ' => '&otilde;',
- 'ö' => '&ouml;',
- '÷' => '&divide;',
- 'ø' => '&oslash;',
- 'ù' => '&ugrave;',
- 'ú' => '&uacute;',
- 'û' => '&ucirc;',
- 'ü' => '&uuml;',
- 'ý' => '&yacute;',
- 'þ' => '&thorn;',
- 'ÿ' => '&yuml;',
- 'Ā' => '&Amacr;',
- 'ā' => '&amacr;',
- 'Ă' => '&Abreve;',
- 'ă' => '&abreve;',
- 'Ą' => '&Aogon;',
- 'ą' => '&aogon;',
- 'Ć' => '&Cacute;',
- 'ć' => '&cacute;',
- 'Ĉ' => '&Ccirc;',
- 'ĉ' => '&ccirc;',
- 'Ċ' => '&Cdot;',
- 'ċ' => '&cdot;',
- 'Č' => '&Ccaron;',
- 'č' => '&ccaron;',
- 'Ď' => '&Dcaron;',
- 'ď' => '&dcaron;',
- 'Đ' => '&Dstrok;',
- 'đ' => '&dstrok;',
- 'Ē' => '&Emacr;',
- 'ē' => '&emacr;',
- 'Ė' => '&Edot;',
- 'ė' => '&edot;',
- 'Ę' => '&Eogon;',
- 'ę' => '&eogon;',
- 'Ě' => '&Ecaron;',
- 'ě' => '&ecaron;',
- 'Ĝ' => '&Gcirc;',
- 'ĝ' => '&gcirc;',
- 'Ğ' => '&Gbreve;',
- 'ğ' => '&gbreve;',
- 'Ġ' => '&Gdot;',
- 'ġ' => '&gdot;',
- 'Ģ' => '&Gcedil;',
- 'Ĥ' => '&Hcirc;',
- 'ĥ' => '&hcirc;',
- 'Ħ' => '&Hstrok;',
- 'ħ' => '&hstrok;',
- 'Ĩ' => '&Itilde;',
- 'ĩ' => '&itilde;',
- 'Ī' => '&Imacr;',
- 'ī' => '&imacr;',
- 'Į' => '&Iogon;',
- 'į' => '&iogon;',
- 'İ' => '&Idot;',
- 'ı' => '&inodot;',
- 'IJ' => '&IJlig;',
- 'ij' => '&ijlig;',
- 'Ĵ' => '&Jcirc;',
- 'ĵ' => '&jcirc;',
- 'Ķ' => '&Kcedil;',
- 'ķ' => '&kcedil;',
- 'ĸ' => '&kgreen;',
- 'Ĺ' => '&Lacute;',
- 'ĺ' => '&lacute;',
- 'Ļ' => '&Lcedil;',
- 'ļ' => '&lcedil;',
- 'Ľ' => '&Lcaron;',
- 'ľ' => '&lcaron;',
- 'Ŀ' => '&Lmidot;',
- 'ŀ' => '&lmidot;',
- 'Ł' => '&Lstrok;',
- 'ł' => '&lstrok;',
- 'Ń' => '&Nacute;',
- 'ń' => '&nacute;',
- 'Ņ' => '&Ncedil;',
- 'ņ' => '&ncedil;',
- 'Ň' => '&Ncaron;',
- 'ň' => '&ncaron;',
- 'ʼn' => '&napos;',
- 'Ŋ' => '&ENG;',
- 'ŋ' => '&eng;',
- 'Ō' => '&Omacr;',
- 'ō' => '&omacr;',
- 'Ő' => '&Odblac;',
- 'ő' => '&odblac;',
- 'Œ' => '&OElig;',
- 'œ' => '&oelig;',
- 'Ŕ' => '&Racute;',
- 'ŕ' => '&racute;',
- 'Ŗ' => '&Rcedil;',
- 'ŗ' => '&rcedil;',
- 'Ř' => '&Rcaron;',
- 'ř' => '&rcaron;',
- 'Ś' => '&Sacute;',
- 'ś' => '&sacute;',
- 'Ŝ' => '&Scirc;',
- 'ŝ' => '&scirc;',
- 'Ş' => '&Scedil;',
- 'ş' => '&scedil;',
- 'Š' => '&Scaron;',
- 'š' => '&scaron;',
- 'Ţ' => '&Tcedil;',
- 'ţ' => '&tcedil;',
- 'Ť' => '&Tcaron;',
- 'ť' => '&tcaron;',
- 'Ŧ' => '&Tstrok;',
- 'ŧ' => '&tstrok;',
- 'Ũ' => '&Utilde;',
- 'ũ' => '&utilde;',
- 'Ū' => '&Umacr;',
- 'ū' => '&umacr;',
- 'Ŭ' => '&Ubreve;',
- 'ŭ' => '&ubreve;',
- 'Ů' => '&Uring;',
- 'ů' => '&uring;',
- 'Ű' => '&Udblac;',
- 'ű' => '&udblac;',
- 'Ų' => '&Uogon;',
- 'ų' => '&uogon;',
- 'Ŵ' => '&Wcirc;',
- 'ŵ' => '&wcirc;',
- 'Ŷ' => '&Ycirc;',
- 'ŷ' => '&ycirc;',
- 'Ÿ' => '&Yuml;',
- 'Ź' => '&Zacute;',
- 'ź' => '&zacute;',
- 'Ż' => '&Zdot;',
- 'ż' => '&zdot;',
- 'Ž' => '&Zcaron;',
- 'ž' => '&zcaron;',
- 'ƒ' => '&fnof;',
- 'Ƶ' => '&imped;',
- 'ǵ' => '&gacute;',
- 'ȷ' => '&jmath;',
- 'ˆ' => '&circ;',
- 'ˇ' => '&Hacek;',
- '˘' => '&Breve;',
- '˙' => '&dot;',
- '˚' => '&ring;',
- '˛' => '&ogon;',
- '˜' => '&DiacriticalTilde;',
- '˝' => '&DiacriticalDoubleAcute;',
- '̑' => '&DownBreve;',
- 'Α' => '&Alpha;',
- 'Β' => '&Beta;',
- 'Γ' => '&Gamma;',
- 'Δ' => '&Delta;',
- 'Ε' => '&Epsilon;',
- 'Ζ' => '&Zeta;',
- 'Η' => '&Eta;',
- 'Θ' => '&Theta;',
- 'Ι' => '&Iota;',
- 'Κ' => '&Kappa;',
- 'Λ' => '&Lambda;',
- 'Μ' => '&Mu;',
- 'Ν' => '&Nu;',
- 'Ξ' => '&Xi;',
- 'Ο' => '&Omicron;',
- 'Π' => '&Pi;',
- 'Ρ' => '&Rho;',
- 'Σ' => '&Sigma;',
- 'Τ' => '&Tau;',
- 'Υ' => '&Upsilon;',
- 'Φ' => '&Phi;',
- 'Χ' => '&Chi;',
- 'Ψ' => '&Psi;',
- 'Ω' => '&Omega;',
- 'α' => '&alpha;',
- 'β' => '&beta;',
- 'γ' => '&gamma;',
- 'δ' => '&delta;',
- 'ε' => '&epsi;',
- 'ζ' => '&zeta;',
- 'η' => '&eta;',
- 'θ' => '&theta;',
- 'ι' => '&iota;',
- 'κ' => '&kappa;',
- 'λ' => '&lambda;',
- 'μ' => '&mu;',
- 'ν' => '&nu;',
- 'ξ' => '&xi;',
- 'ο' => '&omicron;',
- 'π' => '&pi;',
- 'ρ' => '&rho;',
- 'ς' => '&sigmav;',
- 'σ' => '&sigma;',
- 'τ' => '&tau;',
- 'υ' => '&upsi;',
- 'φ' => '&phi;',
- 'χ' => '&chi;',
- 'ψ' => '&psi;',
- 'ω' => '&omega;',
- 'ϑ' => '&thetasym;',
- 'ϒ' => '&upsih;',
- 'ϕ' => '&straightphi;',
- 'ϖ' => '&piv;',
- 'Ϝ' => '&Gammad;',
- 'ϝ' => '&gammad;',
- 'ϰ' => '&varkappa;',
- 'ϱ' => '&rhov;',
- 'ϵ' => '&straightepsilon;',
- '϶' => '&backepsilon;',
- 'Ё' => '&IOcy;',
- 'Ђ' => '&DJcy;',
- 'Ѓ' => '&GJcy;',
- 'Є' => '&Jukcy;',
- 'Ѕ' => '&DScy;',
- 'І' => '&Iukcy;',
- 'Ї' => '&YIcy;',
- 'Ј' => '&Jsercy;',
- 'Љ' => '&LJcy;',
- 'Њ' => '&NJcy;',
- 'Ћ' => '&TSHcy;',
- 'Ќ' => '&KJcy;',
- 'Ў' => '&Ubrcy;',
- 'Џ' => '&DZcy;',
- 'А' => '&Acy;',
- 'Б' => '&Bcy;',
- 'В' => '&Vcy;',
- 'Г' => '&Gcy;',
- 'Д' => '&Dcy;',
- 'Е' => '&IEcy;',
- 'Ж' => '&ZHcy;',
- 'З' => '&Zcy;',
- 'И' => '&Icy;',
- 'Й' => '&Jcy;',
- 'К' => '&Kcy;',
- 'Л' => '&Lcy;',
- 'М' => '&Mcy;',
- 'Н' => '&Ncy;',
- 'О' => '&Ocy;',
- 'П' => '&Pcy;',
- 'Р' => '&Rcy;',
- 'С' => '&Scy;',
- 'Т' => '&Tcy;',
- 'У' => '&Ucy;',
- 'Ф' => '&Fcy;',
- 'Х' => '&KHcy;',
- 'Ц' => '&TScy;',
- 'Ч' => '&CHcy;',
- 'Ш' => '&SHcy;',
- 'Щ' => '&SHCHcy;',
- 'Ъ' => '&HARDcy;',
- 'Ы' => '&Ycy;',
- 'Ь' => '&SOFTcy;',
- 'Э' => '&Ecy;',
- 'Ю' => '&YUcy;',
- 'Я' => '&YAcy;',
- 'а' => '&acy;',
- 'б' => '&bcy;',
- 'в' => '&vcy;',
- 'г' => '&gcy;',
- 'д' => '&dcy;',
- 'е' => '&iecy;',
- 'ж' => '&zhcy;',
- 'з' => '&zcy;',
- 'и' => '&icy;',
- 'й' => '&jcy;',
- 'к' => '&kcy;',
- 'л' => '&lcy;',
- 'м' => '&mcy;',
- 'н' => '&ncy;',
- 'о' => '&ocy;',
- 'п' => '&pcy;',
- 'р' => '&rcy;',
- 'с' => '&scy;',
- 'т' => '&tcy;',
- 'у' => '&ucy;',
- 'ф' => '&fcy;',
- 'х' => '&khcy;',
- 'ц' => '&tscy;',
- 'ч' => '&chcy;',
- 'ш' => '&shcy;',
- 'щ' => '&shchcy;',
- 'ъ' => '&hardcy;',
- 'ы' => '&ycy;',
- 'ь' => '&softcy;',
- 'э' => '&ecy;',
- 'ю' => '&yucy;',
- 'я' => '&yacy;',
- 'ё' => '&iocy;',
- 'ђ' => '&djcy;',
- 'ѓ' => '&gjcy;',
- 'є' => '&jukcy;',
- 'ѕ' => '&dscy;',
- 'і' => '&iukcy;',
- 'ї' => '&yicy;',
- 'ј' => '&jsercy;',
- 'љ' => '&ljcy;',
- 'њ' => '&njcy;',
- 'ћ' => '&tshcy;',
- 'ќ' => '&kjcy;',
- 'ў' => '&ubrcy;',
- 'џ' => '&dzcy;',
- ' ' => '&ensp;',
- ' ' => '&emsp;',
- ' ' => '&emsp13;',
- ' ' => '&emsp14;',
- ' ' => '&numsp;',
- ' ' => '&puncsp;',
- ' ' => '&ThinSpace;',
- ' ' => '&hairsp;',
- '​' => '&ZeroWidthSpace;',
- '‌' => '&zwnj;',
- '‍' => '&zwj;',
- '‎' => '&lrm;',
- '‏' => '&rlm;',
- '‐' => '&hyphen;',
- '–' => '&ndash;',
- '—' => '&mdash;',
- '―' => '&horbar;',
- '‖' => '&Verbar;',
- '‘' => '&OpenCurlyQuote;',
- '’' => '&rsquo;',
- '‚' => '&sbquo;',
- '“' => '&OpenCurlyDoubleQuote;',
- '”' => '&rdquo;',
- '„' => '&bdquo;',
- '†' => '&dagger;',
- '‡' => '&Dagger;',
- '•' => '&bull;',
- '‥' => '&nldr;',
- '…' => '&hellip;',
- '‰' => '&permil;',
- '‱' => '&pertenk;',
- '′' => '&prime;',
- '″' => '&Prime;',
- '‴' => '&tprime;',
- '‵' => '&backprime;',
- '‹' => '&lsaquo;',
- '›' => '&rsaquo;',
- '‾' => '&oline;',
- '⁁' => '&caret;',
- '⁃' => '&hybull;',
- '⁄' => '&frasl;',
- '⁏' => '&bsemi;',
- '⁗' => '&qprime;',
- ' ' => '&MediumSpace;',
- '  ' => '&ThickSpace',
- '⁠' => '&NoBreak;',
- '⁡' => '&af;',
- '⁢' => '&InvisibleTimes;',
- '⁣' => '&ic;',
- '€' => '&euro;',
- '⃛' => '&TripleDot;',
- '⃜' => '&DotDot;',
- 'ℂ' => '&complexes;',
- '℅' => '&incare;',
- 'ℊ' => '&gscr;',
- 'ℋ' => '&HilbertSpace;',
- 'ℌ' => '&Hfr;',
- 'ℍ' => '&Hopf;',
- 'ℎ' => '&planckh;',
- 'ℏ' => '&planck;',
- 'ℐ' => '&imagline;',
- 'ℑ' => '&Ifr;',
- 'ℒ' => '&lagran;',
- 'ℓ' => '&ell;',
- 'ℕ' => '&naturals;',
- '№' => '&numero;',
- '℗' => '&copysr;',
- '℘' => '&wp;',
- 'ℙ' => '&primes;',
- 'ℚ' => '&rationals;',
- 'ℛ' => '&realine;',
- 'ℜ' => '&Rfr;',
- 'ℝ' => '&Ropf;',
- '℞' => '&rx;',
- '™' => '&trade;',
- 'ℤ' => '&Zopf;',
- '℧' => '&mho;',
- 'ℨ' => '&Zfr;',
- '℩' => '&iiota;',
- 'ℬ' => '&Bscr;',
- 'ℭ' => '&Cfr;',
- 'ℯ' => '&escr;',
- 'ℰ' => '&expectation;',
- 'ℱ' => '&Fouriertrf;',
- 'ℳ' => '&Mellintrf;',
- 'ℴ' => '&orderof;',
- 'ℵ' => '&aleph;',
- 'ℶ' => '&beth;',
- 'ℷ' => '&gimel;',
- 'ℸ' => '&daleth;',
- 'ⅅ' => '&CapitalDifferentialD;',
- 'ⅆ' => '&DifferentialD;',
- 'ⅇ' => '&exponentiale;',
- 'ⅈ' => '&ImaginaryI;',
- '⅓' => '&frac13;',
- '⅔' => '&frac23;',
- '⅕' => '&frac15;',
- '⅖' => '&frac25;',
- '⅗' => '&frac35;',
- '⅘' => '&frac45;',
- '⅙' => '&frac16;',
- '⅚' => '&frac56;',
- '⅛' => '&frac18;',
- '⅜' => '&frac38;',
- '⅝' => '&frac58;',
- '⅞' => '&frac78;',
- '←' => '&larr;',
- '↑' => '&uarr;',
- '→' => '&srarr;',
- '↓' => '&darr;',
- '↔' => '&harr;',
- '↕' => '&UpDownArrow;',
- '↖' => '&nwarrow;',
- '↗' => '&UpperRightArrow;',
- '↘' => '&LowerRightArrow;',
- '↙' => '&swarr;',
- '↚' => '&nleftarrow;',
- '↛' => '&nrarr;',
- '↝' => '&rarrw;',
- '↝̸' => '&nrarrw',
- '↞' => '&Larr;',
- '↟' => '&Uarr;',
- '↠' => '&twoheadrightarrow;',
- '↡' => '&Darr;',
- '↢' => '&larrtl;',
- '↣' => '&rarrtl;',
- '↤' => '&LeftTeeArrow;',
- '↥' => '&UpTeeArrow;',
- '↦' => '&map;',
- '↧' => '&DownTeeArrow;',
- '↩' => '&larrhk;',
- '↪' => '&rarrhk;',
- '↫' => '&larrlp;',
- '↬' => '&looparrowright;',
- '↭' => '&harrw;',
- '↮' => '&nleftrightarrow;',
- '↰' => '&Lsh;',
- '↱' => '&rsh;',
- '↲' => '&ldsh;',
- '↳' => '&rdsh;',
- '↵' => '&crarr;',
- '↶' => '&curvearrowleft;',
- '↷' => '&curarr;',
- '↺' => '&olarr;',
- '↻' => '&orarr;',
- '↼' => '&leftharpoonup;',
- '↽' => '&leftharpoondown;',
- '↾' => '&RightUpVector;',
- '↿' => '&uharl;',
- '⇀' => '&rharu;',
- '⇁' => '&rhard;',
- '⇂' => '&RightDownVector;',
- '⇃' => '&dharl;',
- '⇄' => '&rightleftarrows;',
- '⇅' => '&udarr;',
- '⇆' => '&lrarr;',
- '⇇' => '&llarr;',
- '⇈' => '&upuparrows;',
- '⇉' => '&rrarr;',
- '⇊' => '&downdownarrows;',
- '⇋' => '&leftrightharpoons;',
- '⇌' => '&rightleftharpoons;',
- '⇍' => '&nLeftarrow;',
- '⇎' => '&nhArr;',
- '⇏' => '&nrArr;',
- '⇐' => '&DoubleLeftArrow;',
- '⇑' => '&DoubleUpArrow;',
- '⇒' => '&Implies;',
- '⇓' => '&Downarrow;',
- '⇔' => '&hArr;',
- '⇕' => '&Updownarrow;',
- '⇖' => '&nwArr;',
- '⇗' => '&neArr;',
- '⇘' => '&seArr;',
- '⇙' => '&swArr;',
- '⇚' => '&lAarr;',
- '⇛' => '&rAarr;',
- '⇝' => '&zigrarr;',
- '⇤' => '&LeftArrowBar;',
- '⇥' => '&RightArrowBar;',
- '⇵' => '&DownArrowUpArrow;',
- '⇽' => '&loarr;',
- '⇾' => '&roarr;',
- '⇿' => '&hoarr;',
- '∀' => '&forall;',
- '∁' => '&comp;',
- '∂' => '&part;',
- '∂̸' => '&npart',
- '∃' => '&Exists;',
- '∄' => '&nexist;',
- '∅' => '&empty;',
- '∇' => '&nabla;',
- '∈' => '&isinv;',
- '∉' => '&notin;',
- '∋' => '&ReverseElement;',
- '∌' => '&notniva;',
- '∏' => '&prod;',
- '∐' => '&Coproduct;',
- '∑' => '&sum;',
- '−' => '&minus;',
- '∓' => '&MinusPlus;',
- '∔' => '&plusdo;',
- '∖' => '&ssetmn;',
- '∗' => '&lowast;',
- '∘' => '&compfn;',
- '√' => '&Sqrt;',
- '∝' => '&prop;',
- '∞' => '&infin;',
- '∟' => '&angrt;',
- '∠' => '&angle;',
- '∠⃒' => '&nang',
- '∡' => '&angmsd;',
- '∢' => '&angsph;',
- '∣' => '&mid;',
- '∤' => '&nshortmid;',
- '∥' => '&shortparallel;',
- '∦' => '&nparallel;',
- '∧' => '&and;',
- '∨' => '&or;',
- '∩' => '&cap;',
- '∩︀' => '&caps',
- '∪' => '&cup;',
- '∪︀' => '&cups',
- '∫' => '&Integral;',
- '∬' => '&Int;',
- '∭' => '&tint;',
- '∮' => '&ContourIntegral;',
- '∯' => '&DoubleContourIntegral;',
- '∰' => '&Cconint;',
- '∱' => '&cwint;',
- '∲' => '&cwconint;',
- '∳' => '&awconint;',
- '∴' => '&there4;',
- '∵' => '&Because;',
- '∶' => '&ratio;',
- '∷' => '&Colon;',
- '∸' => '&minusd;',
- '∺' => '&mDDot;',
- '∻' => '&homtht;',
- '∼' => '&sim;',
- '∼⃒' => '&nvsim',
- '∽' => '&bsim;',
- '∽̱' => '&race',
- '∾' => '&ac;',
- '∾̳' => '&acE',
- '∿' => '&acd;',
- '≀' => '&wr;',
- '≁' => '&NotTilde;',
- '≂' => '&esim;',
- '≂̸' => '&nesim',
- '≃' => '&simeq;',
- '≄' => '&nsime;',
- '≅' => '&TildeFullEqual;',
- '≆' => '&simne;',
- '≇' => '&ncong;',
- '≈' => '&approx;',
- '≉' => '&napprox;',
- '≊' => '&ape;',
- '≋' => '&apid;',
- '≋̸' => '&napid',
- '≌' => '&bcong;',
- '≍' => '&CupCap;',
- '≍⃒' => '&nvap',
- '≎' => '&bump;',
- '≎̸' => '&nbump',
- '≏' => '&HumpEqual;',
- '≏̸' => '&nbumpe',
- '≐' => '&esdot;',
- '≐̸' => '&nedot',
- '≑' => '&doteqdot;',
- '≒' => '&fallingdotseq;',
- '≓' => '&risingdotseq;',
- '≔' => '&coloneq;',
- '≕' => '&eqcolon;',
- '≖' => '&ecir;',
- '≗' => '&circeq;',
- '≙' => '&wedgeq;',
- '≚' => '&veeeq;',
- '≜' => '&triangleq;',
- '≟' => '&equest;',
- '≠' => '&NotEqual;',
- '≡' => '&Congruent;',
- '≡⃥' => '&bnequiv',
- '≢' => '&NotCongruent;',
- '≤' => '&leq;',
- '≤⃒' => '&nvle',
- '≥' => '&ge;',
- '≥⃒' => '&nvge',
- '≦' => '&lE;',
- '≦̸' => '&nlE',
- '≧' => '&geqq;',
- '≧̸' => '&NotGreaterFullEqual',
- '≨' => '&lneqq;',
- '≨︀' => '&lvertneqq',
- '≩' => '&gneqq;',
- '≩︀' => '&gvertneqq',
- '≪' => '&ll;',
- '≪̸' => '&nLtv',
- '≪⃒' => '&nLt',
- '≫' => '&gg;',
- '≫̸' => '&NotGreaterGreater',
- '≫⃒' => '&nGt',
- '≬' => '&between;',
- '≭' => '&NotCupCap;',
- '≮' => '&NotLess;',
- '≯' => '&ngtr;',
- '≰' => '&NotLessEqual;',
- '≱' => '&ngeq;',
- '≲' => '&LessTilde;',
- '≳' => '&GreaterTilde;',
- '≴' => '&nlsim;',
- '≵' => '&ngsim;',
- '≶' => '&lessgtr;',
- '≷' => '&gl;',
- '≸' => '&ntlg;',
- '≹' => '&NotGreaterLess;',
- '≺' => '&prec;',
- '≻' => '&succ;',
- '≼' => '&PrecedesSlantEqual;',
- '≽' => '&succcurlyeq;',
- '≾' => '&precsim;',
- '≿' => '&SucceedsTilde;',
- '≿̸' => '&NotSucceedsTilde',
- '⊀' => '&npr;',
- '⊁' => '&NotSucceeds;',
- '⊂' => '&sub;',
- '⊂⃒' => '&vnsub',
- '⊃' => '&sup;',
- '⊃⃒' => '&nsupset',
- '⊄' => '&nsub;',
- '⊅' => '&nsup;',
- '⊆' => '&SubsetEqual;',
- '⊇' => '&supe;',
- '⊈' => '&NotSubsetEqual;',
- '⊉' => '&NotSupersetEqual;',
- '⊊' => '&subsetneq;',
- '⊊︀' => '&vsubne',
- '⊋' => '&supsetneq;',
- '⊋︀' => '&vsupne',
- '⊍' => '&cupdot;',
- '⊎' => '&UnionPlus;',
- '⊏' => '&sqsub;',
- '⊏̸' => '&NotSquareSubset',
- '⊐' => '&sqsupset;',
- '⊐̸' => '&NotSquareSuperset',
- '⊑' => '&SquareSubsetEqual;',
- '⊒' => '&SquareSupersetEqual;',
- '⊓' => '&sqcap;',
- '⊓︀' => '&sqcaps',
- '⊔' => '&sqcup;',
- '⊔︀' => '&sqcups',
- '⊕' => '&CirclePlus;',
- '⊖' => '&ominus;',
- '⊗' => '&CircleTimes;',
- '⊘' => '&osol;',
- '⊙' => '&CircleDot;',
- '⊚' => '&ocir;',
- '⊛' => '&oast;',
- '⊝' => '&odash;',
- '⊞' => '&boxplus;',
- '⊟' => '&boxminus;',
- '⊠' => '&timesb;',
- '⊡' => '&sdotb;',
- '⊢' => '&vdash;',
- '⊣' => '&dashv;',
- '⊤' => '&DownTee;',
- '⊥' => '&perp;',
- '⊧' => '&models;',
- '⊨' => '&DoubleRightTee;',
- '⊩' => '&Vdash;',
- '⊪' => '&Vvdash;',
- '⊫' => '&VDash;',
- '⊬' => '&nvdash;',
- '⊭' => '&nvDash;',
- '⊮' => '&nVdash;',
- '⊯' => '&nVDash;',
- '⊰' => '&prurel;',
- '⊲' => '&vartriangleleft;',
- '⊳' => '&vrtri;',
- '⊴' => '&LeftTriangleEqual;',
- '⊴⃒' => '&nvltrie',
- '⊵' => '&RightTriangleEqual;',
- '⊵⃒' => '&nvrtrie',
- '⊶' => '&origof;',
- '⊷' => '&imof;',
- '⊸' => '&mumap;',
- '⊹' => '&hercon;',
- '⊺' => '&intcal;',
- '⊻' => '&veebar;',
- '⊽' => '&barvee;',
- '⊾' => '&angrtvb;',
- '⊿' => '&lrtri;',
- '⋀' => '&xwedge;',
- '⋁' => '&xvee;',
- '⋂' => '&bigcap;',
- '⋃' => '&bigcup;',
- '⋄' => '&diamond;',
- '⋅' => '&sdot;',
- '⋆' => '&Star;',
- '⋇' => '&divonx;',
- '⋈' => '&bowtie;',
- '⋉' => '&ltimes;',
- '⋊' => '&rtimes;',
- '⋋' => '&lthree;',
- '⋌' => '&rthree;',
- '⋍' => '&backsimeq;',
- '⋎' => '&curlyvee;',
- '⋏' => '&curlywedge;',
- '⋐' => '&Sub;',
- '⋑' => '&Supset;',
- '⋒' => '&Cap;',
- '⋓' => '&Cup;',
- '⋔' => '&pitchfork;',
- '⋕' => '&epar;',
- '⋖' => '&lessdot;',
- '⋗' => '&gtrdot;',
- '⋘' => '&Ll;',
- '⋘̸' => '&nLl',
- '⋙' => '&Gg;',
- '⋙̸' => '&nGg',
- '⋚' => '&lesseqgtr;',
- '⋚︀' => '&lesg',
- '⋛' => '&gtreqless;',
- '⋛︀' => '&gesl',
- '⋞' => '&curlyeqprec;',
- '⋟' => '&cuesc;',
- '⋠' => '&NotPrecedesSlantEqual;',
- '⋡' => '&NotSucceedsSlantEqual;',
- '⋢' => '&NotSquareSubsetEqual;',
- '⋣' => '&NotSquareSupersetEqual;',
- '⋦' => '&lnsim;',
- '⋧' => '&gnsim;',
- '⋨' => '&precnsim;',
- '⋩' => '&scnsim;',
- '⋪' => '&nltri;',
- '⋫' => '&ntriangleright;',
- '⋬' => '&nltrie;',
- '⋭' => '&NotRightTriangleEqual;',
- '⋮' => '&vellip;',
- '⋯' => '&ctdot;',
- '⋰' => '&utdot;',
- '⋱' => '&dtdot;',
- '⋲' => '&disin;',
- '⋳' => '&isinsv;',
- '⋴' => '&isins;',
- '⋵' => '&isindot;',
- '⋵̸' => '&notindot',
- '⋶' => '&notinvc;',
- '⋷' => '&notinvb;',
- '⋹' => '&isinE;',
- '⋹̸' => '&notinE',
- '⋺' => '&nisd;',
- '⋻' => '&xnis;',
- '⋼' => '&nis;',
- '⋽' => '&notnivc;',
- '⋾' => '&notnivb;',
- '⌅' => '&barwed;',
- '⌆' => '&doublebarwedge;',
- '⌈' => '&lceil;',
- '⌉' => '&RightCeiling;',
- '⌊' => '&LeftFloor;',
- '⌋' => '&RightFloor;',
- '⌌' => '&drcrop;',
- '⌍' => '&dlcrop;',
- '⌎' => '&urcrop;',
- '⌏' => '&ulcrop;',
- '⌐' => '&bnot;',
- '⌒' => '&profline;',
- '⌓' => '&profsurf;',
- '⌕' => '&telrec;',
- '⌖' => '&target;',
- '⌜' => '&ulcorner;',
- '⌝' => '&urcorner;',
- '⌞' => '&llcorner;',
- '⌟' => '&drcorn;',
- '⌢' => '&frown;',
- '⌣' => '&smile;',
- '⌭' => '&cylcty;',
- '⌮' => '&profalar;',
- '⌶' => '&topbot;',
- '⌽' => '&ovbar;',
- '⌿' => '&solbar;',
- '⍼' => '&angzarr;',
- '⎰' => '&lmoust;',
- '⎱' => '&rmoust;',
- '⎴' => '&OverBracket;',
- '⎵' => '&bbrk;',
- '⎶' => '&bbrktbrk;',
- '⏜' => '&OverParenthesis;',
- '⏝' => '&UnderParenthesis;',
- '⏞' => '&OverBrace;',
- '⏟' => '&UnderBrace;',
- '⏢' => '&trpezium;',
- '⏧' => '&elinters;',
- '␣' => '&blank;',
- 'Ⓢ' => '&oS;',
- '─' => '&HorizontalLine;',
- '│' => '&boxv;',
- '┌' => '&boxdr;',
- '┐' => '&boxdl;',
- '└' => '&boxur;',
- '┘' => '&boxul;',
- '├' => '&boxvr;',
- '┤' => '&boxvl;',
- '┬' => '&boxhd;',
- '┴' => '&boxhu;',
- '┼' => '&boxvh;',
- '═' => '&boxH;',
- '║' => '&boxV;',
- '╒' => '&boxdR;',
- '╓' => '&boxDr;',
- '╔' => '&boxDR;',
- '╕' => '&boxdL;',
- '╖' => '&boxDl;',
- '╗' => '&boxDL;',
- '╘' => '&boxuR;',
- '╙' => '&boxUr;',
- '╚' => '&boxUR;',
- '╛' => '&boxuL;',
- '╜' => '&boxUl;',
- '╝' => '&boxUL;',
- '╞' => '&boxvR;',
- '╟' => '&boxVr;',
- '╠' => '&boxVR;',
- '╡' => '&boxvL;',
- '╢' => '&boxVl;',
- '╣' => '&boxVL;',
- '╤' => '&boxHd;',
- '╥' => '&boxhD;',
- '╦' => '&boxHD;',
- '╧' => '&boxHu;',
- '╨' => '&boxhU;',
- '╩' => '&boxHU;',
- '╪' => '&boxvH;',
- '╫' => '&boxVh;',
- '╬' => '&boxVH;',
- '▀' => '&uhblk;',
- '▄' => '&lhblk;',
- '█' => '&block;',
- '░' => '&blk14;',
- '▒' => '&blk12;',
- '▓' => '&blk34;',
- '□' => '&Square;',
- '▪' => '&squarf;',
- '▫' => '&EmptyVerySmallSquare;',
- '▭' => '&rect;',
- '▮' => '&marker;',
- '▱' => '&fltns;',
- '△' => '&bigtriangleup;',
- '▴' => '&blacktriangle;',
- '▵' => '&triangle;',
- '▸' => '&blacktriangleright;',
- '▹' => '&rtri;',
- '▽' => '&bigtriangledown;',
- '▾' => '&blacktriangledown;',
- '▿' => '&triangledown;',
- '◂' => '&blacktriangleleft;',
- '◃' => '&ltri;',
- '◊' => '&lozenge;',
- '○' => '&cir;',
- '◬' => '&tridot;',
- '◯' => '&bigcirc;',
- '◸' => '&ultri;',
- '◹' => '&urtri;',
- '◺' => '&lltri;',
- '◻' => '&EmptySmallSquare;',
- '◼' => '&FilledSmallSquare;',
- '★' => '&starf;',
- '☆' => '&star;',
- '☎' => '&phone;',
- '♀' => '&female;',
- '♂' => '&male;',
- '♠' => '&spadesuit;',
- '♣' => '&clubs;',
- '♥' => '&hearts;',
- '♦' => '&diamondsuit;',
- '♪' => '&sung;',
- '♭' => '&flat;',
- '♮' => '&natur;',
- '♯' => '&sharp;',
- '✓' => '&check;',
- '✗' => '&cross;',
- '✠' => '&maltese;',
- '✶' => '&sext;',
- '❘' => '&VerticalSeparator;',
- '❲' => '&lbbrk;',
- '❳' => '&rbbrk;',
- '⟈' => '&bsolhsub;',
- '⟉' => '&suphsol;',
- '⟦' => '&LeftDoubleBracket;',
- '⟧' => '&RightDoubleBracket;',
- '⟨' => '&langle;',
- '⟩' => '&RightAngleBracket;',
- '⟪' => '&Lang;',
- '⟫' => '&Rang;',
- '⟬' => '&loang;',
- '⟭' => '&roang;',
- '⟵' => '&longleftarrow;',
- '⟶' => '&LongRightArrow;',
- '⟷' => '&LongLeftRightArrow;',
- '⟸' => '&xlArr;',
- '⟹' => '&DoubleLongRightArrow;',
- '⟺' => '&xhArr;',
- '⟼' => '&xmap;',
- '⟿' => '&dzigrarr;',
- '⤂' => '&nvlArr;',
- '⤃' => '&nvrArr;',
- '⤄' => '&nvHarr;',
- '⤅' => '&Map;',
- '⤌' => '&lbarr;',
- '⤍' => '&bkarow;',
- '⤎' => '&lBarr;',
- '⤏' => '&dbkarow;',
- '⤐' => '&drbkarow;',
- '⤑' => '&DDotrahd;',
- '⤒' => '&UpArrowBar;',
- '⤓' => '&DownArrowBar;',
- '⤖' => '&Rarrtl;',
- '⤙' => '&latail;',
- '⤚' => '&ratail;',
- '⤛' => '&lAtail;',
- '⤜' => '&rAtail;',
- '⤝' => '&larrfs;',
- '⤞' => '&rarrfs;',
- '⤟' => '&larrbfs;',
- '⤠' => '&rarrbfs;',
- '⤣' => '&nwarhk;',
- '⤤' => '&nearhk;',
- '⤥' => '&searhk;',
- '⤦' => '&swarhk;',
- '⤧' => '&nwnear;',
- '⤨' => '&toea;',
- '⤩' => '&seswar;',
- '⤪' => '&swnwar;',
- '⤳' => '&rarrc;',
- '⤳̸' => '&nrarrc',
- '⤵' => '&cudarrr;',
- '⤶' => '&ldca;',
- '⤷' => '&rdca;',
- '⤸' => '&cudarrl;',
- '⤹' => '&larrpl;',
- '⤼' => '&curarrm;',
- '⤽' => '&cularrp;',
- '⥅' => '&rarrpl;',
- '⥈' => '&harrcir;',
- '⥉' => '&Uarrocir;',
- '⥊' => '&lurdshar;',
- '⥋' => '&ldrushar;',
- '⥎' => '&LeftRightVector;',
- '⥏' => '&RightUpDownVector;',
- '⥐' => '&DownLeftRightVector;',
- '⥑' => '&LeftUpDownVector;',
- '⥒' => '&LeftVectorBar;',
- '⥓' => '&RightVectorBar;',
- '⥔' => '&RightUpVectorBar;',
- '⥕' => '&RightDownVectorBar;',
- '⥖' => '&DownLeftVectorBar;',
- '⥗' => '&DownRightVectorBar;',
- '⥘' => '&LeftUpVectorBar;',
- '⥙' => '&LeftDownVectorBar;',
- '⥚' => '&LeftTeeVector;',
- '⥛' => '&RightTeeVector;',
- '⥜' => '&RightUpTeeVector;',
- '⥝' => '&RightDownTeeVector;',
- '⥞' => '&DownLeftTeeVector;',
- '⥟' => '&DownRightTeeVector;',
- '⥠' => '&LeftUpTeeVector;',
- '⥡' => '&LeftDownTeeVector;',
- '⥢' => '&lHar;',
- '⥣' => '&uHar;',
- '⥤' => '&rHar;',
- '⥥' => '&dHar;',
- '⥦' => '&luruhar;',
- '⥧' => '&ldrdhar;',
- '⥨' => '&ruluhar;',
- '⥩' => '&rdldhar;',
- '⥪' => '&lharul;',
- '⥫' => '&llhard;',
- '⥬' => '&rharul;',
- '⥭' => '&lrhard;',
- '⥮' => '&udhar;',
- '⥯' => '&ReverseUpEquilibrium;',
- '⥰' => '&RoundImplies;',
- '⥱' => '&erarr;',
- '⥲' => '&simrarr;',
- '⥳' => '&larrsim;',
- '⥴' => '&rarrsim;',
- '⥵' => '&rarrap;',
- '⥶' => '&ltlarr;',
- '⥸' => '&gtrarr;',
- '⥹' => '&subrarr;',
- '⥻' => '&suplarr;',
- '⥼' => '&lfisht;',
- '⥽' => '&rfisht;',
- '⥾' => '&ufisht;',
- '⥿' => '&dfisht;',
- '⦅' => '&lopar;',
- '⦆' => '&ropar;',
- '⦋' => '&lbrke;',
- '⦌' => '&rbrke;',
- '⦍' => '&lbrkslu;',
- '⦎' => '&rbrksld;',
- '⦏' => '&lbrksld;',
- '⦐' => '&rbrkslu;',
- '⦑' => '&langd;',
- '⦒' => '&rangd;',
- '⦓' => '&lparlt;',
- '⦔' => '&rpargt;',
- '⦕' => '&gtlPar;',
- '⦖' => '&ltrPar;',
- '⦚' => '&vzigzag;',
- '⦜' => '&vangrt;',
- '⦝' => '&angrtvbd;',
- '⦤' => '&ange;',
- '⦥' => '&range;',
- '⦦' => '&dwangle;',
- '⦧' => '&uwangle;',
- '⦨' => '&angmsdaa;',
- '⦩' => '&angmsdab;',
- '⦪' => '&angmsdac;',
- '⦫' => '&angmsdad;',
- '⦬' => '&angmsdae;',
- '⦭' => '&angmsdaf;',
- '⦮' => '&angmsdag;',
- '⦯' => '&angmsdah;',
- '⦰' => '&bemptyv;',
- '⦱' => '&demptyv;',
- '⦲' => '&cemptyv;',
- '⦳' => '&raemptyv;',
- '⦴' => '&laemptyv;',
- '⦵' => '&ohbar;',
- '⦶' => '&omid;',
- '⦷' => '&opar;',
- '⦹' => '&operp;',
- '⦻' => '&olcross;',
- '⦼' => '&odsold;',
- '⦾' => '&olcir;',
- '⦿' => '&ofcir;',
- '⧀' => '&olt;',
- '⧁' => '&ogt;',
- '⧂' => '&cirscir;',
- '⧃' => '&cirE;',
- '⧄' => '&solb;',
- '⧅' => '&bsolb;',
- '⧉' => '&boxbox;',
- '⧍' => '&trisb;',
- '⧎' => '&rtriltri;',
- '⧏' => '&LeftTriangleBar;',
- '⧏̸' => '&NotLeftTriangleBar',
- '⧐' => '&RightTriangleBar;',
- '⧐̸' => '&NotRightTriangleBar',
- '⧜' => '&iinfin;',
- '⧝' => '&infintie;',
- '⧞' => '&nvinfin;',
- '⧣' => '&eparsl;',
- '⧤' => '&smeparsl;',
- '⧥' => '&eqvparsl;',
- '⧫' => '&lozf;',
- '⧴' => '&RuleDelayed;',
- '⧶' => '&dsol;',
- '⨀' => '&xodot;',
- '⨁' => '&bigoplus;',
- '⨂' => '&bigotimes;',
- '⨄' => '&biguplus;',
- '⨆' => '&bigsqcup;',
- '⨌' => '&iiiint;',
- '⨍' => '&fpartint;',
- '⨐' => '&cirfnint;',
- '⨑' => '&awint;',
- '⨒' => '&rppolint;',
- '⨓' => '&scpolint;',
- '⨔' => '&npolint;',
- '⨕' => '&pointint;',
- '⨖' => '&quatint;',
- '⨗' => '&intlarhk;',
- '⨢' => '&pluscir;',
- '⨣' => '&plusacir;',
- '⨤' => '&simplus;',
- '⨥' => '&plusdu;',
- '⨦' => '&plussim;',
- '⨧' => '&plustwo;',
- '⨩' => '&mcomma;',
- '⨪' => '&minusdu;',
- '⨭' => '&loplus;',
- '⨮' => '&roplus;',
- '⨯' => '&Cross;',
- '⨰' => '&timesd;',
- '⨱' => '&timesbar;',
- '⨳' => '&smashp;',
- '⨴' => '&lotimes;',
- '⨵' => '&rotimes;',
- '⨶' => '&otimesas;',
- '⨷' => '&Otimes;',
- '⨸' => '&odiv;',
- '⨹' => '&triplus;',
- '⨺' => '&triminus;',
- '⨻' => '&tritime;',
- '⨼' => '&iprod;',
- '⨿' => '&amalg;',
- '⩀' => '&capdot;',
- '⩂' => '&ncup;',
- '⩃' => '&ncap;',
- '⩄' => '&capand;',
- '⩅' => '&cupor;',
- '⩆' => '&cupcap;',
- '⩇' => '&capcup;',
- '⩈' => '&cupbrcap;',
- '⩉' => '&capbrcup;',
- '⩊' => '&cupcup;',
- '⩋' => '&capcap;',
- '⩌' => '&ccups;',
- '⩍' => '&ccaps;',
- '⩐' => '&ccupssm;',
- '⩓' => '&And;',
- '⩔' => '&Or;',
- '⩕' => '&andand;',
- '⩖' => '&oror;',
- '⩗' => '&orslope;',
- '⩘' => '&andslope;',
- '⩚' => '&andv;',
- '⩛' => '&orv;',
- '⩜' => '&andd;',
- '⩝' => '&ord;',
- '⩟' => '&wedbar;',
- '⩦' => '&sdote;',
- '⩪' => '&simdot;',
- '⩭' => '&congdot;',
- '⩭̸' => '&ncongdot',
- '⩮' => '&easter;',
- '⩯' => '&apacir;',
- '⩰' => '&apE;',
- '⩰̸' => '&napE',
- '⩱' => '&eplus;',
- '⩲' => '&pluse;',
- '⩳' => '&Esim;',
- '⩴' => '&Colone;',
- '⩵' => '&Equal;',
- '⩷' => '&ddotseq;',
- '⩸' => '&equivDD;',
- '⩹' => '&ltcir;',
- '⩺' => '&gtcir;',
- '⩻' => '&ltquest;',
- '⩼' => '&gtquest;',
- '⩽' => '&les;',
- '⩽̸' => '&nles',
- '⩾' => '&ges;',
- '⩾̸' => '&nges',
- '⩿' => '&lesdot;',
- '⪀' => '&gesdot;',
- '⪁' => '&lesdoto;',
- '⪂' => '&gesdoto;',
- '⪃' => '&lesdotor;',
- '⪄' => '&gesdotol;',
- '⪅' => '&lap;',
- '⪆' => '&gap;',
- '⪇' => '&lne;',
- '⪈' => '&gne;',
- '⪉' => '&lnap;',
- '⪊' => '&gnap;',
- '⪋' => '&lesseqqgtr;',
- '⪌' => '&gEl;',
- '⪍' => '&lsime;',
- '⪎' => '&gsime;',
- '⪏' => '&lsimg;',
- '⪐' => '&gsiml;',
- '⪑' => '&lgE;',
- '⪒' => '&glE;',
- '⪓' => '&lesges;',
- '⪔' => '&gesles;',
- '⪕' => '&els;',
- '⪖' => '&egs;',
- '⪗' => '&elsdot;',
- '⪘' => '&egsdot;',
- '⪙' => '&el;',
- '⪚' => '&eg;',
- '⪝' => '&siml;',
- '⪞' => '&simg;',
- '⪟' => '&simlE;',
- '⪠' => '&simgE;',
- '⪡' => '&LessLess;',
- '⪡̸' => '&NotNestedLessLess',
- '⪢' => '&GreaterGreater;',
- '⪢̸' => '&NotNestedGreaterGreater',
- '⪤' => '&glj;',
- '⪥' => '&gla;',
- '⪦' => '&ltcc;',
- '⪧' => '&gtcc;',
- '⪨' => '&lescc;',
- '⪩' => '&gescc;',
- '⪪' => '&smt;',
- '⪫' => '&lat;',
- '⪬' => '&smte;',
- '⪬︀' => '&smtes',
- '⪭' => '&late;',
- '⪭︀' => '&lates',
- '⪮' => '&bumpE;',
- '⪯' => '&preceq;',
- '⪯̸' => '&NotPrecedesEqual',
- '⪰' => '&SucceedsEqual;',
- '⪰̸' => '&NotSucceedsEqual',
- '⪳' => '&prE;',
- '⪴' => '&scE;',
- '⪵' => '&precneqq;',
- '⪶' => '&scnE;',
- '⪷' => '&precapprox;',
- '⪸' => '&succapprox;',
- '⪹' => '&precnapprox;',
- '⪺' => '&succnapprox;',
- '⪻' => '&Pr;',
- '⪼' => '&Sc;',
- '⪽' => '&subdot;',
- '⪾' => '&supdot;',
- '⪿' => '&subplus;',
- '⫀' => '&supplus;',
- '⫁' => '&submult;',
- '⫂' => '&supmult;',
- '⫃' => '&subedot;',
- '⫄' => '&supedot;',
- '⫅' => '&subE;',
- '⫅̸' => '&nsubE',
- '⫆' => '&supseteqq;',
- '⫆̸' => '&nsupseteqq',
- '⫇' => '&subsim;',
- '⫈' => '&supsim;',
- '⫋' => '&subsetneqq;',
- '⫋︀' => '&vsubnE',
- '⫌' => '&supnE;',
- '⫌︀' => '&varsupsetneqq',
- '⫏' => '&csub;',
- '⫐' => '&csup;',
- '⫑' => '&csube;',
- '⫒' => '&csupe;',
- '⫓' => '&subsup;',
- '⫔' => '&supsub;',
- '⫕' => '&subsub;',
- '⫖' => '&supsup;',
- '⫗' => '&suphsub;',
- '⫘' => '&supdsub;',
- '⫙' => '&forkv;',
- '⫚' => '&topfork;',
- '⫛' => '&mlcp;',
- '⫤' => '&Dashv;',
- '⫦' => '&Vdashl;',
- '⫧' => '&Barv;',
- '⫨' => '&vBar;',
- '⫩' => '&vBarv;',
- '⫫' => '&Vbar;',
- '⫬' => '&Not;',
- '⫭' => '&bNot;',
- '⫮' => '&rnmid;',
- '⫯' => '&cirmid;',
- '⫰' => '&midcir;',
- '⫱' => '&topcir;',
- '⫲' => '&nhpar;',
- '⫳' => '&parsim;',
- '⫽︀' => '&varsupsetneqq',
- 'ff' => '&fflig;',
- 'fi' => '&filig;',
- 'fl' => '&fllig;',
- 'ffi' => '&ffilig;',
- 'ffl' => '&ffllig;',
- '𝒜' => '&Ascr;',
- '𝒞' => '&Cscr;',
- '𝒟' => '&Dscr;',
- '𝒢' => '&Gscr;',
- '𝒥' => '&Jscr;',
- '𝒦' => '&Kscr;',
- '𝒩' => '&Nscr;',
- '𝒪' => '&Oscr;',
- '𝒫' => '&Pscr;',
- '𝒬' => '&Qscr;',
- '𝒮' => '&Sscr;',
- '𝒯' => '&Tscr;',
- '𝒰' => '&Uscr;',
- '𝒱' => '&Vscr;',
- '𝒲' => '&Wscr;',
- '𝒳' => '&Xscr;',
- '𝒴' => '&Yscr;',
- '𝒵' => '&Zscr;',
- '𝒶' => '&ascr;',
- '𝒷' => '&bscr;',
- '𝒸' => '&cscr;',
- '𝒹' => '&dscr;',
- '𝒻' => '&fscr;',
- '𝒽' => '&hscr;',
- '𝒾' => '&iscr;',
- '𝒿' => '&jscr;',
- '𝓀' => '&kscr;',
- '𝓁' => '&lscr;',
- '𝓂' => '&mscr;',
- '𝓃' => '&nscr;',
- '𝓅' => '&pscr;',
- '𝓆' => '&qscr;',
- '𝓇' => '&rscr;',
- '𝓈' => '&sscr;',
- '𝓉' => '&tscr;',
- '𝓊' => '&uscr;',
- '𝓋' => '&vscr;',
- '𝓌' => '&wscr;',
- '𝓍' => '&xscr;',
- '𝓎' => '&yscr;',
- '𝓏' => '&zscr;',
- '𝔄' => '&Afr;',
- '𝔅' => '&Bfr;',
- '𝔇' => '&Dfr;',
- '𝔈' => '&Efr;',
- '𝔉' => '&Ffr;',
- '𝔊' => '&Gfr;',
- '𝔍' => '&Jfr;',
- '𝔎' => '&Kfr;',
- '𝔏' => '&Lfr;',
- '𝔐' => '&Mfr;',
- '𝔑' => '&Nfr;',
- '𝔒' => '&Ofr;',
- '𝔓' => '&Pfr;',
- '𝔔' => '&Qfr;',
- '𝔖' => '&Sfr;',
- '𝔗' => '&Tfr;',
- '𝔘' => '&Ufr;',
- '𝔙' => '&Vfr;',
- '𝔚' => '&Wfr;',
- '𝔛' => '&Xfr;',
- '𝔜' => '&Yfr;',
- '𝔞' => '&afr;',
- '𝔟' => '&bfr;',
- '𝔠' => '&cfr;',
- '𝔡' => '&dfr;',
- '𝔢' => '&efr;',
- '𝔣' => '&ffr;',
- '𝔤' => '&gfr;',
- '𝔥' => '&hfr;',
- '𝔦' => '&ifr;',
- '𝔧' => '&jfr;',
- '𝔨' => '&kfr;',
- '𝔩' => '&lfr;',
- '𝔪' => '&mfr;',
- '𝔫' => '&nfr;',
- '𝔬' => '&ofr;',
- '𝔭' => '&pfr;',
- '𝔮' => '&qfr;',
- '𝔯' => '&rfr;',
- '𝔰' => '&sfr;',
- '𝔱' => '&tfr;',
- '𝔲' => '&ufr;',
- '𝔳' => '&vfr;',
- '𝔴' => '&wfr;',
- '𝔵' => '&xfr;',
- '𝔶' => '&yfr;',
- '𝔷' => '&zfr;',
- '𝔸' => '&Aopf;',
- '𝔹' => '&Bopf;',
- '𝔻' => '&Dopf;',
- '𝔼' => '&Eopf;',
- '𝔽' => '&Fopf;',
- '𝔾' => '&Gopf;',
- '𝕀' => '&Iopf;',
- '𝕁' => '&Jopf;',
- '𝕂' => '&Kopf;',
- '𝕃' => '&Lopf;',
- '𝕄' => '&Mopf;',
- '𝕆' => '&Oopf;',
- '𝕊' => '&Sopf;',
- '𝕋' => '&Topf;',
- '𝕌' => '&Uopf;',
- '𝕍' => '&Vopf;',
- '𝕎' => '&Wopf;',
- '𝕏' => '&Xopf;',
- '𝕐' => '&Yopf;',
- '𝕒' => '&aopf;',
- '𝕓' => '&bopf;',
- '𝕔' => '&copf;',
- '𝕕' => '&dopf;',
- '𝕖' => '&eopf;',
- '𝕗' => '&fopf;',
- '𝕘' => '&gopf;',
- '𝕙' => '&hopf;',
- '𝕚' => '&iopf;',
- '𝕛' => '&jopf;',
- '𝕜' => '&kopf;',
- '𝕝' => '&lopf;',
- '𝕞' => '&mopf;',
- '𝕟' => '&nopf;',
- '𝕠' => '&oopf;',
- '𝕡' => '&popf;',
- '𝕢' => '&qopf;',
- '𝕣' => '&ropf;',
- '𝕤' => '&sopf;',
- '𝕥' => '&topf;',
- '𝕦' => '&uopf;',
- '𝕧' => '&vopf;',
- '𝕨' => '&wopf;',
- '𝕩' => '&xopf;',
- '𝕪' => '&yopf;',
- '𝕫' => '&zopf;',
- );
+class HTML5Entities
+{
+
+ public static $map = array(
+ ' ' => '&Tab;',
+ "\n" => '&NewLine;',
+ '!' => '&excl;',
+ '"' => '&quot;',
+ '#' => '&num;',
+ '$' => '&dollar;',
+ '%' => '&percnt;',
+ '&' => '&amp;',
+ '\'' => '&apos;',
+ '(' => '&lpar;',
+ ')' => '&rpar;',
+ '*' => '&ast;',
+ '+' => '&plus;',
+ ',' => '&comma;',
+ '.' => '&period;',
+ '/' => '&sol;',
+ ':' => '&colon;',
+ ';' => '&semi;',
+ '<' => '&lt;',
+ '<⃒' => '&nvlt',
+ '=' => '&equals;',
+ '=⃥' => '&bne',
+ '>' => '&gt;',
+ '>⃒' => '&nvgt',
+ '?' => '&quest;',
+ '@' => '&commat;',
+ '[' => '&lbrack;',
+ '\\' => '&bsol;',
+ ']' => '&rsqb;',
+ '^' => '&Hat;',
+ '_' => '&lowbar;',
+ '`' => '&grave;',
+ 'fj' => '&fjlig',
+ '{' => '&lbrace;',
+ '|' => '&vert;',
+ '}' => '&rcub;',
+ ' ' => '&nbsp;',
+ '¡' => '&iexcl;',
+ '¢' => '&cent;',
+ '£' => '&pound;',
+ '¤' => '&curren;',
+ '¥' => '&yen;',
+ '¦' => '&brvbar;',
+ '§' => '&sect;',
+ '¨' => '&DoubleDot;',
+ '©' => '&copy;',
+ 'ª' => '&ordf;',
+ '«' => '&laquo;',
+ '¬' => '&not;',
+ '­' => '&shy;',
+ '®' => '&reg;',
+ '¯' => '&macr;',
+ '°' => '&deg;',
+ '±' => '&plusmn;',
+ '²' => '&sup2;',
+ '³' => '&sup3;',
+ '´' => '&DiacriticalAcute;',
+ 'µ' => '&micro;',
+ '¶' => '&para;',
+ '·' => '&CenterDot;',
+ '¸' => '&Cedilla;',
+ '¹' => '&sup1;',
+ 'º' => '&ordm;',
+ '»' => '&raquo;',
+ '¼' => '&frac14;',
+ '½' => '&half;',
+ '¾' => '&frac34;',
+ '¿' => '&iquest;',
+ 'À' => '&Agrave;',
+ 'Á' => '&Aacute;',
+ 'Â' => '&Acirc;',
+ 'Ã' => '&Atilde;',
+ 'Ä' => '&Auml;',
+ 'Å' => '&Aring;',
+ 'Æ' => '&AElig;',
+ 'Ç' => '&Ccedil;',
+ 'È' => '&Egrave;',
+ 'É' => '&Eacute;',
+ 'Ê' => '&Ecirc;',
+ 'Ë' => '&Euml;',
+ 'Ì' => '&Igrave;',
+ 'Í' => '&Iacute;',
+ 'Î' => '&Icirc;',
+ 'Ï' => '&Iuml;',
+ 'Ð' => '&ETH;',
+ 'Ñ' => '&Ntilde;',
+ 'Ò' => '&Ograve;',
+ 'Ó' => '&Oacute;',
+ 'Ô' => '&Ocirc;',
+ 'Õ' => '&Otilde;',
+ 'Ö' => '&Ouml;',
+ '×' => '&times;',
+ 'Ø' => '&Oslash;',
+ 'Ù' => '&Ugrave;',
+ 'Ú' => '&Uacute;',
+ 'Û' => '&Ucirc;',
+ 'Ü' => '&Uuml;',
+ 'Ý' => '&Yacute;',
+ 'Þ' => '&THORN;',
+ 'ß' => '&szlig;',
+ 'à' => '&agrave;',
+ 'á' => '&aacute;',
+ 'â' => '&acirc;',
+ 'ã' => '&atilde;',
+ 'ä' => '&auml;',
+ 'å' => '&aring;',
+ 'æ' => '&aelig;',
+ 'ç' => '&ccedil;',
+ 'è' => '&egrave;',
+ 'é' => '&eacute;',
+ 'ê' => '&ecirc;',
+ 'ë' => '&euml;',
+ 'ì' => '&igrave;',
+ 'í' => '&iacute;',
+ 'î' => '&icirc;',
+ 'ï' => '&iuml;',
+ 'ð' => '&eth;',
+ 'ñ' => '&ntilde;',
+ 'ò' => '&ograve;',
+ 'ó' => '&oacute;',
+ 'ô' => '&ocirc;',
+ 'õ' => '&otilde;',
+ 'ö' => '&ouml;',
+ '÷' => '&divide;',
+ 'ø' => '&oslash;',
+ 'ù' => '&ugrave;',
+ 'ú' => '&uacute;',
+ 'û' => '&ucirc;',
+ 'ü' => '&uuml;',
+ 'ý' => '&yacute;',
+ 'þ' => '&thorn;',
+ 'ÿ' => '&yuml;',
+ 'Ā' => '&Amacr;',
+ 'ā' => '&amacr;',
+ 'Ă' => '&Abreve;',
+ 'ă' => '&abreve;',
+ 'Ą' => '&Aogon;',
+ 'ą' => '&aogon;',
+ 'Ć' => '&Cacute;',
+ 'ć' => '&cacute;',
+ 'Ĉ' => '&Ccirc;',
+ 'ĉ' => '&ccirc;',
+ 'Ċ' => '&Cdot;',
+ 'ċ' => '&cdot;',
+ 'Č' => '&Ccaron;',
+ 'č' => '&ccaron;',
+ 'Ď' => '&Dcaron;',
+ 'ď' => '&dcaron;',
+ 'Đ' => '&Dstrok;',
+ 'đ' => '&dstrok;',
+ 'Ē' => '&Emacr;',
+ 'ē' => '&emacr;',
+ 'Ė' => '&Edot;',
+ 'ė' => '&edot;',
+ 'Ę' => '&Eogon;',
+ 'ę' => '&eogon;',
+ 'Ě' => '&Ecaron;',
+ 'ě' => '&ecaron;',
+ 'Ĝ' => '&Gcirc;',
+ 'ĝ' => '&gcirc;',
+ 'Ğ' => '&Gbreve;',
+ 'ğ' => '&gbreve;',
+ 'Ġ' => '&Gdot;',
+ 'ġ' => '&gdot;',
+ 'Ģ' => '&Gcedil;',
+ 'Ĥ' => '&Hcirc;',
+ 'ĥ' => '&hcirc;',
+ 'Ħ' => '&Hstrok;',
+ 'ħ' => '&hstrok;',
+ 'Ĩ' => '&Itilde;',
+ 'ĩ' => '&itilde;',
+ 'Ī' => '&Imacr;',
+ 'ī' => '&imacr;',
+ 'Į' => '&Iogon;',
+ 'į' => '&iogon;',
+ 'İ' => '&Idot;',
+ 'ı' => '&inodot;',
+ 'IJ' => '&IJlig;',
+ 'ij' => '&ijlig;',
+ 'Ĵ' => '&Jcirc;',
+ 'ĵ' => '&jcirc;',
+ 'Ķ' => '&Kcedil;',
+ 'ķ' => '&kcedil;',
+ 'ĸ' => '&kgreen;',
+ 'Ĺ' => '&Lacute;',
+ 'ĺ' => '&lacute;',
+ 'Ļ' => '&Lcedil;',
+ 'ļ' => '&lcedil;',
+ 'Ľ' => '&Lcaron;',
+ 'ľ' => '&lcaron;',
+ 'Ŀ' => '&Lmidot;',
+ 'ŀ' => '&lmidot;',
+ 'Ł' => '&Lstrok;',
+ 'ł' => '&lstrok;',
+ 'Ń' => '&Nacute;',
+ 'ń' => '&nacute;',
+ 'Ņ' => '&Ncedil;',
+ 'ņ' => '&ncedil;',
+ 'Ň' => '&Ncaron;',
+ 'ň' => '&ncaron;',
+ 'ʼn' => '&napos;',
+ 'Ŋ' => '&ENG;',
+ 'ŋ' => '&eng;',
+ 'Ō' => '&Omacr;',
+ 'ō' => '&omacr;',
+ 'Ő' => '&Odblac;',
+ 'ő' => '&odblac;',
+ 'Œ' => '&OElig;',
+ 'œ' => '&oelig;',
+ 'Ŕ' => '&Racute;',
+ 'ŕ' => '&racute;',
+ 'Ŗ' => '&Rcedil;',
+ 'ŗ' => '&rcedil;',
+ 'Ř' => '&Rcaron;',
+ 'ř' => '&rcaron;',
+ 'Ś' => '&Sacute;',
+ 'ś' => '&sacute;',
+ 'Ŝ' => '&Scirc;',
+ 'ŝ' => '&scirc;',
+ 'Ş' => '&Scedil;',
+ 'ş' => '&scedil;',
+ 'Š' => '&Scaron;',
+ 'š' => '&scaron;',
+ 'Ţ' => '&Tcedil;',
+ 'ţ' => '&tcedil;',
+ 'Ť' => '&Tcaron;',
+ 'ť' => '&tcaron;',
+ 'Ŧ' => '&Tstrok;',
+ 'ŧ' => '&tstrok;',
+ 'Ũ' => '&Utilde;',
+ 'ũ' => '&utilde;',
+ 'Ū' => '&Umacr;',
+ 'ū' => '&umacr;',
+ 'Ŭ' => '&Ubreve;',
+ 'ŭ' => '&ubreve;',
+ 'Ů' => '&Uring;',
+ 'ů' => '&uring;',
+ 'Ű' => '&Udblac;',
+ 'ű' => '&udblac;',
+ 'Ų' => '&Uogon;',
+ 'ų' => '&uogon;',
+ 'Ŵ' => '&Wcirc;',
+ 'ŵ' => '&wcirc;',
+ 'Ŷ' => '&Ycirc;',
+ 'ŷ' => '&ycirc;',
+ 'Ÿ' => '&Yuml;',
+ 'Ź' => '&Zacute;',
+ 'ź' => '&zacute;',
+ 'Ż' => '&Zdot;',
+ 'ż' => '&zdot;',
+ 'Ž' => '&Zcaron;',
+ 'ž' => '&zcaron;',
+ 'ƒ' => '&fnof;',
+ 'Ƶ' => '&imped;',
+ 'ǵ' => '&gacute;',
+ 'ȷ' => '&jmath;',
+ 'ˆ' => '&circ;',
+ 'ˇ' => '&Hacek;',
+ '˘' => '&Breve;',
+ '˙' => '&dot;',
+ '˚' => '&ring;',
+ '˛' => '&ogon;',
+ '˜' => '&DiacriticalTilde;',
+ '˝' => '&DiacriticalDoubleAcute;',
+ '̑' => '&DownBreve;',
+ 'Α' => '&Alpha;',
+ 'Β' => '&Beta;',
+ 'Γ' => '&Gamma;',
+ 'Δ' => '&Delta;',
+ 'Ε' => '&Epsilon;',
+ 'Ζ' => '&Zeta;',
+ 'Η' => '&Eta;',
+ 'Θ' => '&Theta;',
+ 'Ι' => '&Iota;',
+ 'Κ' => '&Kappa;',
+ 'Λ' => '&Lambda;',
+ 'Μ' => '&Mu;',
+ 'Ν' => '&Nu;',
+ 'Ξ' => '&Xi;',
+ 'Ο' => '&Omicron;',
+ 'Π' => '&Pi;',
+ 'Ρ' => '&Rho;',
+ 'Σ' => '&Sigma;',
+ 'Τ' => '&Tau;',
+ 'Υ' => '&Upsilon;',
+ 'Φ' => '&Phi;',
+ 'Χ' => '&Chi;',
+ 'Ψ' => '&Psi;',
+ 'Ω' => '&Omega;',
+ 'α' => '&alpha;',
+ 'β' => '&beta;',
+ 'γ' => '&gamma;',
+ 'δ' => '&delta;',
+ 'ε' => '&epsi;',
+ 'ζ' => '&zeta;',
+ 'η' => '&eta;',
+ 'θ' => '&theta;',
+ 'ι' => '&iota;',
+ 'κ' => '&kappa;',
+ 'λ' => '&lambda;',
+ 'μ' => '&mu;',
+ 'ν' => '&nu;',
+ 'ξ' => '&xi;',
+ 'ο' => '&omicron;',
+ 'π' => '&pi;',
+ 'ρ' => '&rho;',
+ 'ς' => '&sigmav;',
+ 'σ' => '&sigma;',
+ 'τ' => '&tau;',
+ 'υ' => '&upsi;',
+ 'φ' => '&phi;',
+ 'χ' => '&chi;',
+ 'ψ' => '&psi;',
+ 'ω' => '&omega;',
+ 'ϑ' => '&thetasym;',
+ 'ϒ' => '&upsih;',
+ 'ϕ' => '&straightphi;',
+ 'ϖ' => '&piv;',
+ 'Ϝ' => '&Gammad;',
+ 'ϝ' => '&gammad;',
+ 'ϰ' => '&varkappa;',
+ 'ϱ' => '&rhov;',
+ 'ϵ' => '&straightepsilon;',
+ '϶' => '&backepsilon;',
+ 'Ё' => '&IOcy;',
+ 'Ђ' => '&DJcy;',
+ 'Ѓ' => '&GJcy;',
+ 'Є' => '&Jukcy;',
+ 'Ѕ' => '&DScy;',
+ 'І' => '&Iukcy;',
+ 'Ї' => '&YIcy;',
+ 'Ј' => '&Jsercy;',
+ 'Љ' => '&LJcy;',
+ 'Њ' => '&NJcy;',
+ 'Ћ' => '&TSHcy;',
+ 'Ќ' => '&KJcy;',
+ 'Ў' => '&Ubrcy;',
+ 'Џ' => '&DZcy;',
+ 'А' => '&Acy;',
+ 'Б' => '&Bcy;',
+ 'В' => '&Vcy;',
+ 'Г' => '&Gcy;',
+ 'Д' => '&Dcy;',
+ 'Е' => '&IEcy;',
+ 'Ж' => '&ZHcy;',
+ 'З' => '&Zcy;',
+ 'И' => '&Icy;',
+ 'Й' => '&Jcy;',
+ 'К' => '&Kcy;',
+ 'Л' => '&Lcy;',
+ 'М' => '&Mcy;',
+ 'Н' => '&Ncy;',
+ 'О' => '&Ocy;',
+ 'П' => '&Pcy;',
+ 'Р' => '&Rcy;',
+ 'С' => '&Scy;',
+ 'Т' => '&Tcy;',
+ 'У' => '&Ucy;',
+ 'Ф' => '&Fcy;',
+ 'Х' => '&KHcy;',
+ 'Ц' => '&TScy;',
+ 'Ч' => '&CHcy;',
+ 'Ш' => '&SHcy;',
+ 'Щ' => '&SHCHcy;',
+ 'Ъ' => '&HARDcy;',
+ 'Ы' => '&Ycy;',
+ 'Ь' => '&SOFTcy;',
+ 'Э' => '&Ecy;',
+ 'Ю' => '&YUcy;',
+ 'Я' => '&YAcy;',
+ 'а' => '&acy;',
+ 'б' => '&bcy;',
+ 'в' => '&vcy;',
+ 'г' => '&gcy;',
+ 'д' => '&dcy;',
+ 'е' => '&iecy;',
+ 'ж' => '&zhcy;',
+ 'з' => '&zcy;',
+ 'и' => '&icy;',
+ 'й' => '&jcy;',
+ 'к' => '&kcy;',
+ 'л' => '&lcy;',
+ 'м' => '&mcy;',
+ 'н' => '&ncy;',
+ 'о' => '&ocy;',
+ 'п' => '&pcy;',
+ 'р' => '&rcy;',
+ 'с' => '&scy;',
+ 'т' => '&tcy;',
+ 'у' => '&ucy;',
+ 'ф' => '&fcy;',
+ 'х' => '&khcy;',
+ 'ц' => '&tscy;',
+ 'ч' => '&chcy;',
+ 'ш' => '&shcy;',
+ 'щ' => '&shchcy;',
+ 'ъ' => '&hardcy;',
+ 'ы' => '&ycy;',
+ 'ь' => '&softcy;',
+ 'э' => '&ecy;',
+ 'ю' => '&yucy;',
+ 'я' => '&yacy;',
+ 'ё' => '&iocy;',
+ 'ђ' => '&djcy;',
+ 'ѓ' => '&gjcy;',
+ 'є' => '&jukcy;',
+ 'ѕ' => '&dscy;',
+ 'і' => '&iukcy;',
+ 'ї' => '&yicy;',
+ 'ј' => '&jsercy;',
+ 'љ' => '&ljcy;',
+ 'њ' => '&njcy;',
+ 'ћ' => '&tshcy;',
+ 'ќ' => '&kjcy;',
+ 'ў' => '&ubrcy;',
+ 'џ' => '&dzcy;',
+ ' ' => '&ensp;',
+ ' ' => '&emsp;',
+ ' ' => '&emsp13;',
+ ' ' => '&emsp14;',
+ ' ' => '&numsp;',
+ ' ' => '&puncsp;',
+ ' ' => '&ThinSpace;',
+ ' ' => '&hairsp;',
+ '​' => '&ZeroWidthSpace;',
+ '‌' => '&zwnj;',
+ '‍' => '&zwj;',
+ '‎' => '&lrm;',
+ '‏' => '&rlm;',
+ '‐' => '&hyphen;',
+ '–' => '&ndash;',
+ '—' => '&mdash;',
+ '―' => '&horbar;',
+ '‖' => '&Verbar;',
+ '‘' => '&OpenCurlyQuote;',
+ '’' => '&rsquo;',
+ '‚' => '&sbquo;',
+ '“' => '&OpenCurlyDoubleQuote;',
+ '”' => '&rdquo;',
+ '„' => '&bdquo;',
+ '†' => '&dagger;',
+ '‡' => '&Dagger;',
+ '•' => '&bull;',
+ '‥' => '&nldr;',
+ '…' => '&hellip;',
+ '‰' => '&permil;',
+ '‱' => '&pertenk;',
+ '′' => '&prime;',
+ '″' => '&Prime;',
+ '‴' => '&tprime;',
+ '‵' => '&backprime;',
+ '‹' => '&lsaquo;',
+ '›' => '&rsaquo;',
+ '‾' => '&oline;',
+ '⁁' => '&caret;',
+ '⁃' => '&hybull;',
+ '⁄' => '&frasl;',
+ '⁏' => '&bsemi;',
+ '⁗' => '&qprime;',
+ ' ' => '&MediumSpace;',
+ '  ' => '&ThickSpace',
+ '⁠' => '&NoBreak;',
+ '⁡' => '&af;',
+ '⁢' => '&InvisibleTimes;',
+ '⁣' => '&ic;',
+ '€' => '&euro;',
+ '⃛' => '&TripleDot;',
+ '⃜' => '&DotDot;',
+ 'ℂ' => '&complexes;',
+ '℅' => '&incare;',
+ 'ℊ' => '&gscr;',
+ 'ℋ' => '&HilbertSpace;',
+ 'ℌ' => '&Hfr;',
+ 'ℍ' => '&Hopf;',
+ 'ℎ' => '&planckh;',
+ 'ℏ' => '&planck;',
+ 'ℐ' => '&imagline;',
+ 'ℑ' => '&Ifr;',
+ 'ℒ' => '&lagran;',
+ 'ℓ' => '&ell;',
+ 'ℕ' => '&naturals;',
+ '№' => '&numero;',
+ '℗' => '&copysr;',
+ '℘' => '&wp;',
+ 'ℙ' => '&primes;',
+ 'ℚ' => '&rationals;',
+ 'ℛ' => '&realine;',
+ 'ℜ' => '&Rfr;',
+ 'ℝ' => '&Ropf;',
+ '℞' => '&rx;',
+ '™' => '&trade;',
+ 'ℤ' => '&Zopf;',
+ '℧' => '&mho;',
+ 'ℨ' => '&Zfr;',
+ '℩' => '&iiota;',
+ 'ℬ' => '&Bscr;',
+ 'ℭ' => '&Cfr;',
+ 'ℯ' => '&escr;',
+ 'ℰ' => '&expectation;',
+ 'ℱ' => '&Fouriertrf;',
+ 'ℳ' => '&Mellintrf;',
+ 'ℴ' => '&orderof;',
+ 'ℵ' => '&aleph;',
+ 'ℶ' => '&beth;',
+ 'ℷ' => '&gimel;',
+ 'ℸ' => '&daleth;',
+ 'ⅅ' => '&CapitalDifferentialD;',
+ 'ⅆ' => '&DifferentialD;',
+ 'ⅇ' => '&exponentiale;',
+ 'ⅈ' => '&ImaginaryI;',
+ '⅓' => '&frac13;',
+ '⅔' => '&frac23;',
+ '⅕' => '&frac15;',
+ '⅖' => '&frac25;',
+ '⅗' => '&frac35;',
+ '⅘' => '&frac45;',
+ '⅙' => '&frac16;',
+ '⅚' => '&frac56;',
+ '⅛' => '&frac18;',
+ '⅜' => '&frac38;',
+ '⅝' => '&frac58;',
+ '⅞' => '&frac78;',
+ '←' => '&larr;',
+ '↑' => '&uarr;',
+ '→' => '&srarr;',
+ '↓' => '&darr;',
+ '↔' => '&harr;',
+ '↕' => '&UpDownArrow;',
+ '↖' => '&nwarrow;',
+ '↗' => '&UpperRightArrow;',
+ '↘' => '&LowerRightArrow;',
+ '↙' => '&swarr;',
+ '↚' => '&nleftarrow;',
+ '↛' => '&nrarr;',
+ '↝' => '&rarrw;',
+ '↝̸' => '&nrarrw',
+ '↞' => '&Larr;',
+ '↟' => '&Uarr;',
+ '↠' => '&twoheadrightarrow;',
+ '↡' => '&Darr;',
+ '↢' => '&larrtl;',
+ '↣' => '&rarrtl;',
+ '↤' => '&LeftTeeArrow;',
+ '↥' => '&UpTeeArrow;',
+ '↦' => '&map;',
+ '↧' => '&DownTeeArrow;',
+ '↩' => '&larrhk;',
+ '↪' => '&rarrhk;',
+ '↫' => '&larrlp;',
+ '↬' => '&looparrowright;',
+ '↭' => '&harrw;',
+ '↮' => '&nleftrightarrow;',
+ '↰' => '&Lsh;',
+ '↱' => '&rsh;',
+ '↲' => '&ldsh;',
+ '↳' => '&rdsh;',
+ '↵' => '&crarr;',
+ '↶' => '&curvearrowleft;',
+ '↷' => '&curarr;',
+ '↺' => '&olarr;',
+ '↻' => '&orarr;',
+ '↼' => '&leftharpoonup;',
+ '↽' => '&leftharpoondown;',
+ '↾' => '&RightUpVector;',
+ '↿' => '&uharl;',
+ '⇀' => '&rharu;',
+ '⇁' => '&rhard;',
+ '⇂' => '&RightDownVector;',
+ '⇃' => '&dharl;',
+ '⇄' => '&rightleftarrows;',
+ '⇅' => '&udarr;',
+ '⇆' => '&lrarr;',
+ '⇇' => '&llarr;',
+ '⇈' => '&upuparrows;',
+ '⇉' => '&rrarr;',
+ '⇊' => '&downdownarrows;',
+ '⇋' => '&leftrightharpoons;',
+ '⇌' => '&rightleftharpoons;',
+ '⇍' => '&nLeftarrow;',
+ '⇎' => '&nhArr;',
+ '⇏' => '&nrArr;',
+ '⇐' => '&DoubleLeftArrow;',
+ '⇑' => '&DoubleUpArrow;',
+ '⇒' => '&Implies;',
+ '⇓' => '&Downarrow;',
+ '⇔' => '&hArr;',
+ '⇕' => '&Updownarrow;',
+ '⇖' => '&nwArr;',
+ '⇗' => '&neArr;',
+ '⇘' => '&seArr;',
+ '⇙' => '&swArr;',
+ '⇚' => '&lAarr;',
+ '⇛' => '&rAarr;',
+ '⇝' => '&zigrarr;',
+ '⇤' => '&LeftArrowBar;',
+ '⇥' => '&RightArrowBar;',
+ '⇵' => '&DownArrowUpArrow;',
+ '⇽' => '&loarr;',
+ '⇾' => '&roarr;',
+ '⇿' => '&hoarr;',
+ '∀' => '&forall;',
+ '∁' => '&comp;',
+ '∂' => '&part;',
+ '∂̸' => '&npart',
+ '∃' => '&Exists;',
+ '∄' => '&nexist;',
+ '∅' => '&empty;',
+ '∇' => '&nabla;',
+ '∈' => '&isinv;',
+ '∉' => '&notin;',
+ '∋' => '&ReverseElement;',
+ '∌' => '&notniva;',
+ '∏' => '&prod;',
+ '∐' => '&Coproduct;',
+ '∑' => '&sum;',
+ '−' => '&minus;',
+ '∓' => '&MinusPlus;',
+ '∔' => '&plusdo;',
+ '∖' => '&ssetmn;',
+ '∗' => '&lowast;',
+ '∘' => '&compfn;',
+ '√' => '&Sqrt;',
+ '∝' => '&prop;',
+ '∞' => '&infin;',
+ '∟' => '&angrt;',
+ '∠' => '&angle;',
+ '∠⃒' => '&nang',
+ '∡' => '&angmsd;',
+ '∢' => '&angsph;',
+ '∣' => '&mid;',
+ '∤' => '&nshortmid;',
+ '∥' => '&shortparallel;',
+ '∦' => '&nparallel;',
+ '∧' => '&and;',
+ '∨' => '&or;',
+ '∩' => '&cap;',
+ '∩︀' => '&caps',
+ '∪' => '&cup;',
+ '∪︀' => '&cups',
+ '∫' => '&Integral;',
+ '∬' => '&Int;',
+ '∭' => '&tint;',
+ '∮' => '&ContourIntegral;',
+ '∯' => '&DoubleContourIntegral;',
+ '∰' => '&Cconint;',
+ '∱' => '&cwint;',
+ '∲' => '&cwconint;',
+ '∳' => '&awconint;',
+ '∴' => '&there4;',
+ '∵' => '&Because;',
+ '∶' => '&ratio;',
+ '∷' => '&Colon;',
+ '∸' => '&minusd;',
+ '∺' => '&mDDot;',
+ '∻' => '&homtht;',
+ '∼' => '&sim;',
+ '∼⃒' => '&nvsim',
+ '∽' => '&bsim;',
+ '∽̱' => '&race',
+ '∾' => '&ac;',
+ '∾̳' => '&acE',
+ '∿' => '&acd;',
+ '≀' => '&wr;',
+ '≁' => '&NotTilde;',
+ '≂' => '&esim;',
+ '≂̸' => '&nesim',
+ '≃' => '&simeq;',
+ '≄' => '&nsime;',
+ '≅' => '&TildeFullEqual;',
+ '≆' => '&simne;',
+ '≇' => '&ncong;',
+ '≈' => '&approx;',
+ '≉' => '&napprox;',
+ '≊' => '&ape;',
+ '≋' => '&apid;',
+ '≋̸' => '&napid',
+ '≌' => '&bcong;',
+ '≍' => '&CupCap;',
+ '≍⃒' => '&nvap',
+ '≎' => '&bump;',
+ '≎̸' => '&nbump',
+ '≏' => '&HumpEqual;',
+ '≏̸' => '&nbumpe',
+ '≐' => '&esdot;',
+ '≐̸' => '&nedot',
+ '≑' => '&doteqdot;',
+ '≒' => '&fallingdotseq;',
+ '≓' => '&risingdotseq;',
+ '≔' => '&coloneq;',
+ '≕' => '&eqcolon;',
+ '≖' => '&ecir;',
+ '≗' => '&circeq;',
+ '≙' => '&wedgeq;',
+ '≚' => '&veeeq;',
+ '≜' => '&triangleq;',
+ '≟' => '&equest;',
+ '≠' => '&NotEqual;',
+ '≡' => '&Congruent;',
+ '≡⃥' => '&bnequiv',
+ '≢' => '&NotCongruent;',
+ '≤' => '&leq;',
+ '≤⃒' => '&nvle',
+ '≥' => '&ge;',
+ '≥⃒' => '&nvge',
+ '≦' => '&lE;',
+ '≦̸' => '&nlE',
+ '≧' => '&geqq;',
+ '≧̸' => '&NotGreaterFullEqual',
+ '≨' => '&lneqq;',
+ '≨︀' => '&lvertneqq',
+ '≩' => '&gneqq;',
+ '≩︀' => '&gvertneqq',
+ '≪' => '&ll;',
+ '≪̸' => '&nLtv',
+ '≪⃒' => '&nLt',
+ '≫' => '&gg;',
+ '≫̸' => '&NotGreaterGreater',
+ '≫⃒' => '&nGt',
+ '≬' => '&between;',
+ '≭' => '&NotCupCap;',
+ '≮' => '&NotLess;',
+ '≯' => '&ngtr;',
+ '≰' => '&NotLessEqual;',
+ '≱' => '&ngeq;',
+ '≲' => '&LessTilde;',
+ '≳' => '&GreaterTilde;',
+ '≴' => '&nlsim;',
+ '≵' => '&ngsim;',
+ '≶' => '&lessgtr;',
+ '≷' => '&gl;',
+ '≸' => '&ntlg;',
+ '≹' => '&NotGreaterLess;',
+ '≺' => '&prec;',
+ '≻' => '&succ;',
+ '≼' => '&PrecedesSlantEqual;',
+ '≽' => '&succcurlyeq;',
+ '≾' => '&precsim;',
+ '≿' => '&SucceedsTilde;',
+ '≿̸' => '&NotSucceedsTilde',
+ '⊀' => '&npr;',
+ '⊁' => '&NotSucceeds;',
+ '⊂' => '&sub;',
+ '⊂⃒' => '&vnsub',
+ '⊃' => '&sup;',
+ '⊃⃒' => '&nsupset',
+ '⊄' => '&nsub;',
+ '⊅' => '&nsup;',
+ '⊆' => '&SubsetEqual;',
+ '⊇' => '&supe;',
+ '⊈' => '&NotSubsetEqual;',
+ '⊉' => '&NotSupersetEqual;',
+ '⊊' => '&subsetneq;',
+ '⊊︀' => '&vsubne',
+ '⊋' => '&supsetneq;',
+ '⊋︀' => '&vsupne',
+ '⊍' => '&cupdot;',
+ '⊎' => '&UnionPlus;',
+ '⊏' => '&sqsub;',
+ '⊏̸' => '&NotSquareSubset',
+ '⊐' => '&sqsupset;',
+ '⊐̸' => '&NotSquareSuperset',
+ '⊑' => '&SquareSubsetEqual;',
+ '⊒' => '&SquareSupersetEqual;',
+ '⊓' => '&sqcap;',
+ '⊓︀' => '&sqcaps',
+ '⊔' => '&sqcup;',
+ '⊔︀' => '&sqcups',
+ '⊕' => '&CirclePlus;',
+ '⊖' => '&ominus;',
+ '⊗' => '&CircleTimes;',
+ '⊘' => '&osol;',
+ '⊙' => '&CircleDot;',
+ '⊚' => '&ocir;',
+ '⊛' => '&oast;',
+ '⊝' => '&odash;',
+ '⊞' => '&boxplus;',
+ '⊟' => '&boxminus;',
+ '⊠' => '&timesb;',
+ '⊡' => '&sdotb;',
+ '⊢' => '&vdash;',
+ '⊣' => '&dashv;',
+ '⊤' => '&DownTee;',
+ '⊥' => '&perp;',
+ '⊧' => '&models;',
+ '⊨' => '&DoubleRightTee;',
+ '⊩' => '&Vdash;',
+ '⊪' => '&Vvdash;',
+ '⊫' => '&VDash;',
+ '⊬' => '&nvdash;',
+ '⊭' => '&nvDash;',
+ '⊮' => '&nVdash;',
+ '⊯' => '&nVDash;',
+ '⊰' => '&prurel;',
+ '⊲' => '&vartriangleleft;',
+ '⊳' => '&vrtri;',
+ '⊴' => '&LeftTriangleEqual;',
+ '⊴⃒' => '&nvltrie',
+ '⊵' => '&RightTriangleEqual;',
+ '⊵⃒' => '&nvrtrie',
+ '⊶' => '&origof;',
+ '⊷' => '&imof;',
+ '⊸' => '&mumap;',
+ '⊹' => '&hercon;',
+ '⊺' => '&intcal;',
+ '⊻' => '&veebar;',
+ '⊽' => '&barvee;',
+ '⊾' => '&angrtvb;',
+ '⊿' => '&lrtri;',
+ '⋀' => '&xwedge;',
+ '⋁' => '&xvee;',
+ '⋂' => '&bigcap;',
+ '⋃' => '&bigcup;',
+ '⋄' => '&diamond;',
+ '⋅' => '&sdot;',
+ '⋆' => '&Star;',
+ '⋇' => '&divonx;',
+ '⋈' => '&bowtie;',
+ '⋉' => '&ltimes;',
+ '⋊' => '&rtimes;',
+ '⋋' => '&lthree;',
+ '⋌' => '&rthree;',
+ '⋍' => '&backsimeq;',
+ '⋎' => '&curlyvee;',
+ '⋏' => '&curlywedge;',
+ '⋐' => '&Sub;',
+ '⋑' => '&Supset;',
+ '⋒' => '&Cap;',
+ '⋓' => '&Cup;',
+ '⋔' => '&pitchfork;',
+ '⋕' => '&epar;',
+ '⋖' => '&lessdot;',
+ '⋗' => '&gtrdot;',
+ '⋘' => '&Ll;',
+ '⋘̸' => '&nLl',
+ '⋙' => '&Gg;',
+ '⋙̸' => '&nGg',
+ '⋚' => '&lesseqgtr;',
+ '⋚︀' => '&lesg',
+ '⋛' => '&gtreqless;',
+ '⋛︀' => '&gesl',
+ '⋞' => '&curlyeqprec;',
+ '⋟' => '&cuesc;',
+ '⋠' => '&NotPrecedesSlantEqual;',
+ '⋡' => '&NotSucceedsSlantEqual;',
+ '⋢' => '&NotSquareSubsetEqual;',
+ '⋣' => '&NotSquareSupersetEqual;',
+ '⋦' => '&lnsim;',
+ '⋧' => '&gnsim;',
+ '⋨' => '&precnsim;',
+ '⋩' => '&scnsim;',
+ '⋪' => '&nltri;',
+ '⋫' => '&ntriangleright;',
+ '⋬' => '&nltrie;',
+ '⋭' => '&NotRightTriangleEqual;',
+ '⋮' => '&vellip;',
+ '⋯' => '&ctdot;',
+ '⋰' => '&utdot;',
+ '⋱' => '&dtdot;',
+ '⋲' => '&disin;',
+ '⋳' => '&isinsv;',
+ '⋴' => '&isins;',
+ '⋵' => '&isindot;',
+ '⋵̸' => '&notindot',
+ '⋶' => '&notinvc;',
+ '⋷' => '&notinvb;',
+ '⋹' => '&isinE;',
+ '⋹̸' => '&notinE',
+ '⋺' => '&nisd;',
+ '⋻' => '&xnis;',
+ '⋼' => '&nis;',
+ '⋽' => '&notnivc;',
+ '⋾' => '&notnivb;',
+ '⌅' => '&barwed;',
+ '⌆' => '&doublebarwedge;',
+ '⌈' => '&lceil;',
+ '⌉' => '&RightCeiling;',
+ '⌊' => '&LeftFloor;',
+ '⌋' => '&RightFloor;',
+ '⌌' => '&drcrop;',
+ '⌍' => '&dlcrop;',
+ '⌎' => '&urcrop;',
+ '⌏' => '&ulcrop;',
+ '⌐' => '&bnot;',
+ '⌒' => '&profline;',
+ '⌓' => '&profsurf;',
+ '⌕' => '&telrec;',
+ '⌖' => '&target;',
+ '⌜' => '&ulcorner;',
+ '⌝' => '&urcorner;',
+ '⌞' => '&llcorner;',
+ '⌟' => '&drcorn;',
+ '⌢' => '&frown;',
+ '⌣' => '&smile;',
+ '⌭' => '&cylcty;',
+ '⌮' => '&profalar;',
+ '⌶' => '&topbot;',
+ '⌽' => '&ovbar;',
+ '⌿' => '&solbar;',
+ '⍼' => '&angzarr;',
+ '⎰' => '&lmoust;',
+ '⎱' => '&rmoust;',
+ '⎴' => '&OverBracket;',
+ '⎵' => '&bbrk;',
+ '⎶' => '&bbrktbrk;',
+ '⏜' => '&OverParenthesis;',
+ '⏝' => '&UnderParenthesis;',
+ '⏞' => '&OverBrace;',
+ '⏟' => '&UnderBrace;',
+ '⏢' => '&trpezium;',
+ '⏧' => '&elinters;',
+ '␣' => '&blank;',
+ 'Ⓢ' => '&oS;',
+ '─' => '&HorizontalLine;',
+ '│' => '&boxv;',
+ '┌' => '&boxdr;',
+ '┐' => '&boxdl;',
+ '└' => '&boxur;',
+ '┘' => '&boxul;',
+ '├' => '&boxvr;',
+ '┤' => '&boxvl;',
+ '┬' => '&boxhd;',
+ '┴' => '&boxhu;',
+ '┼' => '&boxvh;',
+ '═' => '&boxH;',
+ '║' => '&boxV;',
+ '╒' => '&boxdR;',
+ '╓' => '&boxDr;',
+ '╔' => '&boxDR;',
+ '╕' => '&boxdL;',
+ '╖' => '&boxDl;',
+ '╗' => '&boxDL;',
+ '╘' => '&boxuR;',
+ '╙' => '&boxUr;',
+ '╚' => '&boxUR;',
+ '╛' => '&boxuL;',
+ '╜' => '&boxUl;',
+ '╝' => '&boxUL;',
+ '╞' => '&boxvR;',
+ '╟' => '&boxVr;',
+ '╠' => '&boxVR;',
+ '╡' => '&boxvL;',
+ '╢' => '&boxVl;',
+ '╣' => '&boxVL;',
+ '╤' => '&boxHd;',
+ '╥' => '&boxhD;',
+ '╦' => '&boxHD;',
+ '╧' => '&boxHu;',
+ '╨' => '&boxhU;',
+ '╩' => '&boxHU;',
+ '╪' => '&boxvH;',
+ '╫' => '&boxVh;',
+ '╬' => '&boxVH;',
+ '▀' => '&uhblk;',
+ '▄' => '&lhblk;',
+ '█' => '&block;',
+ '░' => '&blk14;',
+ '▒' => '&blk12;',
+ '▓' => '&blk34;',
+ '□' => '&Square;',
+ '▪' => '&squarf;',
+ '▫' => '&EmptyVerySmallSquare;',
+ '▭' => '&rect;',
+ '▮' => '&marker;',
+ '▱' => '&fltns;',
+ '△' => '&bigtriangleup;',
+ '▴' => '&blacktriangle;',
+ '▵' => '&triangle;',
+ '▸' => '&blacktriangleright;',
+ '▹' => '&rtri;',
+ '▽' => '&bigtriangledown;',
+ '▾' => '&blacktriangledown;',
+ '▿' => '&triangledown;',
+ '◂' => '&blacktriangleleft;',
+ '◃' => '&ltri;',
+ '◊' => '&lozenge;',
+ '○' => '&cir;',
+ '◬' => '&tridot;',
+ '◯' => '&bigcirc;',
+ '◸' => '&ultri;',
+ '◹' => '&urtri;',
+ '◺' => '&lltri;',
+ '◻' => '&EmptySmallSquare;',
+ '◼' => '&FilledSmallSquare;',
+ '★' => '&starf;',
+ '☆' => '&star;',
+ '☎' => '&phone;',
+ '♀' => '&female;',
+ '♂' => '&male;',
+ '♠' => '&spadesuit;',
+ '♣' => '&clubs;',
+ '♥' => '&hearts;',
+ '♦' => '&diamondsuit;',
+ '♪' => '&sung;',
+ '♭' => '&flat;',
+ '♮' => '&natur;',
+ '♯' => '&sharp;',
+ '✓' => '&check;',
+ '✗' => '&cross;',
+ '✠' => '&maltese;',
+ '✶' => '&sext;',
+ '❘' => '&VerticalSeparator;',
+ '❲' => '&lbbrk;',
+ '❳' => '&rbbrk;',
+ '⟈' => '&bsolhsub;',
+ '⟉' => '&suphsol;',
+ '⟦' => '&LeftDoubleBracket;',
+ '⟧' => '&RightDoubleBracket;',
+ '⟨' => '&langle;',
+ '⟩' => '&RightAngleBracket;',
+ '⟪' => '&Lang;',
+ '⟫' => '&Rang;',
+ '⟬' => '&loang;',
+ '⟭' => '&roang;',
+ '⟵' => '&longleftarrow;',
+ '⟶' => '&LongRightArrow;',
+ '⟷' => '&LongLeftRightArrow;',
+ '⟸' => '&xlArr;',
+ '⟹' => '&DoubleLongRightArrow;',
+ '⟺' => '&xhArr;',
+ '⟼' => '&xmap;',
+ '⟿' => '&dzigrarr;',
+ '⤂' => '&nvlArr;',
+ '⤃' => '&nvrArr;',
+ '⤄' => '&nvHarr;',
+ '⤅' => '&Map;',
+ '⤌' => '&lbarr;',
+ '⤍' => '&bkarow;',
+ '⤎' => '&lBarr;',
+ '⤏' => '&dbkarow;',
+ '⤐' => '&drbkarow;',
+ '⤑' => '&DDotrahd;',
+ '⤒' => '&UpArrowBar;',
+ '⤓' => '&DownArrowBar;',
+ '⤖' => '&Rarrtl;',
+ '⤙' => '&latail;',
+ '⤚' => '&ratail;',
+ '⤛' => '&lAtail;',
+ '⤜' => '&rAtail;',
+ '⤝' => '&larrfs;',
+ '⤞' => '&rarrfs;',
+ '⤟' => '&larrbfs;',
+ '⤠' => '&rarrbfs;',
+ '⤣' => '&nwarhk;',
+ '⤤' => '&nearhk;',
+ '⤥' => '&searhk;',
+ '⤦' => '&swarhk;',
+ '⤧' => '&nwnear;',
+ '⤨' => '&toea;',
+ '⤩' => '&seswar;',
+ '⤪' => '&swnwar;',
+ '⤳' => '&rarrc;',
+ '⤳̸' => '&nrarrc',
+ '⤵' => '&cudarrr;',
+ '⤶' => '&ldca;',
+ '⤷' => '&rdca;',
+ '⤸' => '&cudarrl;',
+ '⤹' => '&larrpl;',
+ '⤼' => '&curarrm;',
+ '⤽' => '&cularrp;',
+ '⥅' => '&rarrpl;',
+ '⥈' => '&harrcir;',
+ '⥉' => '&Uarrocir;',
+ '⥊' => '&lurdshar;',
+ '⥋' => '&ldrushar;',
+ '⥎' => '&LeftRightVector;',
+ '⥏' => '&RightUpDownVector;',
+ '⥐' => '&DownLeftRightVector;',
+ '⥑' => '&LeftUpDownVector;',
+ '⥒' => '&LeftVectorBar;',
+ '⥓' => '&RightVectorBar;',
+ '⥔' => '&RightUpVectorBar;',
+ '⥕' => '&RightDownVectorBar;',
+ '⥖' => '&DownLeftVectorBar;',
+ '⥗' => '&DownRightVectorBar;',
+ '⥘' => '&LeftUpVectorBar;',
+ '⥙' => '&LeftDownVectorBar;',
+ '⥚' => '&LeftTeeVector;',
+ '⥛' => '&RightTeeVector;',
+ '⥜' => '&RightUpTeeVector;',
+ '⥝' => '&RightDownTeeVector;',
+ '⥞' => '&DownLeftTeeVector;',
+ '⥟' => '&DownRightTeeVector;',
+ '⥠' => '&LeftUpTeeVector;',
+ '⥡' => '&LeftDownTeeVector;',
+ '⥢' => '&lHar;',
+ '⥣' => '&uHar;',
+ '⥤' => '&rHar;',
+ '⥥' => '&dHar;',
+ '⥦' => '&luruhar;',
+ '⥧' => '&ldrdhar;',
+ '⥨' => '&ruluhar;',
+ '⥩' => '&rdldhar;',
+ '⥪' => '&lharul;',
+ '⥫' => '&llhard;',
+ '⥬' => '&rharul;',
+ '⥭' => '&lrhard;',
+ '⥮' => '&udhar;',
+ '⥯' => '&ReverseUpEquilibrium;',
+ '⥰' => '&RoundImplies;',
+ '⥱' => '&erarr;',
+ '⥲' => '&simrarr;',
+ '⥳' => '&larrsim;',
+ '⥴' => '&rarrsim;',
+ '⥵' => '&rarrap;',
+ '⥶' => '&ltlarr;',
+ '⥸' => '&gtrarr;',
+ '⥹' => '&subrarr;',
+ '⥻' => '&suplarr;',
+ '⥼' => '&lfisht;',
+ '⥽' => '&rfisht;',
+ '⥾' => '&ufisht;',
+ '⥿' => '&dfisht;',
+ '⦅' => '&lopar;',
+ '⦆' => '&ropar;',
+ '⦋' => '&lbrke;',
+ '⦌' => '&rbrke;',
+ '⦍' => '&lbrkslu;',
+ '⦎' => '&rbrksld;',
+ '⦏' => '&lbrksld;',
+ '⦐' => '&rbrkslu;',
+ '⦑' => '&langd;',
+ '⦒' => '&rangd;',
+ '⦓' => '&lparlt;',
+ '⦔' => '&rpargt;',
+ '⦕' => '&gtlPar;',
+ '⦖' => '&ltrPar;',
+ '⦚' => '&vzigzag;',
+ '⦜' => '&vangrt;',
+ '⦝' => '&angrtvbd;',
+ '⦤' => '&ange;',
+ '⦥' => '&range;',
+ '⦦' => '&dwangle;',
+ '⦧' => '&uwangle;',
+ '⦨' => '&angmsdaa;',
+ '⦩' => '&angmsdab;',
+ '⦪' => '&angmsdac;',
+ '⦫' => '&angmsdad;',
+ '⦬' => '&angmsdae;',
+ '⦭' => '&angmsdaf;',
+ '⦮' => '&angmsdag;',
+ '⦯' => '&angmsdah;',
+ '⦰' => '&bemptyv;',
+ '⦱' => '&demptyv;',
+ '⦲' => '&cemptyv;',
+ '⦳' => '&raemptyv;',
+ '⦴' => '&laemptyv;',
+ '⦵' => '&ohbar;',
+ '⦶' => '&omid;',
+ '⦷' => '&opar;',
+ '⦹' => '&operp;',
+ '⦻' => '&olcross;',
+ '⦼' => '&odsold;',
+ '⦾' => '&olcir;',
+ '⦿' => '&ofcir;',
+ '⧀' => '&olt;',
+ '⧁' => '&ogt;',
+ '⧂' => '&cirscir;',
+ '⧃' => '&cirE;',
+ '⧄' => '&solb;',
+ '⧅' => '&bsolb;',
+ '⧉' => '&boxbox;',
+ '⧍' => '&trisb;',
+ '⧎' => '&rtriltri;',
+ '⧏' => '&LeftTriangleBar;',
+ '⧏̸' => '&NotLeftTriangleBar',
+ '⧐' => '&RightTriangleBar;',
+ '⧐̸' => '&NotRightTriangleBar',
+ '⧜' => '&iinfin;',
+ '⧝' => '&infintie;',
+ '⧞' => '&nvinfin;',
+ '⧣' => '&eparsl;',
+ '⧤' => '&smeparsl;',
+ '⧥' => '&eqvparsl;',
+ '⧫' => '&lozf;',
+ '⧴' => '&RuleDelayed;',
+ '⧶' => '&dsol;',
+ '⨀' => '&xodot;',
+ '⨁' => '&bigoplus;',
+ '⨂' => '&bigotimes;',
+ '⨄' => '&biguplus;',
+ '⨆' => '&bigsqcup;',
+ '⨌' => '&iiiint;',
+ '⨍' => '&fpartint;',
+ '⨐' => '&cirfnint;',
+ '⨑' => '&awint;',
+ '⨒' => '&rppolint;',
+ '⨓' => '&scpolint;',
+ '⨔' => '&npolint;',
+ '⨕' => '&pointint;',
+ '⨖' => '&quatint;',
+ '⨗' => '&intlarhk;',
+ '⨢' => '&pluscir;',
+ '⨣' => '&plusacir;',
+ '⨤' => '&simplus;',
+ '⨥' => '&plusdu;',
+ '⨦' => '&plussim;',
+ '⨧' => '&plustwo;',
+ '⨩' => '&mcomma;',
+ '⨪' => '&minusdu;',
+ '⨭' => '&loplus;',
+ '⨮' => '&roplus;',
+ '⨯' => '&Cross;',
+ '⨰' => '&timesd;',
+ '⨱' => '&timesbar;',
+ '⨳' => '&smashp;',
+ '⨴' => '&lotimes;',
+ '⨵' => '&rotimes;',
+ '⨶' => '&otimesas;',
+ '⨷' => '&Otimes;',
+ '⨸' => '&odiv;',
+ '⨹' => '&triplus;',
+ '⨺' => '&triminus;',
+ '⨻' => '&tritime;',
+ '⨼' => '&iprod;',
+ '⨿' => '&amalg;',
+ '⩀' => '&capdot;',
+ '⩂' => '&ncup;',
+ '⩃' => '&ncap;',
+ '⩄' => '&capand;',
+ '⩅' => '&cupor;',
+ '⩆' => '&cupcap;',
+ '⩇' => '&capcup;',
+ '⩈' => '&cupbrcap;',
+ '⩉' => '&capbrcup;',
+ '⩊' => '&cupcup;',
+ '⩋' => '&capcap;',
+ '⩌' => '&ccups;',
+ '⩍' => '&ccaps;',
+ '⩐' => '&ccupssm;',
+ '⩓' => '&And;',
+ '⩔' => '&Or;',
+ '⩕' => '&andand;',
+ '⩖' => '&oror;',
+ '⩗' => '&orslope;',
+ '⩘' => '&andslope;',
+ '⩚' => '&andv;',
+ '⩛' => '&orv;',
+ '⩜' => '&andd;',
+ '⩝' => '&ord;',
+ '⩟' => '&wedbar;',
+ '⩦' => '&sdote;',
+ '⩪' => '&simdot;',
+ '⩭' => '&congdot;',
+ '⩭̸' => '&ncongdot',
+ '⩮' => '&easter;',
+ '⩯' => '&apacir;',
+ '⩰' => '&apE;',
+ '⩰̸' => '&napE',
+ '⩱' => '&eplus;',
+ '⩲' => '&pluse;',
+ '⩳' => '&Esim;',
+ '⩴' => '&Colone;',
+ '⩵' => '&Equal;',
+ '⩷' => '&ddotseq;',
+ '⩸' => '&equivDD;',
+ '⩹' => '&ltcir;',
+ '⩺' => '&gtcir;',
+ '⩻' => '&ltquest;',
+ '⩼' => '&gtquest;',
+ '⩽' => '&les;',
+ '⩽̸' => '&nles',
+ '⩾' => '&ges;',
+ '⩾̸' => '&nges',
+ '⩿' => '&lesdot;',
+ '⪀' => '&gesdot;',
+ '⪁' => '&lesdoto;',
+ '⪂' => '&gesdoto;',
+ '⪃' => '&lesdotor;',
+ '⪄' => '&gesdotol;',
+ '⪅' => '&lap;',
+ '⪆' => '&gap;',
+ '⪇' => '&lne;',
+ '⪈' => '&gne;',
+ '⪉' => '&lnap;',
+ '⪊' => '&gnap;',
+ '⪋' => '&lesseqqgtr;',
+ '⪌' => '&gEl;',
+ '⪍' => '&lsime;',
+ '⪎' => '&gsime;',
+ '⪏' => '&lsimg;',
+ '⪐' => '&gsiml;',
+ '⪑' => '&lgE;',
+ '⪒' => '&glE;',
+ '⪓' => '&lesges;',
+ '⪔' => '&gesles;',
+ '⪕' => '&els;',
+ '⪖' => '&egs;',
+ '⪗' => '&elsdot;',
+ '⪘' => '&egsdot;',
+ '⪙' => '&el;',
+ '⪚' => '&eg;',
+ '⪝' => '&siml;',
+ '⪞' => '&simg;',
+ '⪟' => '&simlE;',
+ '⪠' => '&simgE;',
+ '⪡' => '&LessLess;',
+ '⪡̸' => '&NotNestedLessLess',
+ '⪢' => '&GreaterGreater;',
+ '⪢̸' => '&NotNestedGreaterGreater',
+ '⪤' => '&glj;',
+ '⪥' => '&gla;',
+ '⪦' => '&ltcc;',
+ '⪧' => '&gtcc;',
+ '⪨' => '&lescc;',
+ '⪩' => '&gescc;',
+ '⪪' => '&smt;',
+ '⪫' => '&lat;',
+ '⪬' => '&smte;',
+ '⪬︀' => '&smtes',
+ '⪭' => '&late;',
+ '⪭︀' => '&lates',
+ '⪮' => '&bumpE;',
+ '⪯' => '&preceq;',
+ '⪯̸' => '&NotPrecedesEqual',
+ '⪰' => '&SucceedsEqual;',
+ '⪰̸' => '&NotSucceedsEqual',
+ '⪳' => '&prE;',
+ '⪴' => '&scE;',
+ '⪵' => '&precneqq;',
+ '⪶' => '&scnE;',
+ '⪷' => '&precapprox;',
+ '⪸' => '&succapprox;',
+ '⪹' => '&precnapprox;',
+ '⪺' => '&succnapprox;',
+ '⪻' => '&Pr;',
+ '⪼' => '&Sc;',
+ '⪽' => '&subdot;',
+ '⪾' => '&supdot;',
+ '⪿' => '&subplus;',
+ '⫀' => '&supplus;',
+ '⫁' => '&submult;',
+ '⫂' => '&supmult;',
+ '⫃' => '&subedot;',
+ '⫄' => '&supedot;',
+ '⫅' => '&subE;',
+ '⫅̸' => '&nsubE',
+ '⫆' => '&supseteqq;',
+ '⫆̸' => '&nsupseteqq',
+ '⫇' => '&subsim;',
+ '⫈' => '&supsim;',
+ '⫋' => '&subsetneqq;',
+ '⫋︀' => '&vsubnE',
+ '⫌' => '&supnE;',
+ '⫌︀' => '&varsupsetneqq',
+ '⫏' => '&csub;',
+ '⫐' => '&csup;',
+ '⫑' => '&csube;',
+ '⫒' => '&csupe;',
+ '⫓' => '&subsup;',
+ '⫔' => '&supsub;',
+ '⫕' => '&subsub;',
+ '⫖' => '&supsup;',
+ '⫗' => '&suphsub;',
+ '⫘' => '&supdsub;',
+ '⫙' => '&forkv;',
+ '⫚' => '&topfork;',
+ '⫛' => '&mlcp;',
+ '⫤' => '&Dashv;',
+ '⫦' => '&Vdashl;',
+ '⫧' => '&Barv;',
+ '⫨' => '&vBar;',
+ '⫩' => '&vBarv;',
+ '⫫' => '&Vbar;',
+ '⫬' => '&Not;',
+ '⫭' => '&bNot;',
+ '⫮' => '&rnmid;',
+ '⫯' => '&cirmid;',
+ '⫰' => '&midcir;',
+ '⫱' => '&topcir;',
+ '⫲' => '&nhpar;',
+ '⫳' => '&parsim;',
+ '⫽︀' => '&varsupsetneqq',
+ 'ff' => '&fflig;',
+ 'fi' => '&filig;',
+ 'fl' => '&fllig;',
+ 'ffi' => '&ffilig;',
+ 'ffl' => '&ffllig;',
+ '𝒜' => '&Ascr;',
+ '𝒞' => '&Cscr;',
+ '𝒟' => '&Dscr;',
+ '𝒢' => '&Gscr;',
+ '𝒥' => '&Jscr;',
+ '𝒦' => '&Kscr;',
+ '𝒩' => '&Nscr;',
+ '𝒪' => '&Oscr;',
+ '𝒫' => '&Pscr;',
+ '𝒬' => '&Qscr;',
+ '𝒮' => '&Sscr;',
+ '𝒯' => '&Tscr;',
+ '𝒰' => '&Uscr;',
+ '𝒱' => '&Vscr;',
+ '𝒲' => '&Wscr;',
+ '𝒳' => '&Xscr;',
+ '𝒴' => '&Yscr;',
+ '𝒵' => '&Zscr;',
+ '𝒶' => '&ascr;',
+ '𝒷' => '&bscr;',
+ '𝒸' => '&cscr;',
+ '𝒹' => '&dscr;',
+ '𝒻' => '&fscr;',
+ '𝒽' => '&hscr;',
+ '𝒾' => '&iscr;',
+ '𝒿' => '&jscr;',
+ '𝓀' => '&kscr;',
+ '𝓁' => '&lscr;',
+ '𝓂' => '&mscr;',
+ '𝓃' => '&nscr;',
+ '𝓅' => '&pscr;',
+ '𝓆' => '&qscr;',
+ '𝓇' => '&rscr;',
+ '𝓈' => '&sscr;',
+ '𝓉' => '&tscr;',
+ '𝓊' => '&uscr;',
+ '𝓋' => '&vscr;',
+ '𝓌' => '&wscr;',
+ '𝓍' => '&xscr;',
+ '𝓎' => '&yscr;',
+ '𝓏' => '&zscr;',
+ '𝔄' => '&Afr;',
+ '𝔅' => '&Bfr;',
+ '𝔇' => '&Dfr;',
+ '𝔈' => '&Efr;',
+ '𝔉' => '&Ffr;',
+ '𝔊' => '&Gfr;',
+ '𝔍' => '&Jfr;',
+ '𝔎' => '&Kfr;',
+ '𝔏' => '&Lfr;',
+ '𝔐' => '&Mfr;',
+ '𝔑' => '&Nfr;',
+ '𝔒' => '&Ofr;',
+ '𝔓' => '&Pfr;',
+ '𝔔' => '&Qfr;',
+ '𝔖' => '&Sfr;',
+ '𝔗' => '&Tfr;',
+ '𝔘' => '&Ufr;',
+ '𝔙' => '&Vfr;',
+ '𝔚' => '&Wfr;',
+ '𝔛' => '&Xfr;',
+ '𝔜' => '&Yfr;',
+ '𝔞' => '&afr;',
+ '𝔟' => '&bfr;',
+ '𝔠' => '&cfr;',
+ '𝔡' => '&dfr;',
+ '𝔢' => '&efr;',
+ '𝔣' => '&ffr;',
+ '𝔤' => '&gfr;',
+ '𝔥' => '&hfr;',
+ '𝔦' => '&ifr;',
+ '𝔧' => '&jfr;',
+ '𝔨' => '&kfr;',
+ '𝔩' => '&lfr;',
+ '𝔪' => '&mfr;',
+ '𝔫' => '&nfr;',
+ '𝔬' => '&ofr;',
+ '𝔭' => '&pfr;',
+ '𝔮' => '&qfr;',
+ '𝔯' => '&rfr;',
+ '𝔰' => '&sfr;',
+ '𝔱' => '&tfr;',
+ '𝔲' => '&ufr;',
+ '𝔳' => '&vfr;',
+ '𝔴' => '&wfr;',
+ '𝔵' => '&xfr;',
+ '𝔶' => '&yfr;',
+ '𝔷' => '&zfr;',
+ '𝔸' => '&Aopf;',
+ '𝔹' => '&Bopf;',
+ '𝔻' => '&Dopf;',
+ '𝔼' => '&Eopf;',
+ '𝔽' => '&Fopf;',
+ '𝔾' => '&Gopf;',
+ '𝕀' => '&Iopf;',
+ '𝕁' => '&Jopf;',
+ '𝕂' => '&Kopf;',
+ '𝕃' => '&Lopf;',
+ '𝕄' => '&Mopf;',
+ '𝕆' => '&Oopf;',
+ '𝕊' => '&Sopf;',
+ '𝕋' => '&Topf;',
+ '𝕌' => '&Uopf;',
+ '𝕍' => '&Vopf;',
+ '𝕎' => '&Wopf;',
+ '𝕏' => '&Xopf;',
+ '𝕐' => '&Yopf;',
+ '𝕒' => '&aopf;',
+ '𝕓' => '&bopf;',
+ '𝕔' => '&copf;',
+ '𝕕' => '&dopf;',
+ '𝕖' => '&eopf;',
+ '𝕗' => '&fopf;',
+ '𝕘' => '&gopf;',
+ '𝕙' => '&hopf;',
+ '𝕚' => '&iopf;',
+ '𝕛' => '&jopf;',
+ '𝕜' => '&kopf;',
+ '𝕝' => '&lopf;',
+ '𝕞' => '&mopf;',
+ '𝕟' => '&nopf;',
+ '𝕠' => '&oopf;',
+ '𝕡' => '&popf;',
+ '𝕢' => '&qopf;',
+ '𝕣' => '&ropf;',
+ '𝕤' => '&sopf;',
+ '𝕥' => '&topf;',
+ '𝕦' => '&uopf;',
+ '𝕧' => '&vopf;',
+ '𝕨' => '&wopf;',
+ '𝕩' => '&xopf;',
+ '𝕪' => '&yopf;',
+ '𝕫' => '&zopf;'
+ );
}
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php
index 2425958..168c65c 100644
--- a/src/HTML5/Serializer/OutputRules.php
+++ b/src/HTML5/Serializer/OutputRules.php
@@ -13,302 +13,325 @@ use Masterminds\HTML5\Elements;
/**
* Generate the output html5 based on element rules.
*/
-class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface {
+class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
+{
- const IM_IN_HTML = 1;
- const IM_IN_SVG = 2;
- const IM_IN_MATHML = 3;
+ const IM_IN_HTML = 1;
- protected $traverser;
- protected $encode = FALSE;
- protected $out;
- protected $outputMode;
+ const IM_IN_SVG = 2;
- const DOCTYPE = '<!DOCTYPE html>';
+ const IM_IN_MATHML = 3;
- public function __construct($output, $options = array()) {
+ protected $traverser;
- if (isset($options['encode_entities'])) {
- $this->encode = $options['encode_entities'];
- }
+ protected $encode = FALSE;
- $this->outputMode = static::IM_IN_HTML;
- $this->out = $output;
- }
+ protected $out;
- public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) {
- $this->traverser = $traverser;
+ protected $outputMode;
- return $this;
- }
+ const DOCTYPE = '<!DOCTYPE html>';
- public function document($dom) {
- $this->doctype();
- $this->traverser->node($dom->documentElement);
- $this->nl();
- }
+ public function __construct($output, $options = array())
+ {
+ if (isset($options['encode_entities'])) {
+ $this->encode = $options['encode_entities'];
+ }
- protected function doctype() {
- $this->wr(static::DOCTYPE);
- $this->nl();
- }
+ $this->outputMode = static::IM_IN_HTML;
+ $this->out = $output;
+ }
- public function element($ele) {
- $name = $ele->tagName;
+ public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
+ {
+ $this->traverser = $traverser;
- // Per spec:
- // If the element has a declared namespace in the HTML, MathML or
- // SVG namespaces, we use the lname instead of the tagName.
- if ($this->traverser->isLocalElement($ele)) {
- $name = $ele->localName;
+ return $this;
}
- // If we are in SVG or MathML there is special handling.
- // Using if/elseif instead of switch because it's faster in PHP.
- if ($name == 'svg') {
- $this->outputMode = static::IM_IN_SVG;
- $name = Elements::normalizeSvgElement($name);
+ public function document($dom)
+ {
+ $this->doctype();
+ $this->traverser->node($dom->documentElement);
+ $this->nl();
}
- elseif ($name == 'math') {
- $this->outputMode = static::IM_IN_MATHML;
- }
-
- $this->openTag($ele);
- // Handle children.
- if ($ele->hasChildNodes()) {
- $this->traverser->children($ele->childNodes);
+ protected function doctype()
+ {
+ $this->wr(static::DOCTYPE);
+ $this->nl();
}
- // Close out the SVG or MathML special handling.
- if ($name == 'svg' || $name == 'math') {
- $this->outputMode = static::IM_IN_HTML;
+ public function element($ele)
+ {
+ $name = $ele->tagName;
+
+ // Per spec:
+ // If the element has a declared namespace in the HTML, MathML or
+ // SVG namespaces, we use the lname instead of the tagName.
+ if ($this->traverser->isLocalElement($ele)) {
+ $name = $ele->localName;
+ }
+
+ // If we are in SVG or MathML there is special handling.
+ // Using if/elseif instead of switch because it's faster in PHP.
+ if ($name == 'svg') {
+ $this->outputMode = static::IM_IN_SVG;
+ $name = Elements::normalizeSvgElement($name);
+ } elseif ($name == 'math') {
+ $this->outputMode = static::IM_IN_MATHML;
+ }
+
+ $this->openTag($ele);
+
+ // Handle children.
+ if ($ele->hasChildNodes()) {
+ $this->traverser->children($ele->childNodes);
+ }
+
+ // Close out the SVG or MathML special handling.
+ if ($name == 'svg' || $name == 'math') {
+ $this->outputMode = static::IM_IN_HTML;
+ }
+
+ // If not unary, add a closing tag.
+ if (! Elements::isA($name, Elements::VOID_TAG)) {
+ $this->closeTag($ele);
+ }
}
- // If not unary, add a closing tag.
- if (!Elements::isA($name, Elements::VOID_TAG)) {
- $this->closeTag($ele);
- }
- }
-
- /**
- * Write a text node.
- *
- * @param \DOMText $ele
- * The text node to write.
- */
- public function text($ele) {
- if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) {
- $this->wr($ele->data);
- return;
+ /**
+ * Write a text node.
+ *
+ * @param \DOMText $ele
+ * The text node to write.
+ */
+ public function text($ele)
+ {
+ if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) {
+ $this->wr($ele->data);
+
+ return;
+ }
+
+ // FIXME: This probably needs some flags set.
+ $this->wr($this->enc($ele->data));
}
- // FIXME: This probably needs some flags set.
- $this->wr($this->enc($ele->data));
-
- }
-
- public function cdata($ele) {
- // This encodes CDATA.
- $this->wr($ele->ownerDocument->saveXML($ele));
- }
-
- public function comment($ele) {
- // These produce identical output.
- //$this->wr('<!--')->wr($ele->data)->wr('-->');
- $this->wr($ele->ownerDocument->saveXML($ele));
- }
-
- public function processorInstruction($ele) {
- $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr('?>');
- }
-
- /**
- * Write the opening tag.
- *
- * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
- * qualified name (8.3).
- *
- * @param \DOMNode $ele
- * The element being written.
- */
- protected function openTag($ele) {
- $this->wr('<')->wr($ele->tagName);
- $this->attrs($ele);
-
- if ($this->outputMode == static::IM_IN_HTML) {
- $this->wr('>');
- }
- // If we are not in html mode we are in SVG, MathML, or XML embedded content.
- else {
- if ($ele->hasChildNodes()) {
- $this->wr('>');
- }
- // If there are no children this is self closing.
- else {
- $this->wr(' />');
- }
+ public function cdata($ele)
+ {
+ // This encodes CDATA.
+ $this->wr($ele->ownerDocument->saveXML($ele));
}
- }
- protected function attrs($ele) {
- // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
- if (!$ele->hasAttributes()) {
- return $this;
+ public function comment($ele)
+ {
+ // These produce identical output.
+ // $this->wr('<!--')->wr($ele->data)->wr('-->');
+ $this->wr($ele->ownerDocument->saveXML($ele));
}
- // TODO: Currently, this always writes name="value", and does not do
- // value-less attributes.
- $map = $ele->attributes;
- $len = $map->length;
- for ($i = 0; $i < $len; ++$i) {
- $node = $map->item($i);
- $val = $this->enc($node->value, TRUE);
-
- // XXX: The spec says that we need to ensure that anything in
- // the XML, XMLNS, or XLink NS's should use the canonical
- // prefix. It seems that DOM does this for us already, but there
- // may be exceptions.
- $name = $node->name;
-
- // Special handling for attributes in SVG and MathML.
- // Using if/elseif instead of switch because it's faster in PHP.
- if ($this->outputMode == static::IM_IN_SVG) {
- $name = Elements::normalizeSvgAttribute($name);
- }
- elseif ($this->outputMode == static::IM_IN_MATHML) {
- $name = Elements::normalizeMathMlAttribute($name);
- }
-
- $this->wr(' ')->wr($name);
- if (isset($val) && $val !== '') {
- $this->wr('="')->wr($val)->wr('"');
- }
+ public function processorInstruction($ele)
+ {
+ $this->wr('<?')
+ ->wr($ele->target)
+ ->wr(' ')
+ ->wr($ele->data)
+ ->wr('?>');
}
- }
-
- /**
- * Write the closing tag.
- *
- * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
- * qualified name (8.3).
- *
- * @param \DOMNode $ele
- * The element being written.
- */
- protected function closeTag($ele) {
- if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
- $this->wr('</')->wr($ele->tagName)->wr('>');
+
+ /**
+ * Write the opening tag.
+ *
+ * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
+ * qualified name (8.3).
+ *
+ * @param \DOMNode $ele
+ * The element being written.
+ */
+ protected function openTag($ele)
+ {
+ $this->wr('<')->wr($ele->tagName);
+ $this->attrs($ele);
+
+ if ($this->outputMode == static::IM_IN_HTML) {
+ $this->wr('>');
+ } // If we are not in html mode we are in SVG, MathML, or XML embedded content.
+ else {
+ if ($ele->hasChildNodes()) {
+ $this->wr('>');
+ } // If there are no children this is self closing.
+ else {
+ $this->wr(' />');
+ }
+ }
}
- }
-
- /**
- * Write to the output.
- *
- * @param string $text
- * The string to put into the output.
- *
- * @return Masterminds\HTML5\Serializer\Traverser
- * $this so it can be used in chaining.
- */
- protected function wr($text) {
- fwrite($this->out, $text);
- return $this;
- }
-
- /**
- * Write a new line character.
- *
- * @return Masterminds\HTML5\Serializer\Traverser
- * $this so it can be used in chaining.
- */
- protected function nl() {
- fwrite($this->out, PHP_EOL);
- return $this;
- }
-
- /**
- * Encode text.
- *
- * When encode is set to FALSE, the default value, the text passed in is
- * escaped per section 8.3 of the html5 spec. For details on how text is
- * escaped see the escape() method.
- *
- * When encoding is set to true the text is converted to named character
- * references where appropriate. Section 8.1.4 Character references of the
- * html5 spec refers to using named character references. This is useful for
- * characters that can't otherwise legally be used in the text.
- *
- * The named character references are listed in section 8.5.
- *
- * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
- *
- * True encoding will turn all named character references into their entities.
- * This includes such characters as +.# and many other common ones. By default
- * encoding here will just escape &'<>".
- *
- * Note, PHP 5.4+ has better html5 encoding.
- *
- * @todo Use the Entities class in php 5.3 to have html5 entities.
- *
- * @param string $text
- * text to encode.
- * @param boolean $attribute
- * True if we are encoding an attrubute, false otherwise
- *
- * @return string
- * The encoded text.
- */
- protected function enc($text, $attribute = FALSE) {
-
- // Escape the text rather than convert to named character references.
- if (!$this->encode) {
- return $this->escape($text, $attribute);
+
+ protected function attrs($ele)
+ {
+ // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
+ if (! $ele->hasAttributes()) {
+ return $this;
+ }
+
+ // TODO: Currently, this always writes name="value", and does not do
+ // value-less attributes.
+ $map = $ele->attributes;
+ $len = $map->length;
+ for ($i = 0; $i < $len; ++ $i) {
+ $node = $map->item($i);
+ $val = $this->enc($node->value, TRUE);
+
+ // XXX: The spec says that we need to ensure that anything in
+ // the XML, XMLNS, or XLink NS's should use the canonical
+ // prefix. It seems that DOM does this for us already, but there
+ // may be exceptions.
+ $name = $node->name;
+
+ // Special handling for attributes in SVG and MathML.
+ // Using if/elseif instead of switch because it's faster in PHP.
+ if ($this->outputMode == static::IM_IN_SVG) {
+ $name = Elements::normalizeSvgAttribute($name);
+ } elseif ($this->outputMode == static::IM_IN_MATHML) {
+ $name = Elements::normalizeMathMlAttribute($name);
+ }
+
+ $this->wr(' ')->wr($name);
+ if (isset($val) && $val !== '') {
+ $this->wr('="')->wr($val)->wr('"');
+ }
+ }
}
- // If we are in PHP 5.4+ we can use the native html5 entity functionality to
- // convert the named character references.
- if (defined('ENT_HTML5')) {
- return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE);
+ /**
+ * Write the closing tag.
+ *
+ * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
+ * qualified name (8.3).
+ *
+ * @param \DOMNode $ele
+ * The element being written.
+ */
+ protected function closeTag($ele)
+ {
+ if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
+ $this->wr('</')->wr($ele->tagName)->wr('>');
+ }
}
- // If a version earlier than 5.4 html5 entities are not entirely handled.
- // This manually handles them.
- else {
- return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
+
+ /**
+ * Write to the output.
+ *
+ * @param string $text
+ * The string to put into the output.
+ *
+ * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
+ */
+ protected function wr($text)
+ {
+ fwrite($this->out, $text);
+
+ return $this;
}
- }
-
- /**
- * Escape test.
- *
- * According to the html5 spec section 8.3 Serializing HTML fragments, text
- * within tags that are not style, script, xmp, iframe, noembed, and noframes
- * need to be properly escaped.
- *
- * The & should be converted to &amp;, no breaking space unicode characters
- * converted to &nbsp;, when in attribute mode the " should be converted to
- * &quot;, and when not in attribute mode the < and > should be converted to
- * &lt; and &gt;.
- *
- * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
- *
- * @param string $text
- * text to escape.
- * @param boolean $attribute
- * True if we are escaping an attrubute, false otherwise
- */
- protected function escape($text, $attribute = FALSE) {
-
- // Not using htmlspecialchars because, while it does escaping, it doesn't
- // match the requirements of section 8.5. For example, it doesn't handle
- // non-breaking spaces.
- if ($attribute) {
- $replace = array('"'=>'&quot;', '&'=>'&amp;', "\xc2\xa0"=>'&nbsp;');
+
+ /**
+ * Write a new line character.
+ *
+ * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
+ */
+ protected function nl()
+ {
+ fwrite($this->out, PHP_EOL);
+
+ return $this;
}
- else {
- $replace = array('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', "\xc2\xa0"=>'&nbsp;');
+
+ /**
+ * Encode text.
+ *
+ * When encode is set to FALSE, the default value, the text passed in is
+ * escaped per section 8.3 of the html5 spec. For details on how text is
+ * escaped see the escape() method.
+ *
+ * When encoding is set to true the text is converted to named character
+ * references where appropriate. Section 8.1.4 Character references of the
+ * html5 spec refers to using named character references. This is useful for
+ * characters that can't otherwise legally be used in the text.
+ *
+ * The named character references are listed in section 8.5.
+ *
+ * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
+ *
+ * True encoding will turn all named character references into their entities.
+ * This includes such characters as +.# and many other common ones. By default
+ * encoding here will just escape &, <, >, " and non-breaking spaces (see escape()).
+ *
+ * Note, PHP 5.4+ has better html5 encoding.
+ *
+ * @todo Use the Entities class in php 5.3 to have html5 entities.
+ *
+ * @param string $text
+ * text to encode.
+ * @param boolean $attribute
+ * True if we are encoding an attribute, false otherwise
+ *
+ * @return string The encoded text.
+ */
+ protected function enc($text, $attribute = FALSE)
+ {
+ // Escape the text rather than convert to named character references.
+ if (! $this->encode) {
+ return $this->escape($text, $attribute);
+ }
+
+ // If we are in PHP 5.4+ we can use the native html5 entity functionality to
+ // convert the named character references.
+ if (defined('ENT_HTML5')) {
+ return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE);
+ } // If a version earlier than 5.4 html5 entities are not entirely handled.
+ // This manually handles them.
+ else {
+ return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
+ }
}
- return strtr($text, $replace);
- }
+ /**
+ * Escape text.
+ *
+ * According to the html5 spec section 8.3 Serializing HTML fragments, text
+ * within tags that are not style, script, xmp, iframe, noembed, and noframes
+ * need to be properly escaped.
+ *
+ * The & should be converted to &amp;, non-breaking space Unicode characters
+ * converted to &nbsp;, when in attribute mode the " should be converted to
+ * &quot;, and when not in attribute mode the < and > should be converted to
+ * &lt; and &gt;.
+ *
+ * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
+ *
+ * @param string $text
+ * text to escape.
+ * @param boolean $attribute
+ * True if we are escaping an attribute, false otherwise
+ */
+ protected function escape($text, $attribute = FALSE)
+ {
+ // Not using htmlspecialchars because, while it does escaping, it doesn't
+ // match the requirements of section 8.5. For example, it doesn't handle
+ // non-breaking spaces.
+ if ($attribute) {
+ $replace = array(
+ '"' => '&quot;',
+ '&' => '&amp;',
+ "\xc2\xa0" => '&nbsp;'
+ );
+ } else {
+ $replace = array(
+ '<' => '&lt;',
+ '>' => '&gt;',
+ '&' => '&amp;',
+ "\xc2\xa0" => '&nbsp;'
+ );
+ }
+
+ return strtr($text, $replace);
+ }
}
diff --git a/src/HTML5/Serializer/RulesInterface.php b/src/HTML5/Serializer/RulesInterface.php
index 86f44af..6ef5e5e 100644
--- a/src/HTML5/Serializer/RulesInterface.php
+++ b/src/HTML5/Serializer/RulesInterface.php
@@ -7,96 +7,97 @@ namespace Masterminds\HTML5\Serializer;
/**
* To create a new rule set for writing output the RulesInterface needs to be
- * implemented. The resulting class can be specified in the options with the
+ * implemented.
+ * The resulting class can be specified in the options with the
* key of rules.
*
* For an example implementation see \Masterminds\HTML5\Serializer\OutputRules.
*/
-interface RulesInterface {
+interface RulesInterface
+{
- /**
- * The class constructor.
- *
- * Note, before the rules can be used a traverser must be registered.
- *
- * @param mixed $output
- * The output stream to write output to.
- * @param array $options
- * An array of options.
- */
- public function __construct($output, $options = array());
+ /**
+ * The class constructor.
+ *
+ * Note, before the rules can be used a traverser must be registered.
+ *
+ * @param mixed $output
+ * The output stream to write output to.
+ * @param array $options
+ * An array of options.
+ */
+ public function __construct($output, $options = array());
- /**
- * Register the traverser used in but the rules.
- *
- * Note, only one traverser can be used by the rules.
- *
- * @param \Masterminds\HTML5\Serializer\Traverser $traverser
- * The traverser used in the rules.
- * @return \Masterminds\HTML5\Serializer\RulesInterface
- * $this for the current object.
- */
- public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser);
+ /**
+ * Register the traverser used by the rules.
+ *
+ * Note, only one traverser can be used by the rules.
+ *
+ * @param \Masterminds\HTML5\Serializer\Traverser $traverser
+ * The traverser used in the rules.
+ * @return \Masterminds\HTML5\Serializer\RulesInterface $this for the current object.
+ */
+ public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser);
- /**
- * Write a document element (\DOMDocument).
- *
- * Instead of returning the result write it to the output stream ($output)
- * that was passed into the constructor.
- *
- * @param \DOMDocument $dom
- */
- public function document($dom);
+ /**
+ * Write a document element (\DOMDocument).
+ *
+ * Instead of returning the result write it to the output stream ($output)
+ * that was passed into the constructor.
+ *
+ * @param \DOMDocument $dom
+ */
+ public function document($dom);
- /**
- * Write an element.
- *
- * Instead of returning the result write it to the output stream ($output)
- * that was passed into the constructor.
- *
- * @param mixed $ele
- */
- public function element($ele);
+ /**
+ * Write an element.
+ *
+ * Instead of returning the result write it to the output stream ($output)
+ * that was passed into the constructor.
+ *
+ * @param mixed $ele
+ */
+ public function element($ele);
- /**
- * Write a text node.
- *
- * Instead of returning the result write it to the output stream ($output)
- * that was passed into the constructor.
- *
- * @param mixed $ele
- */
- public function text($ele);
+ /**
+ * Write a text node.
+ *
+ * Instead of returning the result write it to the output stream ($output)
+ * that was passed into the constructor.
+ *
+ * @param mixed $ele
+ */
+ public function text($ele);
- /**
- * Write a CDATA node.
- *
- * Instead of returning the result write it to the output stream ($output)
- * that was passed into the constructor.
- *
- * @param mixed $ele
- */
- public function cdata($ele);
+ /**
+ * Write a CDATA node.
+ *
+ * Instead of returning the result write it to the output stream ($output)
+ * that was passed into the constructor.
+ *
+ * @param mixed $ele
+ */
+ public function cdata($ele);
- /**
- * Write a comment node.
- *
- * Instead of returning the result write it to the output stream ($output)
- * that was passed into the constructor.
- *
- * @param mixed $ele
- */
- public function comment($ele);
+ /**
+ * Write a comment node.
+ *
+ * Instead of returning the result write it to the output stream ($output)
+ * that was passed into the constructor.
+ *
+ * @param mixed $ele
+ */
+ public function comment($ele);
- /**
- * Write a processor instruction.
- *
- * To learn about processor instructions see \Masterminds\HTML5\InstructionProcessor
- *
- * Instead of returning the result write it to the output stream ($output)
- * that was passed into the constructor.
- *
- * @param mixed $ele
- */
- public function processorInstruction($ele);
-} \ No newline at end of file
+ /**
+ * Write a processor instruction.
+ *
+ * To learn about processor instructions see \Masterminds\HTML5\InstructionProcessor
+ *
+ * Instead of returning the result write it to the output stream ($output)
+ * that was passed into the constructor.
+ *
+ * @param mixed $ele
+ */
+ public function processorInstruction($ele);
+}
diff --git a/src/HTML5/Serializer/Traverser.php b/src/HTML5/Serializer/Traverser.php
index bcb9814..3bd55b4 100644
--- a/src/HTML5/Serializer/Traverser.php
+++ b/src/HTML5/Serializer/Traverser.php
@@ -4,139 +4,147 @@ namespace Masterminds\HTML5\Serializer;
/**
* Traverser for walking a DOM tree.
*
- * This is a concrete traverser designed to convert a DOM tree into an
- * HTML5 document. It is not intended to be a generic DOMTreeWalker
+ * This is a concrete traverser designed to convert a DOM tree into an
+ * HTML5 document. It is not intended to be a generic DOMTreeWalker
* implementation.
*
* @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments
*/
-class Traverser {
+class Traverser
+{
- /** Namespaces that should be treated as "local" to HTML5. */
- static $local_ns = array(
- 'http://www.w3.org/1999/xhtml' => 'html',
- 'http://www.w3.org/1998/Math/MathML' => 'math',
- 'http://www.w3.org/2000/svg' => 'svg',
- );
+ /**
+ * Namespaces that should be treated as "local" to HTML5.
+ */
+ static $local_ns = array(
+ 'http://www.w3.org/1999/xhtml' => 'html',
+ 'http://www.w3.org/1998/Math/MathML' => 'math',
+ 'http://www.w3.org/2000/svg' => 'svg'
+ );
- protected $dom;
- protected $options;
- protected $encode = FALSE;
- protected $rules;
- protected $out;
+ protected $dom;
- /**
- * Create a traverser.
- *
- * @param DOMNode|DOMNodeList $dom
- * The document or node to traverse.
- * @param resource $out
- * A stream that allows writing. The traverser will output into this
- * stream.
- * @param array $options
- * An array or options for the traverser as key/value pairs. These include:
- * - encode_entities: A bool to specify if full encding should happen for all named
- * charachter references. Defaults to FALSE which escapes &'<>".
- * - output_rules: The path to the class handling the output rules.
- */
- public function __construct($dom, $out, RulesInterface $rules, $options = array()) {
- $this->dom = $dom;
- $this->out = $out;
- $this->rules = $rules;
- $this->options = $options;
+ protected $options;
- $this->rules->setTraverser($this);
- }
+ protected $encode = FALSE;
- /**
- * Tell the traverser to walk the DOM.
- *
- * @return resource $out
- * Returns the output stream.
- */
- public function walk() {
-
- if ($this->dom instanceof \DOMDocument) {
- $this->rules->document($this->dom);
- }
- elseif ($this->dom instanceof \DOMDocumentFragment) {
- // Document fragments are a special case. Only the children need to
- // be serialized.
- if ($this->dom->hasChildNodes()) {
- $this->children($this->dom->childNodes);
- }
- }
- // If NodeList, loop
- elseif ($this->dom instanceof \DOMNodeList) {
- // If this is a NodeList of DOMDocuments this will not work.
- $this->children($this->dom);
- }
- // Else assume this is a DOMNode-like datastructure.
- else {
- $this->node($this->dom);
+ protected $rules;
+
+ protected $out;
+
+ /**
+ * Create a traverser.
+ *
+ * @param DOMNode|DOMNodeList $dom
+ * The document or node to traverse.
+ * @param resource $out
+ * A stream that allows writing. The traverser will output into this
+ * stream.
+ * @param array $options
+ * An array of options for the traverser as key/value pairs. These include:
+ * - encode_entities: A bool to specify if full encoding should happen for all named
+ * character references. Defaults to FALSE which escapes &'<>".
+ * - output_rules: The path to the class handling the output rules.
+ */
+ public function __construct($dom, $out, RulesInterface $rules, $options = array())
+ {
+ $this->dom = $dom;
+ $this->out = $out;
+ $this->rules = $rules;
+ $this->options = $options;
+
+ $this->rules->setTraverser($this);
}
- return $this->out;
- }
+ /**
+ * Tell the traverser to walk the DOM.
+ *
+ * @return resource $out
+ * Returns the output stream.
+ */
+ public function walk()
+ {
+ if ($this->dom instanceof \DOMDocument) {
+ $this->rules->document($this->dom);
+ } elseif ($this->dom instanceof \DOMDocumentFragment) {
+ // Document fragments are a special case. Only the children need to
+ // be serialized.
+ if ($this->dom->hasChildNodes()) {
+ $this->children($this->dom->childNodes);
+ }
+ } // If NodeList, loop
+ elseif ($this->dom instanceof \DOMNodeList) {
+ // If this is a NodeList of DOMDocuments this will not work.
+ $this->children($this->dom);
+ } // Else assume this is a DOMNode-like datastructure.
+ else {
+ $this->node($this->dom);
+ }
+
+ return $this->out;
+ }
- /**
- * Process a node in the DOM.
- *
- * @param mixed $node
- * A node implementing \DOMNode.
- */
- public function node($node) {
- // A listing of types is at http://php.net/manual/en/dom.constants.php
- switch ($node->nodeType) {
- case XML_ELEMENT_NODE:
- $this->rules->element($node);
- break;
- case XML_TEXT_NODE:
- $this->rules->text($node);
- break;
- case XML_CDATA_SECTION_NODE:
- $this->rules->cdata($node);
- break;
- // FIXME: It appears that the parser doesn't do PI's.
- case XML_PI_NODE:
- $this->rules->processorInstruction($node);
- break;
- case XML_COMMENT_NODE:
- $this->rules->comment($node);
- break;
- // Currently we don't support embedding DTDs.
- default:
- print '<!-- Skipped -->';
- break;
+ /**
+ * Process a node in the DOM.
+ *
+ * @param mixed $node
+ * A node implementing \DOMNode.
+ */
+ public function node($node)
+ {
+ // A listing of types is at http://php.net/manual/en/dom.constants.php
+ switch ($node->nodeType) {
+ case XML_ELEMENT_NODE:
+ $this->rules->element($node);
+ break;
+ case XML_TEXT_NODE:
+ $this->rules->text($node);
+ break;
+ case XML_CDATA_SECTION_NODE:
+ $this->rules->cdata($node);
+ break;
+ // FIXME: It appears that the parser doesn't do PI's.
+ case XML_PI_NODE:
+ $this->rules->processorInstruction($node);
+ break;
+ case XML_COMMENT_NODE:
+ $this->rules->comment($node);
+ break;
+ // Currently we don't support embedding DTDs.
+ default:
+ print '<!-- Skipped -->';
+ break;
+ }
}
- }
- /**
- * Walk through all the nodes on a node list.
- *
- * @param \DOMNodeList $nl
- * A list of child elements to walk through.
- */
- public function children($nl) {
- foreach ($nl as $node) {
- $this->node($node);
+ /**
+ * Walk through all the nodes on a node list.
+ *
+ * @param \DOMNodeList $nl
+ * A list of child elements to walk through.
+ */
+ public function children($nl)
+ {
+ foreach ($nl as $node) {
+ $this->node($node);
+ }
}
- }
- /**
- * Is an element local?
- *
- * @param mixed $ele
- * An element that implement \DOMNode.
- *
- * @return bool
- * True if local and false otherwise.
- */
- public function isLocalElement($ele) {
- $uri = $ele->namespaceURI;
- if (empty($uri)) {
- return FALSE;
+ /**
+ * Is an element local?
+ *
+ * @param mixed $ele
+ * An element that implements \DOMNode.
+ *
+ * @return bool True if local and false otherwise.
+ */
+ public function isLocalElement($ele)
+ {
+ $uri = $ele->namespaceURI;
+ if (empty($uri)) {
+ return FALSE;
+ }
+
+ return isset(static::$local_ns[$uri]);
}
- return isset(static::$local_ns[$uri]);
- }
}
diff --git a/test/HTML5/ElementsTest.php b/test/HTML5/ElementsTest.php
index 4d0709a..629b561 100644
--- a/test/HTML5/ElementsTest.php
+++ b/test/HTML5/ElementsTest.php
@@ -2,412 +2,485 @@
namespace Masterminds\HTML5\Tests;
use Masterminds\HTML5\Elements;
-use Masterminds\HTML5\Tests\TestCase;
-
-class ElementsTest extends TestCase {
-
- public $html5Elements = array(
- "a",
- "abbr",
- "address",
- "area",
- "article",
- "aside",
- "audio",
- "b",
- "base",
- "bdi",
- "bdo",
- "blockquote",
- "body",
- "br",
- "button",
- "canvas",
- "caption",
- "cite",
- "code",
- "col",
- "colgroup",
- "command",
- //"data",
- "datalist",
- "dd",
- "del",
- "details",
- "dfn",
- "dialog",
- "div",
- "dl",
- "dt",
- "em",
- "embed",
- "fieldset",
- "figcaption",
- "figure",
- "footer",
- "form",
- "h1",
- "h2",
- "h3",
- "h4",
- "h5",
- "h6",
- "head",
- "header",
- "hgroup",
- "hr",
- "html",
- "i",
- "iframe",
- "img",
- "input",
- "ins",
- "kbd",
- "keygen",
- "label",
- "legend",
- "li",
- "link",
- "map",
- "mark",
- "menu",
- "meta",
- "meter",
- "nav",
- "noscript",
- "object",
- "ol",
- "optgroup",
- "option",
- "output",
- "p",
- "param",
- "pre",
- "progress",
- "q",
- "rp",
- "rt",
- "ruby",
- "s",
- "samp",
- "script",
- "section",
- "select",
- "small",
- "source",
- "span",
- "strong",
- "style",
- "sub",
- "summary",
- "sup",
- "table",
- "tbody",
- "td",
- "textarea",
- "tfoot",
- "th",
- "thead",
- "time",
- "title",
- "tr",
- "track",
- "u",
- "ul",
- "var",
- "video",
- "wbr",
- );
-
- public $mathmlElements = array(
- "maction",
- "maligngroup",
- "malignmark",
- "math",
- "menclose",
- "merror",
- "mfenced",
- "mfrac",
- "mglyph",
- "mi",
- "mlabeledtr",
- "mlongdiv",
- "mmultiscripts",
- "mn",
- "mo",
- "mover",
- "mpadded",
- "mphantom",
- "mroot",
- "mrow",
- "ms",
- "mscarries",
- "mscarry",
- "msgroup",
- "msline",
- "mspace",
- "msqrt",
- "msrow",
- "mstack",
- "mstyle",
- "msub",
- "msup",
- "msubsup",
- "mtable",
- "mtd",
- "mtext",
- "mtr",
- "munder",
- "munderover",
- );
-
- public $svgElements = array(
- "a",
- "altGlyph",
- "altGlyphDef",
- "altGlyphItem",
- "animate",
- "animateColor",
- "animateMotion",
- "animateTransform",
- "circle",
- "clipPath",
- "color-profile",
- "cursor",
- "defs",
- "desc",
- "ellipse",
- "feBlend",
- "feColorMatrix",
- "feComponentTransfer",
- "feComposite",
- "feConvolveMatrix",
- "feDiffuseLighting",
- "feDisplacementMap",
- "feDistantLight",
- "feFlood",
- "feFuncA",
- "feFuncB",
- "feFuncG",
- "feFuncR",
- "feGaussianBlur",
- "feImage",
- "feMerge",
- "feMergeNode",
- "feMorphology",
- "feOffset",
- "fePointLight",
- "feSpecularLighting",
- "feSpotLight",
- "feTile",
- "feTurbulence",
- "filter",
- "font",
- "font-face",
- "font-face-format",
- "font-face-name",
- "font-face-src",
- "font-face-uri",
- "foreignObject",
- "g",
- "glyph",
- "glyphRef",
- "hkern",
- "image",
- "line",
- "linearGradient",
- "marker",
- "mask",
- "metadata",
- "missing-glyph",
- "mpath",
- "path",
- "pattern",
- "polygon",
- "polyline",
- "radialGradient",
- "rect",
- "script",
- "set",
- "stop",
- "style",
- "svg",
- "switch",
- "symbol",
- "text",
- "textPath",
- "title",
- "tref",
- "tspan",
- "use",
- "view",
- "vkern",
- );
-
- public function testIsHtml5Element() {
-
- foreach ($this->html5Elements as $element) {
- $this->assertTrue(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element);
-
- $this->assertTrue(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
- }
-
- $nonhtml5 = array('foo', 'bar', 'baz');
- foreach ($nonhtml5 as $element) {
- $this->assertFalse(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element);
-
- $this->assertFalse(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
- }
- }
-
- public function testIsMathMLElement() {
- foreach ($this->mathmlElements as $element) {
- $this->assertTrue(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element);
-
- // MathML is case sensetitive so these should all fail.
- $this->assertFalse(Elements::isMathMLElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element));
- }
-
- $nonMathML = array('foo', 'bar', 'baz');
- foreach ($nonMathML as $element) {
- $this->assertFalse(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element);
- }
- }
- public function testIsSvgElement() {
- foreach ($this->svgElements as $element) {
- $this->assertTrue(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element);
-
- // SVG is case sensetitive so these should all fail.
- $this->assertFalse(Elements::isSvgElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element));
- }
-
- $nonSVG = array('foo', 'bar', 'baz');
- foreach ($nonSVG as $element) {
- $this->assertFalse(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element);
- }
- }
-
- public function testIsElement() {
- foreach ($this->html5Elements as $element) {
- $this->assertTrue(Elements::isElement($element), 'html5 element test failed on: ' . $element);
-
- $this->assertTrue(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
- }
+class ElementsTest extends TestCase
+{
+
+ public $html5Elements = array(
+ "a",
+ "abbr",
+ "address",
+ "area",
+ "article",
+ "aside",
+ "audio",
+ "b",
+ "base",
+ "bdi",
+ "bdo",
+ "blockquote",
+ "body",
+ "br",
+ "button",
+ "canvas",
+ "caption",
+ "cite",
+ "code",
+ "col",
+ "colgroup",
+ "command",
+ // "data",
+ "datalist",
+ "dd",
+ "del",
+ "details",
+ "dfn",
+ "dialog",
+ "div",
+ "dl",
+ "dt",
+ "em",
+ "embed",
+ "fieldset",
+ "figcaption",
+ "figure",
+ "footer",
+ "form",
+ "h1",
+ "h2",
+ "h3",
+ "h4",
+ "h5",
+ "h6",
+ "head",
+ "header",
+ "hgroup",
+ "hr",
+ "html",
+ "i",
+ "iframe",
+ "img",
+ "input",
+ "ins",
+ "kbd",
+ "keygen",
+ "label",
+ "legend",
+ "li",
+ "link",
+ "map",
+ "mark",
+ "menu",
+ "meta",
+ "meter",
+ "nav",
+ "noscript",
+ "object",
+ "ol",
+ "optgroup",
+ "option",
+ "output",
+ "p",
+ "param",
+ "pre",
+ "progress",
+ "q",
+ "rp",
+ "rt",
+ "ruby",
+ "s",
+ "samp",
+ "script",
+ "section",
+ "select",
+ "small",
+ "source",
+ "span",
+ "strong",
+ "style",
+ "sub",
+ "summary",
+ "sup",
+ "table",
+ "tbody",
+ "td",
+ "textarea",
+ "tfoot",
+ "th",
+ "thead",
+ "time",
+ "title",
+ "tr",
+ "track",
+ "u",
+ "ul",
+ "var",
+ "video",
+ "wbr"
+ );
- foreach ($this->mathmlElements as $element) {
- $this->assertTrue(Elements::isElement($element), 'MathML element test failed on: ' . $element);
+ public $mathmlElements = array(
+ "maction",
+ "maligngroup",
+ "malignmark",
+ "math",
+ "menclose",
+ "merror",
+ "mfenced",
+ "mfrac",
+ "mglyph",
+ "mi",
+ "mlabeledtr",
+ "mlongdiv",
+ "mmultiscripts",
+ "mn",
+ "mo",
+ "mover",
+ "mpadded",
+ "mphantom",
+ "mroot",
+ "mrow",
+ "ms",
+ "mscarries",
+ "mscarry",
+ "msgroup",
+ "msline",
+ "mspace",
+ "msqrt",
+ "msrow",
+ "mstack",
+ "mstyle",
+ "msub",
+ "msup",
+ "msubsup",
+ "mtable",
+ "mtd",
+ "mtext",
+ "mtr",
+ "munder",
+ "munderover"
+ );
- // MathML is case sensetitive so these should all fail.
- $this->assertFalse(Elements::isElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element));
- }
+ public $svgElements = array(
+ "a",
+ "altGlyph",
+ "altGlyphDef",
+ "altGlyphItem",
+ "animate",
+ "animateColor",
+ "animateMotion",
+ "animateTransform",
+ "circle",
+ "clipPath",
+ "color-profile",
+ "cursor",
+ "defs",
+ "desc",
+ "ellipse",
+ "feBlend",
+ "feColorMatrix",
+ "feComponentTransfer",
+ "feComposite",
+ "feConvolveMatrix",
+ "feDiffuseLighting",
+ "feDisplacementMap",
+ "feDistantLight",
+ "feFlood",
+ "feFuncA",
+ "feFuncB",
+ "feFuncG",
+ "feFuncR",
+ "feGaussianBlur",
+ "feImage",
+ "feMerge",
+ "feMergeNode",
+ "feMorphology",
+ "feOffset",
+ "fePointLight",
+ "feSpecularLighting",
+ "feSpotLight",
+ "feTile",
+ "feTurbulence",
+ "filter",
+ "font",
+ "font-face",
+ "font-face-format",
+ "font-face-name",
+ "font-face-src",
+ "font-face-uri",
+ "foreignObject",
+ "g",
+ "glyph",
+ "glyphRef",
+ "hkern",
+ "image",
+ "line",
+ "linearGradient",
+ "marker",
+ "mask",
+ "metadata",
+ "missing-glyph",
+ "mpath",
+ "path",
+ "pattern",
+ "polygon",
+ "polyline",
+ "radialGradient",
+ "rect",
+ "script",
+ "set",
+ "stop",
+ "style",
+ "svg",
+ "switch",
+ "symbol",
+ "text",
+ "textPath",
+ "title",
+ "tref",
+ "tspan",
+ "use",
+ "view",
+ "vkern"
+ );
- foreach ($this->svgElements as $element) {
- $this->assertTrue(Elements::isElement($element), 'SVG element test failed on: ' . $element);
+ public function testIsHtml5Element()
+ {
+ foreach ($this->html5Elements as $element) {
+ $this->assertTrue(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element);
- // SVG is case sensetitive so these should all fail. But, there is duplication
- // html5 and SVG. Since html5 is case insensetitive we need to make sure
- // it's not a html5 element first.
- if (!in_array($element, $this->html5Elements)) {
- $this->assertFalse(Elements::isElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element));
- }
- }
+ $this->assertTrue(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
+ }
- $nonhtml5 = array('foo', 'bar', 'baz');
- foreach ($nonhtml5 as $element) {
- $this->assertFalse(Elements::isElement($element), 'html5 element test failed on: ' . $element);
+ $nonhtml5 = array(
+ 'foo',
+ 'bar',
+ 'baz'
+ );
+ foreach ($nonhtml5 as $element) {
+ $this->assertFalse(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element);
- $this->assertFalse(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
+ $this->assertFalse(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
+ }
}
- }
- public function testElement() {
- foreach ($this->html5Elements as $element) {
- $this->assertGreaterThan(0, Elements::element($element));
- }
- $nonhtml5 = array('foo', 'bar', 'baz');
- foreach ($nonhtml5 as $element) {
- $this->assertFalse(Elements::element($element));
+ public function testIsMathMLElement()
+ {
+ foreach ($this->mathmlElements as $element) {
+ $this->assertTrue(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element);
+
+ // MathML is case sensitive so these should all fail.
+ $this->assertFalse(Elements::isMathMLElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element));
+ }
+
+ $nonMathML = array(
+ 'foo',
+ 'bar',
+ 'baz'
+ );
+ foreach ($nonMathML as $element) {
+ $this->assertFalse(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element);
+ }
}
- }
- public function testIsA() {
- $this->assertTrue(Elements::isA('script', Elements::KNOWN_ELEMENT));
- $this->assertFalse(Elements::isA('scriptypoo', Elements::KNOWN_ELEMENT));
- $this->assertTrue(Elements::isA('script', Elements::TEXT_RAW));
- $this->assertFalse(Elements::isA('script', Elements::TEXT_RCDATA));
-
- $voidElements = array( 'area', 'base', 'basefont', 'bgsound', 'br', 'col',
- 'command', 'embed', 'frame', 'hr', 'img',
- );
-
- foreach ($voidElements as $element) {
- $this->assertTrue(Elements::isA($element, Elements::VOID_TAG), 'Void element test failed on: ' . $element);
+ public function testIsSvgElement()
+ {
+ foreach ($this->svgElements as $element) {
+ $this->assertTrue(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element);
+
+ // SVG is case sensitive so these should all fail.
+ $this->assertFalse(Elements::isSvgElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element));
+ }
+
+ $nonSVG = array(
+ 'foo',
+ 'bar',
+ 'baz'
+ );
+ foreach ($nonSVG as $element) {
+ $this->assertFalse(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element);
+ }
}
- $nonVoid = array('span', 'a', 'div');
- foreach ($nonVoid as $tag) {
- $this->assertFalse(Elements::isA($tag, Elements::VOID_TAG), 'Void element test failed on: ' . $tag);
+ public function testIsElement()
+ {
+ foreach ($this->html5Elements as $element) {
+ $this->assertTrue(Elements::isElement($element), 'html5 element test failed on: ' . $element);
+
+ $this->assertTrue(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
+ }
+
+ foreach ($this->mathmlElements as $element) {
+ $this->assertTrue(Elements::isElement($element), 'MathML element test failed on: ' . $element);
+
+ // MathML is case sensitive so these should all fail.
+ $this->assertFalse(Elements::isElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element));
+ }
+
+ foreach ($this->svgElements as $element) {
+ $this->assertTrue(Elements::isElement($element), 'SVG element test failed on: ' . $element);
+
+ // SVG is case sensitive so these should all fail. But, there is duplication
+ // between html5 and SVG. Since html5 is case insensitive we need to make sure
+ // it's not a html5 element first.
+ if (! in_array($element, $this->html5Elements)) {
+ $this->assertFalse(Elements::isElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element));
+ }
+ }
+
+ $nonhtml5 = array(
+ 'foo',
+ 'bar',
+ 'baz'
+ );
+ foreach ($nonhtml5 as $element) {
+ $this->assertFalse(Elements::isElement($element), 'html5 element test failed on: ' . $element);
+
+ $this->assertFalse(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element));
+ }
}
- $blockTags = array('address', 'article', 'aside', 'audio', 'blockquote',
- 'canvas', 'dd', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer',
- 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr',
- 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', 'tfoot',
- 'ul', 'video');
-
- foreach ($blockTags as $tag) {
- $this->assertTrue(Elements::isA($tag, Elements::BLOCK_TAG), 'Block tag test failed on: ' . $tag);
+ public function testElement()
+ {
+ foreach ($this->html5Elements as $element) {
+ $this->assertGreaterThan(0, Elements::element($element));
+ }
+ $nonhtml5 = array(
+ 'foo',
+ 'bar',
+ 'baz'
+ );
+ foreach ($nonhtml5 as $element) {
+ $this->assertFalse(Elements::element($element));
+ }
}
- $nonBlockTags = array('span', 'img', 'label');
- foreach($nonBlockTags as $tag) {
- $this->assertFalse(Elements::isA($tag, Elements::BLOCK_TAG), 'Block tag test failed on: ' . $tag);
-
+ public function testIsA()
+ {
+ $this->assertTrue(Elements::isA('script', Elements::KNOWN_ELEMENT));
+ $this->assertFalse(Elements::isA('scriptypoo', Elements::KNOWN_ELEMENT));
+ $this->assertTrue(Elements::isA('script', Elements::TEXT_RAW));
+ $this->assertFalse(Elements::isA('script', Elements::TEXT_RCDATA));
+
+ $voidElements = array(
+ 'area',
+ 'base',
+ 'basefont',
+ 'bgsound',
+ 'br',
+ 'col',
+ 'command',
+ 'embed',
+ 'frame',
+ 'hr',
+ 'img'
+ );
+
+ foreach ($voidElements as $element) {
+ $this->assertTrue(Elements::isA($element, Elements::VOID_TAG), 'Void element test failed on: ' . $element);
+ }
+
+ $nonVoid = array(
+ 'span',
+ 'a',
+ 'div'
+ );
+ foreach ($nonVoid as $tag) {
+ $this->assertFalse(Elements::isA($tag, Elements::VOID_TAG), 'Void element test failed on: ' . $tag);
+ }
+
+ $blockTags = array(
+ 'address',
+ 'article',
+ 'aside',
+ 'audio',
+ 'blockquote',
+ 'canvas',
+ 'dd',
+ 'div',
+ 'dl',
+ 'fieldset',
+ 'figcaption',
+ 'figure',
+ 'footer',
+ 'form',
+ 'h1',
+ 'h2',
+ 'h3',
+ 'h4',
+ 'h5',
+ 'h6',
+ 'header',
+ 'hgroup',
+ 'hr',
+ 'noscript',
+ 'ol',
+ 'output',
+ 'p',
+ 'pre',
+ 'section',
+ 'table',
+ 'tfoot',
+ 'ul',
+ 'video'
+ );
+
+ foreach ($blockTags as $tag) {
+ $this->assertTrue(Elements::isA($tag, Elements::BLOCK_TAG), 'Block tag test failed on: ' . $tag);
+ }
+
+ $nonBlockTags = array(
+ 'span',
+ 'img',
+ 'label'
+ );
+ foreach ($nonBlockTags as $tag) {
+ $this->assertFalse(Elements::isA($tag, Elements::BLOCK_TAG), 'Block tag test failed on: ' . $tag);
+ }
}
- }
-
- public function testNormalizeSvgElement() {
- $tests = array(
- 'foo' => 'foo',
- 'altglyph' => 'altGlyph',
- 'BAR' => 'bar',
- 'fespecularlighting' => 'feSpecularLighting',
- 'bAz' => 'baz',
- 'foreignobject' => 'foreignObject',
- );
-
- foreach ($tests as $input => $expected) {
- $this->assertEquals($expected, Elements::normalizeSvgElement($input));
+ public function testNormalizeSvgElement()
+ {
+ $tests = array(
+ 'foo' => 'foo',
+ 'altglyph' => 'altGlyph',
+ 'BAR' => 'bar',
+ 'fespecularlighting' => 'feSpecularLighting',
+ 'bAz' => 'baz',
+ 'foreignobject' => 'foreignObject'
+ );
+
+ foreach ($tests as $input => $expected) {
+ $this->assertEquals($expected, Elements::normalizeSvgElement($input));
+ }
}
- }
-
- public function testNormalizeSvgAttribute() {
- $tests = array(
- 'foo' => 'foo',
- 'attributename' => 'attributeName',
- 'BAR' => 'bar',
- 'limitingconeangle' => 'limitingConeAngle',
- 'bAz' => 'baz',
- 'patterncontentunits' => 'patternContentUnits',
- );
- foreach ($tests as $input => $expected) {
- $this->assertEquals($expected, Elements::normalizeSvgAttribute($input));
+ public function testNormalizeSvgAttribute()
+ {
+ $tests = array(
+ 'foo' => 'foo',
+ 'attributename' => 'attributeName',
+ 'BAR' => 'bar',
+ 'limitingconeangle' => 'limitingConeAngle',
+ 'bAz' => 'baz',
+ 'patterncontentunits' => 'patternContentUnits'
+ );
+
+ foreach ($tests as $input => $expected) {
+ $this->assertEquals($expected, Elements::normalizeSvgAttribute($input));
+ }
}
- }
-
- public function testNormalizeMathMlAttribute() {
- $tests = array(
- 'foo' => 'foo',
- 'definitionurl' => 'definitionURL',
- 'BAR' => 'bar',
- );
- foreach ($tests as $input => $expected) {
- $this->assertEquals($expected, Elements::normalizeMathMlAttribute($input));
+ public function testNormalizeMathMlAttribute()
+ {
+ $tests = array(
+ 'foo' => 'foo',
+ 'definitionurl' => 'definitionURL',
+ 'BAR' => 'bar'
+ );
+
+ foreach ($tests as $input => $expected) {
+ $this->assertEquals($expected, Elements::normalizeMathMlAttribute($input));
+ }
}
- }
-
}
diff --git a/test/HTML5/Html5Test.php b/test/HTML5/Html5Test.php
index 12c421a..851b2e4 100644
--- a/test/HTML5/Html5Test.php
+++ b/test/HTML5/Html5Test.php
@@ -1,138 +1,152 @@
<?php
namespace Masterminds\HTML5\Tests;
-class Html5Test extends TestCase {
+class Html5Test extends TestCase
+{
public function setUp()
{
$this->html5 = $this->getInstance();
}
- /**
- * Parse and serialize a string.
- */
- protected function cycle($html) {
-
- $dom = $this->html5->loadHTML('<!DOCTYPE html><html><body>' . $html . '</body></html>');
- $out = $this->html5->saveHTML($dom);
-
- return $out;
- }
-
- protected function cycleFragment($fragment) {
-
- $dom = $this->html5->loadHTMLFragment($fragment);
- $out = $this->html5->saveHTML($dom);
-
- return $out;
- }
-
- public function testErrors() {
- $dom = $this->html5->loadHTML('<xx as>');
- $this->assertInstanceOf('\DOMDocument', $dom);
-
- $this->assertNotEmpty($this->html5->getErrors());
- $this->assertTrue($this->html5->hasErrors());
- }
-
- public function testLoad() {
- $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
- $this->assertInstanceOf('\DOMDocument', $dom);
- $this->assertEmpty($this->html5->getErrors());
- $this->assertFalse($this->html5->hasErrors());
-
- $file = fopen(__DIR__ . '/Html5Test.html', 'r');
- $dom = $this->html5->load($file);
- $this->assertInstanceOf('\DOMDocument', $dom);
- $this->assertEmpty($this->html5->getErrors());
-
- $dom = $this->html5->loadHTMLFile(__DIR__ . '/Html5Test.html');
- $this->assertInstanceOf('\DOMDocument', $dom);
- $this->assertEmpty($this->html5->getErrors());
- }
-
- public function testLoadHTML() {
- $contents = file_get_contents(__DIR__ . '/Html5Test.html');
- $dom = $this->html5->loadHTML($contents);
- $this->assertInstanceOf('\DOMDocument', $dom);
- $this->assertEmpty($this->html5->getErrors());
- }
-
- public function testLoadHTMLFragment() {
- $fragment = '<section id="Foo"><div class="Bar">Baz</div></section>';
- $dom = $this->html5->loadHTMLFragment($fragment);
- $this->assertInstanceOf('\DOMDocumentFragment', $dom);
- $this->assertEmpty($this->html5->getErrors());
- }
-
- public function testSaveHTML() {
- $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
- $this->assertInstanceOf('\DOMDocument', $dom);
- $this->assertEmpty($this->html5->getErrors());
-
- $saved = $this->html5->saveHTML($dom);
- $this->assertRegExp('|<p>This is a test.</p>|', $saved);
- }
-
- public function testSaveHTMLFragment() {
- $fragment = '<section id="Foo"><div class="Bar">Baz</div></section>';
- $dom = $this->html5->loadHTMLFragment($fragment);
-
- $string = $this->html5->saveHTML($dom);
- $this->assertEquals($fragment, $string);
- }
-
- public function testSave() {
- $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
- $this->assertInstanceOf('\DOMDocument', $dom);
- $this->assertEmpty($this->html5->getErrors());
-
- // Test resource
- $file = fopen('php://temp', 'w');
- $this->html5->save($dom, $file);
- $content = stream_get_contents($file, -1, 0);
- $this->assertRegExp('|<p>This is a test.</p>|', $content);
-
- // Test file
- $tmpfname = tempnam(sys_get_temp_dir(), "html5-php");
- $this->html5->save($dom, $tmpfname);
- $content = file_get_contents($tmpfname);
- $this->assertRegExp('|<p>This is a test.</p>|', $content);
- unlink($tmpfname);
- }
-
- // This test reads a document into a dom, turn the dom into a document,
- // then tries to read that document again. This makes sure we are reading,
- // and generating a document that works at a high level.
- public function testItWorks() {
- $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
- $this->assertInstanceOf('\DOMDocument', $dom);
- $this->assertEmpty($this->html5->getErrors());
-
- $saved = $this->html5->saveHTML($dom);
-
- $dom2 = $this->html5->loadHTML($saved);
- $this->assertInstanceOf('\DOMDocument', $dom2);
- $this->assertEmpty($this->html5->getErrors());
- }
-
- public function testConfig() {
- $html5 = $this->getInstance();
- $options = $html5->getOptions();
- $this->assertEquals(FALSE, $options['encode_entities']);
-
- $html5 = $this->getInstance(array('foo' => 'bar', 'encode_entities'=> TRUE));
- $options = $html5->getOptions();
- $this->assertEquals('bar', $options['foo']);
- $this->assertEquals(TRUE, $options['encode_entities']);
-
- // Need to reset to original so future tests pass as expected.
- //$this->getInstance()->setOption('encode_entities', FALSE);
-
- }
-
- public function testSvg() {
-
- $dom = $this->html5->loadHTML('<!doctype html>
+
+ /**
+ * Parse and serialize a string.
+ */
+ protected function cycle($html)
+ {
+ $dom = $this->html5->loadHTML('<!DOCTYPE html><html><body>' . $html . '</body></html>');
+ $out = $this->html5->saveHTML($dom);
+
+ return $out;
+ }
+
+ protected function cycleFragment($fragment)
+ {
+ $dom = $this->html5->loadHTMLFragment($fragment);
+ $out = $this->html5->saveHTML($dom);
+
+ return $out;
+ }
+
+ public function testErrors()
+ {
+ $dom = $this->html5->loadHTML('<xx as>');
+ $this->assertInstanceOf('\DOMDocument', $dom);
+
+ $this->assertNotEmpty($this->html5->getErrors());
+ $this->assertTrue($this->html5->hasErrors());
+ }
+
+ public function testLoad()
+ {
+ $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+ $this->assertFalse($this->html5->hasErrors());
+
+ $file = fopen(__DIR__ . '/Html5Test.html', 'r');
+ $dom = $this->html5->load($file);
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+
+ $dom = $this->html5->loadHTMLFile(__DIR__ . '/Html5Test.html');
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+ }
+
+ public function testLoadHTML()
+ {
+ $contents = file_get_contents(__DIR__ . '/Html5Test.html');
+ $dom = $this->html5->loadHTML($contents);
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+ }
+
+ public function testLoadHTMLFragment()
+ {
+ $fragment = '<section id="Foo"><div class="Bar">Baz</div></section>';
+ $dom = $this->html5->loadHTMLFragment($fragment);
+ $this->assertInstanceOf('\DOMDocumentFragment', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+ }
+
+ public function testSaveHTML()
+ {
+ $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+
+ $saved = $this->html5->saveHTML($dom);
+ $this->assertRegExp('|<p>This is a test.</p>|', $saved);
+ }
+
+ public function testSaveHTMLFragment()
+ {
+ $fragment = '<section id="Foo"><div class="Bar">Baz</div></section>';
+ $dom = $this->html5->loadHTMLFragment($fragment);
+
+ $string = $this->html5->saveHTML($dom);
+ $this->assertEquals($fragment, $string);
+ }
+
+ public function testSave()
+ {
+ $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+
+ // Test resource
+ $file = fopen('php://temp', 'w');
+ $this->html5->save($dom, $file);
+ $content = stream_get_contents($file, - 1, 0);
+ $this->assertRegExp('|<p>This is a test.</p>|', $content);
+
+ // Test file
+ $tmpfname = tempnam(sys_get_temp_dir(), "html5-php");
+ $this->html5->save($dom, $tmpfname);
+ $content = file_get_contents($tmpfname);
+ $this->assertRegExp('|<p>This is a test.</p>|', $content);
+ unlink($tmpfname);
+ }
+
+ // This test reads a document into a dom, turns the dom into a document,
+ // then tries to read that document again. This makes sure we are reading,
+ // and generating a document that works at a high level.
+ public function testItWorks()
+ {
+ $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($this->html5->getErrors());
+
+ $saved = $this->html5->saveHTML($dom);
+
+ $dom2 = $this->html5->loadHTML($saved);
+ $this->assertInstanceOf('\DOMDocument', $dom2);
+ $this->assertEmpty($this->html5->getErrors());
+ }
+
+ public function testConfig()
+ {
+ $html5 = $this->getInstance();
+ $options = $html5->getOptions();
+ $this->assertEquals(FALSE, $options['encode_entities']);
+
+ $html5 = $this->getInstance(array(
+ 'foo' => 'bar',
+ 'encode_entities' => TRUE
+ ));
+ $options = $html5->getOptions();
+ $this->assertEquals('bar', $options['foo']);
+ $this->assertEquals(TRUE, $options['encode_entities']);
+
+ // Need to reset to original so future tests pass as expected.
+ // $this->getInstance()->setOption('encode_entities', FALSE);
+ }
+
+ public function testSvg()
+ {
+ $dom = $this->html5->loadHTML(
+ '<!doctype html>
<html lang="en">
<body>
<div id="foo" class="bar baz">foo bar baz</div>
@@ -149,31 +163,32 @@ class Html5Test extends TestCase {
</body>
</html>');
- $this->assertEmpty($this->html5->getErrors());
-
- // Test a mixed case attribute.
- $list = $dom->getElementsByTagName('svg');
- $this->assertNotEmpty($list->length);
- $svg = $list->item(0);
- $this->assertEquals("0 0 3 2", $svg->getAttribute('viewBox'));
- $this->assertFalse($svg->hasAttribute('viewbox'));
-
- // Test a mixed case tag.
- // Note: getElementsByTagName is not case sensetitive.
- $list = $dom->getElementsByTagName('textPath');
- $this->assertNotEmpty($list->length);
- $textPath = $list->item(0);
- $this->assertEquals('textPath', $textPath->tagName);
- $this->assertNotEquals('textpath', $textPath->tagName);
-
- $html = $this->html5->saveHTML($dom);
- $this->assertRegExp('|<svg width="150" height="100" viewBox="0 0 3 2">|',$html);
- $this->assertRegExp('|<rect width="1" height="2" x="0" fill="#008d46" />|',$html);
-
- }
+ $this->assertEmpty($this->html5->getErrors());
+
+ // Test a mixed case attribute.
+ $list = $dom->getElementsByTagName('svg');
+ $this->assertNotEmpty($list->length);
+ $svg = $list->item(0);
+ $this->assertEquals("0 0 3 2", $svg->getAttribute('viewBox'));
+ $this->assertFalse($svg->hasAttribute('viewbox'));
+
+ // Test a mixed case tag.
+ // Note: getElementsByTagName is not case sensitive.
+ $list = $dom->getElementsByTagName('textPath');
+ $this->assertNotEmpty($list->length);
+ $textPath = $list->item(0);
+ $this->assertEquals('textPath', $textPath->tagName);
+ $this->assertNotEquals('textpath', $textPath->tagName);
+
+ $html = $this->html5->saveHTML($dom);
+ $this->assertRegExp('|<svg width="150" height="100" viewBox="0 0 3 2">|', $html);
+ $this->assertRegExp('|<rect width="1" height="2" x="0" fill="#008d46" />|', $html);
+ }
- public function testMathMl() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testMathMl()
+ {
+ $dom = $this->html5->loadHTML(
+ '<!doctype html>
<html lang="en">
<body>
<div id="foo" class="bar baz" definitionURL="http://example.com">foo bar baz</div>
@@ -187,153 +202,161 @@ class Html5Test extends TestCase {
</body>
</html>');
- $this->assertEmpty($this->html5->getErrors());
- $list = $dom->getElementsByTagName('math');
- $this->assertNotEmpty($list->length);
-
- $list = $dom->getElementsByTagName('div');
- $this->assertNotEmpty($list->length);
- $div = $list->item(0);
- $this->assertEquals('http://example.com', $div->getAttribute('definitionurl'));
- $this->assertFalse($div->hasAttribute('definitionURL'));
- $list = $dom->getElementsByTagName('csymbol');
- $csymbol = $list->item(0);
- $this->assertEquals('http://www.example.com/mathops/multiops.html#plusminus', $csymbol->getAttribute('definitionURL'));
- $this->assertFalse($csymbol->hasAttribute('definitionurl'));
-
- $html = $this->html5->saveHTML($dom);
- $this->assertRegExp('|<csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">|',$html);
- $this->assertRegExp('|<mi>y</mi>|',$html);
- }
-
- public function testUnknownElements() {
- // The : should not have special handling accourding to section 2.9 of the
- // spec. This is differenant than XML. Since we don't know these elements
- // they are handled as normal elements. Note, to do this is really
- // an invalid example and you should not embed prefixed xml in html5.
- $dom = $this->html5->loadHTMLFragment("<f:rug>
+ $this->assertEmpty($this->html5->getErrors());
+ $list = $dom->getElementsByTagName('math');
+ $this->assertNotEmpty($list->length);
+
+ $list = $dom->getElementsByTagName('div');
+ $this->assertNotEmpty($list->length);
+ $div = $list->item(0);
+ $this->assertEquals('http://example.com', $div->getAttribute('definitionurl'));
+ $this->assertFalse($div->hasAttribute('definitionURL'));
+ $list = $dom->getElementsByTagName('csymbol');
+ $csymbol = $list->item(0);
+ $this->assertEquals('http://www.example.com/mathops/multiops.html#plusminus', $csymbol->getAttribute('definitionURL'));
+ $this->assertFalse($csymbol->hasAttribute('definitionurl'));
+
+ $html = $this->html5->saveHTML($dom);
+ $this->assertRegExp('|<csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">|', $html);
+ $this->assertRegExp('|<mi>y</mi>|', $html);
+ }
+
+ public function testUnknownElements()
+ {
+ // The : should not have special handling according to section 2.9 of the
+ // spec. This is different from XML. Since we don't know these elements
+ // they are handled as normal elements. Note, to do this is really
+ // an invalid example and you should not embed prefixed xml in html5.
+ $dom = $this->html5->loadHTMLFragment(
+ "<f:rug>
<f:name>Big rectangle thing</f:name>
<f:width>40</f:width>
<f:length>80</f:length>
</f:rug>
<sarcasm>um, yeah</sarcasm>");
- $this->assertEmpty($this->html5->getErrors());
- $markup = $this->html5->saveHTML($dom);
- $this->assertRegExp('|<f:name>Big rectangle thing</f:name>|',$markup);
- $this->assertRegExp('|<sarcasm>um, yeah</sarcasm>|',$markup);
- }
-
- public function testElements() {
- // Should have content.
- $res = $this->cycle('<div>FOO</div>');
- $this->assertRegExp('|<div>FOO</div>|', $res);
+ $this->assertEmpty($this->html5->getErrors());
+ $markup = $this->html5->saveHTML($dom);
+ $this->assertRegExp('|<f:name>Big rectangle thing</f:name>|', $markup);
+ $this->assertRegExp('|<sarcasm>um, yeah</sarcasm>|', $markup);
+ }
- // Should be empty
- $res = $this->cycle('<span></span>');
- $this->assertRegExp('|<span></span>|', $res);
+ public function testElements()
+ {
+ // Should have content.
+ $res = $this->cycle('<div>FOO</div>');
+ $this->assertRegExp('|<div>FOO</div>|', $res);
- // Should have content.
- $res = $this->cycleFragment('<div>FOO</div>');
- $this->assertRegExp('|<div>FOO</div>|', $res);
+ // Should be empty
+ $res = $this->cycle('<span></span>');
+ $this->assertRegExp('|<span></span>|', $res);
- // Should be empty
- $res = $this->cycleFragment('<span></span>');
- $this->assertRegExp('|<span></span>|', $res);
+ // Should have content.
+ $res = $this->cycleFragment('<div>FOO</div>');
+ $this->assertRegExp('|<div>FOO</div>|', $res);
- // Should have no closing tag.
- $res = $this->cycle('<hr>');
- $this->assertRegExp('|<hr></body>|', $res);
+ // Should be empty
+ $res = $this->cycleFragment('<span></span>');
+ $this->assertRegExp('|<span></span>|', $res);
- }
+ // Should have no closing tag.
+ $res = $this->cycle('<hr>');
+ $this->assertRegExp('|<hr></body>|', $res);
+ }
- public function testAttributes() {
- $res = $this->cycle('<div attr="val">FOO</div>');
- $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
+ public function testAttributes()
+ {
+ $res = $this->cycle('<div attr="val">FOO</div>');
+ $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
- // XXX: Note that spec does NOT require attrs in the same order.
- $res = $this->cycle('<div attr="val" class="even">FOO</div>');
- $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
+ // XXX: Note that spec does NOT require attrs in the same order.
+ $res = $this->cycle('<div attr="val" class="even">FOO</div>');
+ $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
- $res = $this->cycle('<div xmlns:foo="http://example.com">FOO</div>');
- $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
+ $res = $this->cycle('<div xmlns:foo="http://example.com">FOO</div>');
+ $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
- $res = $this->cycleFragment('<div attr="val">FOO</div>');
- $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
+ $res = $this->cycleFragment('<div attr="val">FOO</div>');
+ $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
- // XXX: Note that spec does NOT require attrs in the same order.
- $res = $this->cycleFragment('<div attr="val" class="even">FOO</div>');
- $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
+ // XXX: Note that spec does NOT require attrs in the same order.
+ $res = $this->cycleFragment('<div attr="val" class="even">FOO</div>');
+ $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
- $res = $this->cycleFragment('<div xmlns:foo="http://example.com">FOO</div>');
- $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
- }
+ $res = $this->cycleFragment('<div xmlns:foo="http://example.com">FOO</div>');
+ $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
+ }
- public function testPCData() {
- $res = $this->cycle('<a>This is a test.</a>');
- $this->assertRegExp('|This is a test.|', $res);
+ public function testPCData()
+ {
+ $res = $this->cycle('<a>This is a test.</a>');
+ $this->assertRegExp('|This is a test.|', $res);
- $res = $this->cycleFragment('<a>This is a test.</a>');
- $this->assertRegExp('|This is a test.|', $res);
+ $res = $this->cycleFragment('<a>This is a test.</a>');
+ $this->assertRegExp('|This is a test.|', $res);
- $res = $this->cycle('This
+ $res = $this->cycle('This
is
a
test.');
- // Check that newlines are there, but don't count spaces.
- $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
+ // Check that newlines are there, but don't count spaces.
+ $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
- $res = $this->cycleFragment('This
+ $res = $this->cycleFragment('This
is
a
test.');
- // Check that newlines are there, but don't count spaces.
- $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
+ // Check that newlines are there, but don't count spaces.
+ $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
- $res = $this->cycle('<a>This <em>is</em> a test.</a>');
- $this->assertRegExp('|This <em>is</em> a test.|', $res);
+ $res = $this->cycle('<a>This <em>is</em> a test.</a>');
+ $this->assertRegExp('|This <em>is</em> a test.|', $res);
- $res = $this->cycleFragment('<a>This <em>is</em> a test.</a>');
- $this->assertRegExp('|This <em>is</em> a test.|', $res);
- }
+ $res = $this->cycleFragment('<a>This <em>is</em> a test.</a>');
+ $this->assertRegExp('|This <em>is</em> a test.|', $res);
+ }
- public function testUnescaped() {
- $res = $this->cycle('<script>2 < 1</script>');
- $this->assertRegExp('|2 < 1|', $res);
+ public function testUnescaped()
+ {
+ $res = $this->cycle('<script>2 < 1</script>');
+ $this->assertRegExp('|2 < 1|', $res);
- $res = $this->cycle('<style>div>div>div</style>');
- $this->assertRegExp('|div>div>div|', $res);
+ $res = $this->cycle('<style>div>div>div</style>');
+ $this->assertRegExp('|div>div>div|', $res);
- $res = $this->cycleFragment('<script>2 < 1</script>');
- $this->assertRegExp('|2 < 1|', $res);
+ $res = $this->cycleFragment('<script>2 < 1</script>');
+ $this->assertRegExp('|2 < 1|', $res);
- $res = $this->cycleFragment('<style>div>div>div</style>');
- $this->assertRegExp('|div>div>div|', $res);
- }
+ $res = $this->cycleFragment('<style>div>div>div</style>');
+ $this->assertRegExp('|div>div>div|', $res);
+ }
- public function testEntities() {
- $res = $this->cycle('<a>Apples &amp; bananas.</a>');
- $this->assertRegExp('|Apples &amp; bananas.|', $res);
+ public function testEntities()
+ {
+ $res = $this->cycle('<a>Apples &amp; bananas.</a>');
+ $this->assertRegExp('|Apples &amp; bananas.|', $res);
- $res = $this->cycleFragment('<a>Apples &amp; bananas.</a>');
- $this->assertRegExp('|Apples &amp; bananas.|', $res);
- }
+ $res = $this->cycleFragment('<a>Apples &amp; bananas.</a>');
+ $this->assertRegExp('|Apples &amp; bananas.|', $res);
+ }
- public function testComment() {
- $res = $this->cycle('a<!-- This is a test. -->b');
- $this->assertRegExp('|<!-- This is a test. -->|', $res);
+ public function testComment()
+ {
+ $res = $this->cycle('a<!-- This is a test. -->b');
+ $this->assertRegExp('|<!-- This is a test. -->|', $res);
- $res = $this->cycleFragment('a<!-- This is a test. -->b');
- $this->assertRegExp('|<!-- This is a test. -->|', $res);
- }
+ $res = $this->cycleFragment('a<!-- This is a test. -->b');
+ $this->assertRegExp('|<!-- This is a test. -->|', $res);
+ }
- public function testCDATA() {
- $res = $this->cycle('a<![CDATA[ This <is> a test. ]]>b');
- $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
+ public function testCDATA()
+ {
+ $res = $this->cycle('a<![CDATA[ This <is> a test. ]]>b');
+ $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
- $res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b');
- $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
- }
+ $res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b');
+ $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
+ }
}
diff --git a/test/HTML5/Parser/CharacterReferenceTest.php b/test/HTML5/Parser/CharacterReferenceTest.php
index 6dedb00..762bcc2 100644
--- a/test/HTML5/Parser/CharacterReferenceTest.php
+++ b/test/HTML5/Parser/CharacterReferenceTest.php
@@ -6,35 +6,39 @@
namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\CharacterReference;
-class CharacterReferenceTest extends \Masterminds\HTML5\Tests\TestCase {
- public function testLookupName() {
- $this->assertEquals('&', CharacterReference::lookupName('amp'));
- $this->assertEquals('<', CharacterReference::lookupName('lt'));
- $this->assertEquals('>', CharacterReference::lookupName('gt'));
- $this->assertEquals('"', CharacterReference::lookupName('quot'));
- $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement'));
- $this->assertNull(CharacterReference::lookupName('StinkyCheese'));
- }
+class CharacterReferenceTest extends \Masterminds\HTML5\Tests\TestCase
+{
- public function testLookupHex() {
- $this->assertEquals('<', CharacterReference::lookupHex('3c'));
- $this->assertEquals('<', CharacterReference::lookupHex('003c'));
- $this->assertEquals('&', CharacterReference::lookupHex('26'));
- $this->assertEquals('}', CharacterReference::lookupHex('7d'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('3A3'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('03A3'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('3a3'));
- $this->assertEquals('Σ', CharacterReference::lookupHex('03a3'));
- }
+ public function testLookupName()
+ {
+ $this->assertEquals('&', CharacterReference::lookupName('amp'));
+ $this->assertEquals('<', CharacterReference::lookupName('lt'));
+ $this->assertEquals('>', CharacterReference::lookupName('gt'));
+ $this->assertEquals('"', CharacterReference::lookupName('quot'));
+ $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement'));
- public function testLookupDecimal() {
- $this->assertEquals('&', CharacterReference::lookupDecimal(38));
- $this->assertEquals('&', CharacterReference::lookupDecimal('38'));
- $this->assertEquals('<', CharacterReference::lookupDecimal(60));
- $this->assertEquals('Σ', CharacterReference::lookupDecimal(931));
- $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931'));
- }
+ $this->assertNull(CharacterReference::lookupName('StinkyCheese'));
+ }
+ public function testLookupHex()
+ {
+ $this->assertEquals('<', CharacterReference::lookupHex('3c'));
+ $this->assertEquals('<', CharacterReference::lookupHex('003c'));
+ $this->assertEquals('&', CharacterReference::lookupHex('26'));
+ $this->assertEquals('}', CharacterReference::lookupHex('7d'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('3A3'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('03A3'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('3a3'));
+ $this->assertEquals('Σ', CharacterReference::lookupHex('03a3'));
+ }
+ public function testLookupDecimal()
+ {
+ $this->assertEquals('&', CharacterReference::lookupDecimal(38));
+ $this->assertEquals('&', CharacterReference::lookupDecimal('38'));
+ $this->assertEquals('<', CharacterReference::lookupDecimal(60));
+ $this->assertEquals('Σ', CharacterReference::lookupDecimal(931));
+ $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931'));
+ }
}
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 52dad30..d8b686c 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -5,7 +5,6 @@
*/
namespace Masterminds\HTML5\Tests\Parser;
-use Masterminds\HTML5\Elements;
use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
@@ -14,119 +13,128 @@ use Masterminds\HTML5\Parser\DOMTreeBuilder;
/**
* These tests are functional, not necessarily unit tests.
*/
-class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
-
- /**
- * Convenience function for parsing.
- */
- protected function parse($string) {
- $treeBuilder = new DOMTreeBuilder();
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
-
- $parser->parse();
-
- return $treeBuilder->document();
- }
-
- /**
- * Utility function for parsing a fragment of HTML5.
- */
- protected function parseFragment($string) {
- $treeBuilder = new DOMTreeBuilder(TRUE);
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
-
- $parser->parse();
-
- return $treeBuilder->fragment();
- }
-
- public function testDocument() {
- $html = "<!DOCTYPE html><html></html>";
- $doc = $this->parse($html);
-
- $this->assertInstanceOf('\DOMDocument', $doc);
- $this->assertEquals('html', $doc->documentElement->tagName);
- }
-
- public function testFragment() {
- $html = "<div>test</div><span>test2</span>";
- $doc = $this->parseFragment($html);
-
- $this->assertInstanceOf('\DOMDocumentFragment', $doc);
- $this->assertTrue($doc->hasChildNodes());
- $this->assertEquals('div', $doc->childNodes->item(0)->tagName);
- $this->assertEquals('test', $doc->childNodes->item(0)->textContent);
- $this->assertEquals('span', $doc->childNodes->item(1)->tagName);
- $this->assertEquals('test2', $doc->childNodes->item(1)->textContent);
- }
-
- public function testElements() {
- $html = "<!DOCTYPE html><html><head><title></title></head><body></body></html>";
- $doc = $this->parse($html);
- $root = $doc->documentElement;
-
- $this->assertEquals('html', $root->tagName);
- $this->assertEquals('html', $root->localName);
- $this->assertEquals('html', $root->nodeName);
-
- $this->assertEquals(2, $root->childNodes->length);
- $kids = $root->childNodes;
-
- $this->assertEquals('head', $kids->item(0)->tagName);
- $this->assertEquals('body', $kids->item(1)->tagName);
-
- $head = $kids->item(0);
- $this->assertEquals(1, $head->childNodes->length);
- $this->assertEquals('title', $head->childNodes->item(0)->tagName);
- }
-
- public function testAttributes() {
- $html = "<!DOCTYPE html>
+class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ /**
+ * Convenience function for parsing.
+ */
+ protected function parse($string)
+ {
+ $treeBuilder = new DOMTreeBuilder();
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+
+ return $treeBuilder->document();
+ }
+
+ /**
+ * Utility function for parsing a fragment of HTML5.
+ */
+ protected function parseFragment($string)
+ {
+ $treeBuilder = new DOMTreeBuilder(TRUE);
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+
+ return $treeBuilder->fragment();
+ }
+
+ public function testDocument()
+ {
+ $html = "<!DOCTYPE html><html></html>";
+ $doc = $this->parse($html);
+
+ $this->assertInstanceOf('\DOMDocument', $doc);
+ $this->assertEquals('html', $doc->documentElement->tagName);
+ }
+
+ public function testFragment()
+ {
+ $html = "<div>test</div><span>test2</span>";
+ $doc = $this->parseFragment($html);
+
+ $this->assertInstanceOf('\DOMDocumentFragment', $doc);
+ $this->assertTrue($doc->hasChildNodes());
+ $this->assertEquals('div', $doc->childNodes->item(0)->tagName);
+ $this->assertEquals('test', $doc->childNodes->item(0)->textContent);
+ $this->assertEquals('span', $doc->childNodes->item(1)->tagName);
+ $this->assertEquals('test2', $doc->childNodes->item(1)->textContent);
+ }
+
+ public function testElements()
+ {
+ $html = "<!DOCTYPE html><html><head><title></title></head><body></body></html>";
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
+
+ $this->assertEquals('html', $root->tagName);
+ $this->assertEquals('html', $root->localName);
+ $this->assertEquals('html', $root->nodeName);
+
+ $this->assertEquals(2, $root->childNodes->length);
+ $kids = $root->childNodes;
+
+ $this->assertEquals('head', $kids->item(0)->tagName);
+ $this->assertEquals('body', $kids->item(1)->tagName);
+
+ $head = $kids->item(0);
+ $this->assertEquals(1, $head->childNodes->length);
+ $this->assertEquals('title', $head->childNodes->item(0)->tagName);
+ }
+
+ public function testAttributes()
+ {
+ $html = "<!DOCTYPE html>
<html>
<head><title></title></head>
<body id='a' class='b c'></body>
</html>";
- $doc = $this->parse($html);
- $root = $doc->documentElement;
-
- $body = $root->GetElementsByTagName('body')->item(0);
- $this->assertEquals('body', $body->tagName);
- $this->assertTrue($body->hasAttributes());
- $this->assertEquals('a', $body->getAttribute('id'));
- $this->assertEquals('b c', $body->getAttribute('class'));
-
- $body2 = $doc->getElementById('a');
- $this->assertEquals('body', $body2->tagName);
- $this->assertEquals('a', $body2->getAttribute('id'));
- }
-
- public function testSVGAttributes() {
- $html = "<!DOCTYPE html>
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
+
+ $body = $root->GetElementsByTagName('body')->item(0);
+ $this->assertEquals('body', $body->tagName);
+ $this->assertTrue($body->hasAttributes());
+ $this->assertEquals('a', $body->getAttribute('id'));
+ $this->assertEquals('b c', $body->getAttribute('class'));
+
+ $body2 = $doc->getElementById('a');
+ $this->assertEquals('body', $body2->tagName);
+ $this->assertEquals('a', $body2->getAttribute('id'));
+ }
+
+ public function testSVGAttributes()
+ {
+ $html = "<!DOCTYPE html>
<html><body>
<svg width='150' viewbox='2'>
<rect textlength='2'/>
<animatecolor>foo</animatecolor>
</svg>
</body></html>";
- $doc = $this->parse($html);
- $root = $doc->documentElement;
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
- $svg = $root->getElementsByTagName('svg')->item(0);
- $this->assertTrue($svg->hasAttribute('viewBox'));
+ $svg = $root->getElementsByTagName('svg')->item(0);
+ $this->assertTrue($svg->hasAttribute('viewBox'));
- $rect = $root->getElementsByTagName('rect')->item(0);
- $this->assertTrue($rect->hasAttribute('textLength'));
+ $rect = $root->getElementsByTagName('rect')->item(0);
+ $this->assertTrue($rect->hasAttribute('textLength'));
- $ac = $root->getElementsByTagName('animateColor');
- $this->assertEquals(1, $ac->length);
- }
+ $ac = $root->getElementsByTagName('animateColor');
+ $this->assertEquals(1, $ac->length);
+ }
- public function testMathMLAttribute() {
- $html = '<!doctype html>
+ public function testMathMLAttribute()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<math>
@@ -139,124 +147,132 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>';
- $doc = $this->parse($html);
- $root = $doc->documentElement;
-
- $csymbol = $root->getElementsByTagName('csymbol')->item(0);
- $this->assertTrue($csymbol->hasAttribute('definitionURL'));
- }
-
- public function testMissingHtmlTag() {
- $html = "<!DOCTYPE html><title>test</title>";
- $doc = $this->parse($html);
-
- $this->assertEquals('html', $doc->documentElement->tagName);
- $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
- }
-
- public function testComment() {
- $html = '<html><!--Hello World.--></html>';
-
- $doc = $this->parse($html);
-
- $comment = $doc->documentElement->childNodes->item(0);
- $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
- $this->assertEquals("Hello World.", $comment->data);
-
-
- $html = '<!--Hello World.--><html></html>';
- $doc = $this->parse($html);
-
- $comment = $doc->childNodes->item(1);
- $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
- $this->assertEquals("Hello World.", $comment->data);
-
- $comment = $doc->childNodes->item(2);
- $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType);
- $this->assertEquals("html", $comment->tagName);
- }
-
- public function testCDATA() {
- $html = "<!DOCTYPE html><html><math><![CDATA[test]]></math></html>";
- $doc = $this->parse($html);
-
- $wrapper = $doc->getElementsByTagName('math')->item(0);
- $this->assertEquals(1, $wrapper->childNodes->length);
- $cdata = $wrapper->childNodes->item(0);
- $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
- $this->assertEquals('test', $cdata->data);
- }
-
- public function testText() {
- $html = "<!DOCTYPE html><html><head></head><body><math>test</math></body></html>";
- $doc = $this->parse($html);
-
- $wrapper = $doc->getElementsByTagName('math')->item(0);
- $this->assertEquals(1, $wrapper->childNodes->length);
- $data = $wrapper->childNodes->item(0);
- $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
- $this->assertEquals('test', $data->data);
-
- // The DomTreeBuilder has special handling for text when in before head mode.
- $html = "<!DOCTYPE html><html>
+ $doc = $this->parse($html);
+ $root = $doc->documentElement;
+
+ $csymbol = $root->getElementsByTagName('csymbol')->item(0);
+ $this->assertTrue($csymbol->hasAttribute('definitionURL'));
+ }
+
+ public function testMissingHtmlTag()
+ {
+ $html = "<!DOCTYPE html><title>test</title>";
+ $doc = $this->parse($html);
+
+ $this->assertEquals('html', $doc->documentElement->tagName);
+ $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
+ }
+
+ public function testComment()
+ {
+ $html = '<html><!--Hello World.--></html>';
+
+ $doc = $this->parse($html);
+
+ $comment = $doc->documentElement->childNodes->item(0);
+ $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
+ $this->assertEquals("Hello World.", $comment->data);
+
+ $html = '<!--Hello World.--><html></html>';
+ $doc = $this->parse($html);
+
+ $comment = $doc->childNodes->item(1);
+ $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
+ $this->assertEquals("Hello World.", $comment->data);
+
+ $comment = $doc->childNodes->item(2);
+ $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType);
+ $this->assertEquals("html", $comment->tagName);
+ }
+
+ public function testCDATA()
+ {
+ $html = "<!DOCTYPE html><html><math><![CDATA[test]]></math></html>";
+ $doc = $this->parse($html);
+
+ $wrapper = $doc->getElementsByTagName('math')->item(0);
+ $this->assertEquals(1, $wrapper->childNodes->length);
+ $cdata = $wrapper->childNodes->item(0);
+ $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
+ $this->assertEquals('test', $cdata->data);
+ }
+
+ public function testText()
+ {
+ $html = "<!DOCTYPE html><html><head></head><body><math>test</math></body></html>";
+ $doc = $this->parse($html);
+
+ $wrapper = $doc->getElementsByTagName('math')->item(0);
+ $this->assertEquals(1, $wrapper->childNodes->length);
+ $data = $wrapper->childNodes->item(0);
+ $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
+ $this->assertEquals('test', $data->data);
+
+ // The DomTreeBuilder has special handling for text when in before head mode.
+ $html = "<!DOCTYPE html><html>
Foo<head></head><body></body></html>";
- $doc = $this->parse($html);
- $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]);
- $headElement = $doc->documentElement->firstChild;
- $this->assertEquals('head', $headElement->tagName);
- }
-
- public function testParseErrors() {
- $html = "<!DOCTYPE html><html><math><![CDATA[test";
- $doc = $this->parse($html);
-
- // We're JUST testing that we can access errors. Actual testing of
- // error messages happen in the Tokenizer's tests.
- $this->assertGreaterThan(0, count($doc->errors));
- $this->assertTrue(is_string($doc->errors[0]));
- }
-
- public function testProcessingInstruction() {
- // Test the simple case, which is where PIs are inserted into the DOM.
- $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
- $this->assertEquals(1, $doc->documentElement->childNodes->length);
- $pi = $doc->documentElement->firstChild;
- $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
- $this->assertEquals('foo', $pi->nodeName);
- $this->assertEquals('bar', $pi->data);
-
- // Leading xml PIs should be ignored.
- $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
-
- $this->assertEquals(2, $doc->childNodes->length);
- $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
- $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
- }
-
- public function testAutocloseP() {
- $html = "<!DOCTYPE html><html><body><p><figure></body></html>";
- $doc = $this->parse($html);
-
- $p = $doc->getElementsByTagName('p')->item(0);
- $this->assertEquals(0, $p->childNodes->length);
- $this->assertEquals('figure', $p->nextSibling->tagName);
- }
-
- public function testAutocloseLI() {
- $html = '<!doctype html>
+ $doc = $this->parse($html);
+ $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]);
+ $headElement = $doc->documentElement->firstChild;
+ $this->assertEquals('head', $headElement->tagName);
+ }
+
+ public function testParseErrors()
+ {
+ $html = "<!DOCTYPE html><html><math><![CDATA[test";
+ $doc = $this->parse($html);
+
+ // We're JUST testing that we can access errors. Actual testing of
+ // error messages happen in the Tokenizer's tests.
+ $this->assertGreaterThan(0, count($doc->errors));
+ $this->assertTrue(is_string($doc->errors[0]));
+ }
+
+ public function testProcessingInstruction()
+ {
+ // Test the simple case, which is where PIs are inserted into the DOM.
+ $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
+ $this->assertEquals(1, $doc->documentElement->childNodes->length);
+ $pi = $doc->documentElement->firstChild;
+ $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
+ $this->assertEquals('foo', $pi->nodeName);
+ $this->assertEquals('bar', $pi->data);
+
+ // Leading xml PIs should be ignored.
+ $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
+
+ $this->assertEquals(2, $doc->childNodes->length);
+ $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
+ $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
+ }
+
+ public function testAutocloseP()
+ {
+ $html = "<!DOCTYPE html><html><body><p><figure></body></html>";
+ $doc = $this->parse($html);
+
+ $p = $doc->getElementsByTagName('p')->item(0);
+ $this->assertEquals(0, $p->childNodes->length);
+ $this->assertEquals('figure', $p->nextSibling->tagName);
+ }
+
+ public function testAutocloseLI()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<ul><li>Foo<li>Bar<li>Baz</ul>
</body>
</html>';
- $doc = $this->parse($html);
- $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
- $this->assertEquals(3, $length);
- }
+ $doc = $this->parse($html);
+ $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
+ $this->assertEquals(3, $length);
+ }
- public function testMathML() {
- $html = '<!doctype html>
+ public function testMathML()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
@@ -269,16 +285,17 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>';
- $doc = $this->parse($html);
- $math = $doc->getElementsByTagName('math')->item(0);
- $this->assertEquals('math', $math->tagName);
- $this->assertEquals('math', $math->nodeName);
- $this->assertEquals('math', $math->localName);
- $this->assertEmpty($math->namespaceURI);
- }
-
- public function testSVG() {
- $html = '<!doctype html>
+ $doc = $this->parse($html);
+ $math = $doc->getElementsByTagName('math')->item(0);
+ $this->assertEquals('math', $math->tagName);
+ $this->assertEquals('math', $math->nodeName);
+ $this->assertEquals('math', $math->localName);
+ $this->assertEmpty($math->namespaceURI);
+ }
+
+ public function testSVG()
+ {
+ $html = '<!doctype html>
<html lang="en">
<body>
<svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg">
@@ -292,59 +309,61 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>';
- $doc = $this->parse($html);
- $svg = $doc->getElementsByTagName('svg')->item(0);
- $this->assertEquals('svg', $svg->tagName);
- $this->assertEquals('svg', $svg->nodeName);
- $this->assertEquals('svg', $svg->localName);
- $this->assertEmpty($svg->namespaceURI);
-
- $textPath = $doc->getElementsByTagName('textPath')->item(0);
- $this->assertEquals('textPath', $textPath->tagName);
- }
-
- public function testNoScript() {
- $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
- $doc = $this->parse($html);
- $this->assertEmpty($doc->errors);
- $noscript = $doc->getElementsByTagName('noscript')->item(0);
- $this->assertEquals('noscript', $noscript->tagName);
- }
-
- /**
- * Regression for issue #13
- */
- public function testRegressionHTMLNoBody() {
- $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
- $doc = $this->parse($html);
- $span = $doc->getElementById('test');
-
- $this->assertEmpty($doc->errors);
-
- $this->assertEquals('span', $span->tagName);
- $this->assertEquals('Test', $span->textContent);
- }
-
- public function testInstructionProcessor() {
- $string = '<!DOCTYPE html><html><?foo bar ?></html>';
-
- $treeBuilder = new DOMTreeBuilder();
- $is = new InstructionProcessorMock();
- $treeBuilder->setInstructionProcessor($is);
-
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
-
- $parser->parse();
- $dom = $treeBuilder->document();
- $div = $dom->getElementsByTagName('div')->item(0);
-
- $this->assertEquals(1, $is->count);
- $this->assertEquals('foo', $is->name);
- $this->assertEquals('bar ', $is->data);
- $this->assertEquals('div', $div->tagName);
- $this->assertEquals('foo', $div->textContent);
- }
+ $doc = $this->parse($html);
+ $svg = $doc->getElementsByTagName('svg')->item(0);
+ $this->assertEquals('svg', $svg->tagName);
+ $this->assertEquals('svg', $svg->nodeName);
+ $this->assertEquals('svg', $svg->localName);
+ $this->assertEmpty($svg->namespaceURI);
+
+ $textPath = $doc->getElementsByTagName('textPath')->item(0);
+ $this->assertEquals('textPath', $textPath->tagName);
+ }
+
+ public function testNoScript()
+ {
+ $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
+ $doc = $this->parse($html);
+ $this->assertEmpty($doc->errors);
+ $noscript = $doc->getElementsByTagName('noscript')->item(0);
+ $this->assertEquals('noscript', $noscript->tagName);
+ }
+
+ /**
+ * Regression for issue #13
+ */
+ public function testRegressionHTMLNoBody()
+ {
+ $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
+ $doc = $this->parse($html);
+ $span = $doc->getElementById('test');
+
+ $this->assertEmpty($doc->errors);
+
+ $this->assertEquals('span', $span->tagName);
+ $this->assertEquals('Test', $span->textContent);
+ }
+
+ public function testInstructionProcessor()
+ {
+ $string = '<!DOCTYPE html><html><?foo bar ?></html>';
+
+ $treeBuilder = new DOMTreeBuilder();
+ $is = new InstructionProcessorMock();
+ $treeBuilder->setInstructionProcessor($is);
+
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+ $dom = $treeBuilder->document();
+ $div = $dom->getElementsByTagName('div')->item(0);
+
+ $this->assertEquals(1, $is->count);
+ $this->assertEquals('foo', $is->name);
+ $this->assertEquals('bar ', $is->data);
+ $this->assertEquals('div', $div->tagName);
+ $this->assertEquals('foo', $div->textContent);
+ }
}
-
diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php
index 050cb5a..da0d3ec 100644
--- a/test/HTML5/Parser/EventStack.php
+++ b/test/HTML5/Parser/EventStack.php
@@ -14,77 +14,103 @@ use Masterminds\HTML5\Parser\EventHandler;
* script or pre tags. This is to match the behavior required by the HTML5 spec,
* which says that the tree builder must tell the tokenizer when to switch states.
*/
-class EventStack implements EventHandler {
- protected $stack;
-
- public function __construct() {
- $this->stack = array();
- }
-
- /**
- * Get the event stack.
- */
- public function events() {
- return $this->stack;
- }
-
- public function depth() {
- return count($this->stack);
- }
-
- public function get($index) {
- return $this->stack[$index];
- }
-
- protected function store($event, $data = NULL) {
- $this->stack[] = array(
- 'name' => $event,
- 'data' => $data,
- );
- }
-
- public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE) {
- $args = array($name, $type, $id, $quirks);
- $this->store('doctype', $args);
- }
-
- public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
- $args = func_get_args();
- $this->store('startTag', $args);
- if ($name == 'pre' || $name == 'script') {
- return Elements::TEXT_RAW;
+class EventStack implements EventHandler
+{
+
+ protected $stack;
+
+ public function __construct()
+ {
+ $this->stack = array();
+ }
+
+ /**
+ * Get the event stack.
+ */
+ public function events()
+ {
+ return $this->stack;
+ }
+
+ public function depth()
+ {
+ return count($this->stack);
+ }
+
+ public function get($index)
+ {
+ return $this->stack[$index];
+ }
+
+ protected function store($event, $data = NULL)
+ {
+ $this->stack[] = array(
+ 'name' => $event,
+ 'data' => $data
+ );
}
- }
- public function endTag($name) {
- $this->store('endTag', array($name));
- }
+ public function doctype($name, $type = 0, $id = NULL, $quirks = FALSE)
+ {
+ $args = array(
+ $name,
+ $type,
+ $id,
+ $quirks
+ );
+ $this->store('doctype', $args);
+ }
- public function comment($cdata) {
- $this->store('comment', array($cdata));
- }
+ public function startTag($name, $attributes = array(), $selfClosing = FALSE)
+ {
+ $args = func_get_args();
+ $this->store('startTag', $args);
+ if ($name == 'pre' || $name == 'script') {
+ return Elements::TEXT_RAW;
+ }
+ }
- public function cdata($data) {
- $this->store('cdata', func_get_args());
- }
+ public function endTag($name)
+ {
+ $this->store('endTag', array(
+ $name
+ ));
+ }
+
+ public function comment($cdata)
+ {
+ $this->store('comment', array(
+ $cdata
+ ));
+ }
- public function text($cdata) {
- //fprintf(STDOUT, "Received TEXT event with: " . $cdata);
- $this->store('text', array($cdata));
- }
+ public function cdata($data)
+ {
+ $this->store('cdata', func_get_args());
+ }
- public function eof() {
- $this->store('eof');
- }
+ public function text($cdata)
+ {
+ // fprintf(STDOUT, "Received TEXT event with: " . $cdata);
+ $this->store('text', array(
+ $cdata
+ ));
+ }
- public function parseError($msg, $line, $col) {
- //throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col));
- //$this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col));
- $this->store('error', func_get_args());
- }
+ public function eof()
+ {
+ $this->store('eof');
+ }
- public function processingInstruction($name, $data = NULL) {
- $this->store('pi', func_get_args());
- }
+ public function parseError($msg, $line, $col)
+ {
+ // throw new EventStackParseError(sprintf("%s (line %d, col %d)", $msg, $line, $col));
+ // $this->store(sprintf("%s (line %d, col %d)", $msg, $line, $col));
+ $this->store('error', func_get_args());
+ }
-} \ No newline at end of file
+ public function processingInstruction($name, $data = NULL)
+ {
+ $this->store('pi', func_get_args());
+ }
+}
diff --git a/test/HTML5/Parser/EventStackError.php b/test/HTML5/Parser/EventStackError.php
index e2d6ba7..e58fdff 100644
--- a/test/HTML5/Parser/EventStackError.php
+++ b/test/HTML5/Parser/EventStackError.php
@@ -1,5 +1,6 @@
<?php
namespace Masterminds\HTML5\Tests\Parser;
-class EventStackParseError extends \Exception {
+class EventStackError extends \Exception
+{
}
diff --git a/test/HTML5/Parser/FileInputStreamTest.php b/test/HTML5/Parser/FileInputStreamTest.php
index 4efcbe1..71dd828 100644
--- a/test/HTML5/Parser/FileInputStreamTest.php
+++ b/test/HTML5/Parser/FileInputStreamTest.php
@@ -3,133 +3,176 @@ namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\FileInputStream;
-class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase {
-
- function testConstruct() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertInstanceOf('\Masterminds\HTML5\Parser\FileInputStream', $s);
- }
-
- public function testNext() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $s->next();
- $this->assertEquals('!', $s->current());
- $s->next();
- $this->assertEquals('d', $s->current());
- }
-
- public function testKey() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals(0, $s->key());
-
- $s->next();
- $this->assertEquals(1, $s->key());
- }
-
- public function testPeek() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('!', $s->peek());
-
- $s->next();
- $this->assertEquals('d', $s->peek());
- }
-
- public function testCurrent() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('<', $s->current());
-
- $s->next();
- $this->assertEquals('!', $s->current());
-
- $s->next();
- $this->assertEquals('d', $s->current());
- }
-
- public function testColumnOffset() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $this->assertEquals(1, $s->columnOffset());
- $s->next();
- $this->assertEquals(2, $s->columnOffset());
- $s->next();
- $this->assertEquals(3, $s->columnOffset());
-
- // Make sure we get to the second line
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next();
- $this->assertEquals(0, $s->columnOffset());
-
- $s->next();
- $canary = $s->current(); // h
- $this->assertEquals('h', $canary);
- $this->assertEquals(1, $s->columnOffset());
- }
-
- public function testCurrentLine() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals(1, $s->currentLine());
-
- // Make sure we get to the second line
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $this->assertEquals(2, $s->currentLine());
-
- // Make sure we get to the third line
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next(); $s->next(); $s->next(); $s->next();
- $s->next();
- $this->assertEquals(3, $s->currentLine());
- }
-
- public function testRemainingChars() {
- $text = file_get_contents(__DIR__ . '/FileInputStreamTest.html');
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
- $this->assertEquals($text, $s->remainingChars());
-
- $text = substr(file_get_contents(__DIR__ . '/FileInputStreamTest.html'), 1);
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
- $s->next(); // Pop one.
- $this->assertEquals($text, $s->remainingChars());
- }
-
- public function testCharsUnitl() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('', $s->charsUntil('<'));
- // Pointer at '<', moves to ' '
- $this->assertEquals('<!doctype', $s->charsUntil(' ', 20));
-
- // Pointer at ' ', moves to '>'
- $this->assertEquals(' html', $s->charsUntil('>'));
-
- // Pointer at '>', moves to '\n'.
- $this->assertEquals('>', $s->charsUntil("\n"));
-
- // Pointer at '\n', move forward then to the next'\n'.
- $s->next();
- $this->assertEquals('<html lang="en">', $s->charsUntil("\n"));
-
- // Ony get one of the spaces.
- $this->assertEquals("\n ", $s->charsUntil('<', 2));
-
- // Get the other space.
- $this->assertEquals(" ", $s->charsUntil('<'));
-
- // This should scan to the end of the file.
- $text = "<head>
+class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ public function testConstruct()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertInstanceOf('\Masterminds\HTML5\Parser\FileInputStream', $s);
+ }
+
+ public function testNext()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $s->next();
+ $this->assertEquals('!', $s->current());
+ $s->next();
+ $this->assertEquals('d', $s->current());
+ }
+
+ public function testKey()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals(0, $s->key());
+
+ $s->next();
+ $this->assertEquals(1, $s->key());
+ }
+
+ public function testPeek()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('!', $s->peek());
+
+ $s->next();
+ $this->assertEquals('d', $s->peek());
+ }
+
+ public function testCurrent()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('<', $s->current());
+
+ $s->next();
+ $this->assertEquals('!', $s->current());
+
+ $s->next();
+ $this->assertEquals('d', $s->current());
+ }
+
+ public function testColumnOffset()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(1, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(2, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(3, $s->columnOffset());
+
+ // Make sure we get to the second line
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(0, $s->columnOffset());
+
+ $s->next();
+ $canary = $s->current(); // h
+ $this->assertEquals('h', $canary);
+ $this->assertEquals(1, $s->columnOffset());
+ }
+
+ public function testCurrentLine()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals(1, $s->currentLine());
+
+ // Make sure we get to the second line
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(2, $s->currentLine());
+
+ // Make sure we get to the third line
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(3, $s->currentLine());
+ }
+
+ public function testRemainingChars()
+ {
+ $text = file_get_contents(__DIR__ . '/FileInputStreamTest.html');
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+ $this->assertEquals($text, $s->remainingChars());
+
+ $text = substr(file_get_contents(__DIR__ . '/FileInputStreamTest.html'), 1);
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+ $s->next(); // Pop one.
+ $this->assertEquals($text, $s->remainingChars());
+ }
+
+ public function testCharsUnitl()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('', $s->charsUntil('<'));
+ // Pointer at '<', moves to ' '
+ $this->assertEquals('<!doctype', $s->charsUntil(' ', 20));
+
+ // Pointer at ' ', moves to '>'
+ $this->assertEquals(' html', $s->charsUntil('>'));
+
+ // Pointer at '>', moves to '\n'.
+ $this->assertEquals('>', $s->charsUntil("\n"));
+
+ // Pointer at '\n', move forward then to the next'\n'.
+ $s->next();
+ $this->assertEquals('<html lang="en">', $s->charsUntil("\n"));
+
+ // Ony get one of the spaces.
+ $this->assertEquals("\n ", $s->charsUntil('<', 2));
+
+ // Get the other space.
+ $this->assertEquals(" ", $s->charsUntil('<'));
+
+ // This should scan to the end of the file.
+ $text = "<head>
<meta charset=\"utf-8\">
<title>Test</title>
</head>
@@ -137,15 +180,16 @@ class FileInputStreamTest extends \Masterminds\HTML5\Tests\TestCase {
<p>This is a test.</p>
</body>
</html>";
- $this->assertEquals($text, $s->charsUntil("\t"));
- }
-
- public function testCharsWhile() {
- $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
-
- $this->assertEquals('<!', $s->charsWhile('!<'));
- $this->assertEquals('', $s->charsWhile('>'));
- $this->assertEquals('doctype', $s->charsWhile('odcyept'));
- $this->assertEquals(' htm', $s->charsWhile('html ', 4));
- }
-} \ No newline at end of file
+ $this->assertEquals($text, $s->charsUntil("\t"));
+ }
+
+ public function testCharsWhile()
+ {
+ $s = new FileInputStream(__DIR__ . '/FileInputStreamTest.html');
+
+ $this->assertEquals('<!', $s->charsWhile('!<'));
+ $this->assertEquals('', $s->charsWhile('>'));
+ $this->assertEquals('doctype', $s->charsWhile('odcyept'));
+ $this->assertEquals(' htm', $s->charsWhile('html ', 4));
+ }
+}
diff --git a/test/HTML5/Parser/InstructionProcessorMock.php b/test/HTML5/Parser/InstructionProcessorMock.php
index b668d26..ec69364 100644
--- a/test/HTML5/Parser/InstructionProcessorMock.php
+++ b/test/HTML5/Parser/InstructionProcessorMock.php
@@ -1,16 +1,20 @@
<?php
namespace Masterminds\HTML5\Tests\Parser;
-class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor {
+class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcessor
+{
public $name = NULL;
+
public $data = NULL;
+
public $count = 0;
- public function process(\DOMElement $element, $name, $data) {
+ public function process(\DOMElement $element, $name, $data)
+ {
$this->name = $name;
$this->data = $data;
- $this->count++;
+ $this->count ++;
$div = $element->ownerDocument->createElement("div");
$div->nodeValue = 'foo';
@@ -19,4 +23,4 @@ class InstructionProcessorMock implements \Masterminds\HTML5\InstructionProcesso
return $div;
}
-} \ No newline at end of file
+}
diff --git a/test/HTML5/Parser/ScannerTest.php b/test/HTML5/Parser/ScannerTest.php
index b0d638e..8fa5110 100644
--- a/test/HTML5/Parser/ScannerTest.php
+++ b/test/HTML5/Parser/ScannerTest.php
@@ -8,145 +8,164 @@ namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\Scanner;
-class ScannerTest extends \Masterminds\HTML5\Tests\TestCase {
-
- /**
- * A canary test to make sure the basics are setup and working.
- */
- public function testConstruct() {
- $is = new StringInputStream("abc");
- $s = new Scanner($is);
+class ScannerTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ /**
+ * A canary test to make sure the basics are setup and working.
+ */
+ public function testConstruct()
+ {
+ $is = new StringInputStream("abc");
+ $s = new Scanner($is);
+
+ $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s);
+ }
- $this->assertInstanceOf('\Masterminds\HTML5\Parser\Scanner', $s);
- }
+ public function testNext()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- public function testNext() {
- $s = new Scanner(new StringInputStream("abc"));
+ $this->assertEquals('b', $s->next());
+ $this->assertEquals('c', $s->next());
+ }
- $this->assertEquals('b', $s->next());
- $this->assertEquals('c', $s->next());
- }
+ public function testPosition()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- public function testPosition() {
- $s = new Scanner(new StringInputStream("abc"));
+ $this->assertEquals(0, $s->position());
- $this->assertEquals(0, $s->position());
+ $s->next();
+ $this->assertEquals(1, $s->position());
+ }
- $s->next();
- $this->assertEquals(1, $s->position());
- }
+ public function testPeek()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- public function testPeek() {
- $s = new Scanner(new StringInputStream("abc"));
+ $this->assertEquals('b', $s->peek());
+ $s->next();
+ $this->assertEquals('c', $s->peek());
+ }
- $this->assertEquals('b', $s->peek());
+ public function testCurrent()
+ {
+ $s = new Scanner(new StringInputStream("abc"));
- $s->next();
- $this->assertEquals('c', $s->peek());
- }
+ // Before scanning the string begins the current is empty.
+ $this->assertEquals('a', $s->current());
- public function testCurrent() {
- $s = new Scanner(new StringInputStream("abc"));
+ $c = $s->next();
+ $this->assertEquals('b', $s->current());
- // Before scanning the string begins the current is empty.
- $this->assertEquals('a', $s->current());
+ // Test movement through the string.
+ $c = $s->next();
+ $this->assertEquals('c', $s->current());
+ }
- $c = $s->next();
- $this->assertEquals('b', $s->current());
+ public function testUnconsume()
+ {
+ $s = new Scanner(new StringInputStream("abcdefghijklmnopqrst"));
- // Test movement through the string.
- $c = $s->next();
- $this->assertEquals('c', $s->current());
- }
+ // Get initial position.
+ $s->next();
+ $start = $s->position();
- public function testUnconsume() {
- $s = new Scanner(new StringInputStream("abcdefghijklmnopqrst"));
+ // Move forward a bunch of positions.
+ $amount = 7;
+ for ($i = 0; $i < $amount; $i ++) {
+ $s->next();
+ }
- // Get initial position.
- $s->next();
- $start = $s->position();
+ // Roll back the amount we moved forward.
+ $s->unconsume($amount);
- // Move forward a bunch of positions.
- $amount = 7;
- for($i = 0; $i < $amount; $i++) {
- $s->next();
+ $this->assertEquals($start, $s->position());
}
- // Roll back the amount we moved forward.
- $s->unconsume($amount);
-
- $this->assertEquals($start, $s->position());
- }
-
- public function testGetHex() {
- $s = new Scanner(new StringInputStream("ab13ck45DE*"));
+ public function testGetHex()
+ {
+ $s = new Scanner(new StringInputStream("ab13ck45DE*"));
- $this->assertEquals('ab13c', $s->getHex());
+ $this->assertEquals('ab13c', $s->getHex());
- $s->next();
- $this->assertEquals('45DE', $s->getHex());
- }
-
- public function testGetAsciiAlpha() {
- $s = new Scanner(new StringInputStream("abcdef1%mnop*"));
-
- $this->assertEquals('abcdef', $s->getAsciiAlpha());
+ $s->next();
+ $this->assertEquals('45DE', $s->getHex());
+ }
- // Move past the 1% to scan the next group of text.
- $s->next();
- $s->next();
- $this->assertEquals('mnop', $s->getAsciiAlpha());
- }
+ public function testGetAsciiAlpha()
+ {
+ $s = new Scanner(new StringInputStream("abcdef1%mnop*"));
- public function testGetAsciiAlphaNum() {
- $s = new Scanner(new StringInputStream("abcdef1ghpo#mn94op"));
+ $this->assertEquals('abcdef', $s->getAsciiAlpha());
- $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum());
+ // Move past the 1% to scan the next group of text.
+ $s->next();
+ $s->next();
+ $this->assertEquals('mnop', $s->getAsciiAlpha());
+ }
- // Move past the # to scan the next group of text.
- $s->next();
- $this->assertEquals('mn94op', $s->getAsciiAlphaNum());
- }
+ public function testGetAsciiAlphaNum()
+ {
+ $s = new Scanner(new StringInputStream("abcdef1ghpo#mn94op"));
- public function testGetNumeric() {
- $s = new Scanner(new StringInputStream("1784a 45 9867 #"));
+ $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum());
- $this->assertEquals('1784', $s->getNumeric());
+ // Move past the # to scan the next group of text.
+ $s->next();
+ $this->assertEquals('mn94op', $s->getAsciiAlphaNum());
+ }
- // Move past the 'a ' to scan the next group of text.
- $s->next();
- $s->next();
- $this->assertEquals('45', $s->getNumeric());
- }
+ public function testGetNumeric()
+ {
+ $s = new Scanner(new StringInputStream("1784a 45 9867 #"));
- public function testCurrentLine() {
- $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
+ $this->assertEquals('1784', $s->getNumeric());
- $this->assertEquals(1, $s->currentLine());
+ // Move past the 'a ' to scan the next group of text.
+ $s->next();
+ $s->next();
+ $this->assertEquals('45', $s->getNumeric());
+ }
- // Move to the next line.
- $s->getAsciiAlphaNum(); $s->next();
- $this->assertEquals(2, $s->currentLine());
- }
+ public function testCurrentLine()
+ {
+ $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
- public function testColumnOffset() {
- $s = new Scanner(new StringInputStream("1784a a\n45 9867 #\nThis is a test."));
+ $this->assertEquals(1, $s->currentLine());
- // Move the pointer to the space.
- $s->getAsciiAlphaNum();
- $this->assertEquals(5, $s->columnOffset());
+ // Move to the next line.
+ $s->getAsciiAlphaNum();
+ $s->next();
+ $this->assertEquals(2, $s->currentLine());
+ }
- // We move the pointer ahead. There must be a better way to do this.
- $s->next(); $s->next(); $s->next(); $s->next(); $s->next(); $s->next();
- $this->assertEquals(3, $s->columnOffset());
- }
+ public function testColumnOffset()
+ {
+ $s = new Scanner(new StringInputStream("1784a a\n45 9867 #\nThis is a test."));
+
+ // Move the pointer to the space.
+ $s->getAsciiAlphaNum();
+ $this->assertEquals(5, $s->columnOffset());
+
+ // We move the pointer ahead. There must be a better way to do this.
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $s->next();
+ $this->assertEquals(3, $s->columnOffset());
+ }
- public function testRemainingChars() {
- $string = "\n45\n9867 #\nThis is a test.";
- $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
+ public function testRemainingChars()
+ {
+ $string = "\n45\n9867 #\nThis is a test.";
+ $s = new Scanner(new StringInputStream("1784a\n45\n9867 #\nThis is a test."));
- $s->getAsciiAlphaNum();
- $this->assertEquals($string, $s->remainingChars());
- }
-} \ No newline at end of file
+ $s->getAsciiAlphaNum();
+ $this->assertEquals($string, $s->remainingChars());
+ }
+}
diff --git a/test/HTML5/Parser/StringInputStreamTest.php b/test/HTML5/Parser/StringInputStreamTest.php
index 5148e45..3ee768a 100644
--- a/test/HTML5/Parser/StringInputStreamTest.php
+++ b/test/HTML5/Parser/StringInputStreamTest.php
@@ -3,337 +3,325 @@ namespace Masterminds\HTML5\Tests\Parser;
use Masterminds\HTML5\Parser\StringInputStream;
-class StringInputStreamTest extends \Masterminds\HTML5\Tests\TestCase {
-
- /**
- * A canary test to make sure the basics are setup and working.
- */
- public function testConstruct() {
- $s = new StringInputStream("abc");
-
- $this->assertInstanceOf('\Masterminds\HTML5\Parser\StringInputStream', $s);
- }
-
- public function testNext() {
- $s = new StringInputStream("abc");
-
- $s->next();
- $this->assertEquals('b', $s->current());
- $s->next();
- $this->assertEquals('c', $s->current());
- }
-
- public function testKey() {
- $s = new StringInputStream("abc");
-
- $this->assertEquals(0, $s->key());
-
- $s->next();
- $this->assertEquals(1, $s->key());
- }
-
- public function testPeek() {
- $s = new StringInputStream("abc");
-
- $this->assertEquals('b', $s->peek());
-
- $s->next();
- $this->assertEquals('c', $s->peek());
- }
-
- public function testCurrent() {
- $s = new StringInputStream("abc");
-
- // Before scanning the string begins the current is empty.
- $this->assertEquals('a', $s->current());
-
- $s->next();
- $this->assertEquals('b', $s->current());
-
- // Test movement through the string.
- $s->next();
- $this->assertEquals('c', $s->current());
- }
-
- public function testColumnOffset() {
- $s = new StringInputStream("abc\ndef\n");
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $this->assertEquals(1, $s->columnOffset());
- $s->next();
- $this->assertEquals(2, $s->columnOffset());
- $s->next();
- $this->assertEquals(3, $s->columnOffset());
- $s->next(); // LF
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $canary = $s->current(); // e
- $this->assertEquals('e', $canary);
- $this->assertEquals(1, $s->columnOffset());
-
- $s = new StringInputStream("abc");
- $this->assertEquals(0, $s->columnOffset());
- $s->next();
- $this->assertEquals(1, $s->columnOffset());
- $s->next();
- $this->assertEquals(2, $s->columnOffset());
- }
-
- public function testCurrentLine() {
- $txt = "1\n2\n\n\n\n3";
- $stream = new StringInputStream($txt);
- $this->assertEquals(1, $stream->currentLine());
-
- // Advance over 1 and LF on to line 2 value 2.
- $stream->next(); $stream->next();
- $canary = $stream->current();
- $this->assertEquals(2, $stream->currentLine());
- $this->assertEquals('2', $canary);
-
-
- // Advance over 4x LF
- $stream->next(); $stream->next();
- $stream->next(); $stream->next();
- $stream->next();
- $this->assertEquals(6, $stream->currentLine());
- $this->assertEquals('3', $stream->current());
-
- // Make sure it doesn't do 7.
- $this->assertEquals(6, $stream->currentLine());
- }
-
- public function testRemainingChars() {
- $text = "abcd";
- $s = new StringInputStream($text);
- $this->assertEquals($text, $s->remainingChars());
-
- $text = "abcd";
- $s = new StringInputStream($text);
- $s->next(); // Pop one.
- $this->assertEquals('bcd', $s->remainingChars());
-
- }
-
- public function testCharsUnitl() {
- $text = "abcdefffffffghi";
- $s = new StringInputStream($text);
- $this->assertEquals('', $s->charsUntil('a'));
- // Pointer at 'a', moves 2 to 'c'
- $this->assertEquals('ab', $s->charsUntil('w', 2));
-
- // Pointer at 'c', moves to first 'f'
- $this->assertEquals('cde', $s->charsUntil('fzxv'));
-
- // Only get five 'f's
- $this->assertEquals('fffff', $s->charsUntil('g', 5));
-
- // Get just the last two 'f's
- $this->assertEquals('ff', $s->charsUntil('g'));
-
- // This should scan to the end.
- $this->assertEquals('ghi', $s->charsUntil('w', 9));
-
- }
-
- public function testCharsWhile() {
- $text = "abcdefffffffghi";
- $s = new StringInputStream($text);
-
- $this->assertEquals('ab', $s->charsWhile('ba'));
-
- $this->assertEquals('', $s->charsWhile('a'));
- $this->assertEquals('cde', $s->charsWhile('cdeba'));
- $this->assertEquals('ff', $s->charsWhile('f', 2));
- $this->assertEquals('fffff', $s->charsWhile('f'));
- $this->assertEquals('g', $s->charsWhile('fg'));
- $this->assertEquals('hi', $s->charsWhile('fghi', 99));
-
- }
-
- public function testBOM() {
-
- // Ignore in-text BOM.
- $stream = new StringInputStream("a\xEF\xBB\xBF");
- $this->assertEquals("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain');
-
- // Strip leading BOM
- $leading = new StringInputStream("\xEF\xBB\xBFa");
- $this->assertEquals('a', $leading->current(), 'BOM should be stripped');
- }
-
- public function testCarriageReturn() {
-
- // Replace NULL with Unicode replacement.
- $stream = new StringInputStream("\0\0\0");
- $this->assertEquals("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD');
- $this->assertEquals(3, count($stream->errors), 'Null character should set parse error: ' . print_r($stream->errors, TRUE));
-
- // Remove CR when next to LF.
- $stream = new StringInputStream("\r\n");
- $this->assertEquals("\n", $stream->remainingChars(), 'CRLF should be replaced by LF');
-
- // Convert CR to LF when on its own.
- $stream = new StringInputStream("\r");
- $this->assertEquals("\n", $stream->remainingChars(), 'CR should be replaced by LF');
- }
-
-
- public function invalidParseErrorTestHandler($input, $numErrors, $name) {
- $stream = new StringInputStream($input, 'UTF-8');
- $this->assertEquals($input, $stream->remainingChars(), $name . ' (stream content)');
- $this->assertEquals($numErrors, count($stream->errors), $name . ' (number of errors)');
- }
-
- public function testInvalidReplace() {
- $invalidTest = array(
-
- // Min/max overlong
- "\xC0\x80a" => 'Overlong representation of U+0000',
- "\xE0\x80\x80a" => 'Overlong representation of U+0000',
- "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000',
- "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
- "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
- "\xC1\xBFa" => 'Overlong representation of U+007F',
- "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF',
- "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF',
-
-
- "a\xDF" => 'Incomplete two byte sequence (missing final byte)',
- "a\xEF\xBF" => 'Incomplete three byte sequence (missing final byte)',
- "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte)',
-
- // Min/max continuation bytes
- "a\x80" => 'Lone 80 continuation byte',
- "a\xBF" => 'Lone BF continuation byte',
-
- // Invalid bytes (these can never occur)
- "a\xFE" => 'Invalid FE byte',
- "a\xFF" => 'Invalid FF byte',
- );
- foreach ($invalidTest as $test => $note) {
- $stream = new StringInputStream($test);
- $this->assertEquals('a', $stream->remainingChars(), $note);
- }
-
- // MPB:
- // It appears that iconv just leaves these alone. Not sure what to
- // do.
- /*
- $converted = array(
- "a\xF5\x90\x80\x80" => 'U+110000, off unicode planes.',
- );
- foreach ($converted as $test => $note) {
- $stream = new StringInputStream($test);
- $this->assertEquals(2, mb_strlen($stream->remainingChars()), $note);
- }
- */
- }
-
- public function testInvalidParseError() {
- // C0 controls (except U+0000 and U+000D due to different handling)
- $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)');
- $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)');
- $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)');
- $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)');
- $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)');
- $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)');
- $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)');
- $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)');
- $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)');
- $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)');
- $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)');
- $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)');
- $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)');
- $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)');
- $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)');
- $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)');
- $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)');
- $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)');
- $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)');
- $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)');
- $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)');
- $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)');
- $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)');
- $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)');
- $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)');
- $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)');
- $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)');
- $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)');
- $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)');
- $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)');
-
- // DEL (U+007F)
- $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F');
-
- // C1 Controls
- $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)');
- $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)');
- $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)');
-
- // Charcters surrounding surrogates
- $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)');
- $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+DE00 (one codepoint above highest surrogate codepoint)');
-
- // Permanent noncharacters
- $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)');
- $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)');
-
- // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the
- // business of writing tests against iconv, I've just commented these out. Should revisit
- // at a later point.
- /*
- $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)');
- $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)');
-
- // Paired UTF-16 surrogates
- $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)');
- $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)');
- */
- }
-
+class StringInputStreamTest extends \Masterminds\HTML5\Tests\TestCase
+{
+
+ /**
+ * A canary test to make sure the basics are setup and working.
+ */
+ public function testConstruct()
+ {
+ $s = new StringInputStream("abc");
+
+ $this->assertInstanceOf('\Masterminds\HTML5\Parser\StringInputStream', $s);
+ }
+
+ public function testNext()
+ {
+ $s = new StringInputStream("abc");
+
+ $s->next();
+ $this->assertEquals('b', $s->current());
+ $s->next();
+ $this->assertEquals('c', $s->current());
+ }
+
+ public function testKey()
+ {
+ $s = new StringInputStream("abc");
+
+ $this->assertEquals(0, $s->key());
+
+ $s->next();
+ $this->assertEquals(1, $s->key());
+ }
+
+ public function testPeek()
+ {
+ $s = new StringInputStream("abc");
+
+ $this->assertEquals('b', $s->peek());
+
+ $s->next();
+ $this->assertEquals('c', $s->peek());
+ }
+
+ public function testCurrent()
+ {
+ $s = new StringInputStream("abc");
+
+ // Before the first call to next(), current() already returns the first character.
+ $this->assertEquals('a', $s->current());
+
+ $s->next();
+ $this->assertEquals('b', $s->current());
+
+ // Test movement through the string.
+ $s->next();
+ $this->assertEquals('c', $s->current());
+ }
+
+ public function testColumnOffset()
+ {
+ $s = new StringInputStream("abc\ndef\n");
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(1, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(2, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(3, $s->columnOffset());
+ $s->next(); // LF
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $canary = $s->current(); // e
+ $this->assertEquals('e', $canary);
+ $this->assertEquals(1, $s->columnOffset());
+
+ $s = new StringInputStream("abc");
+ $this->assertEquals(0, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(1, $s->columnOffset());
+ $s->next();
+ $this->assertEquals(2, $s->columnOffset());
+ }
+
+ public function testCurrentLine()
+ {
+ $txt = "1\n2\n\n\n\n3";
+ $stream = new StringInputStream($txt);
+ $this->assertEquals(1, $stream->currentLine());
+
+ // Advance over 1 and LF on to line 2 value 2.
+ $stream->next();
+ $stream->next();
+ $canary = $stream->current();
+ $this->assertEquals(2, $stream->currentLine());
+ $this->assertEquals('2', $canary);
+
+ // Advance over 4x LF
+ $stream->next();
+ $stream->next();
+ $stream->next();
+ $stream->next();
+ $stream->next();
+ $this->assertEquals(6, $stream->currentLine());
+ $this->assertEquals('3', $stream->current());
+
+ // Make sure it doesn't do 7.
+ $this->assertEquals(6, $stream->currentLine());
+ }
+
+ public function testRemainingChars()
+ {
+ $text = "abcd";
+ $s = new StringInputStream($text);
+ $this->assertEquals($text, $s->remainingChars());
+
+ $text = "abcd";
+ $s = new StringInputStream($text);
+ $s->next(); // Pop one.
+ $this->assertEquals('bcd', $s->remainingChars());
+ }
+
+ public function testCharsUnitl()
+ {
+ $text = "abcdefffffffghi";
+ $s = new StringInputStream($text);
+ $this->assertEquals('', $s->charsUntil('a'));
+ // Pointer at 'a', moves 2 to 'c'
+ $this->assertEquals('ab', $s->charsUntil('w', 2));
+
+ // Pointer at 'c', moves to first 'f'
+ $this->assertEquals('cde', $s->charsUntil('fzxv'));
+
+ // Only get five 'f's
+ $this->assertEquals('fffff', $s->charsUntil('g', 5));
+
+ // Get just the last two 'f's
+ $this->assertEquals('ff', $s->charsUntil('g'));
+
+ // This should scan to the end.
+ $this->assertEquals('ghi', $s->charsUntil('w', 9));
+ }
+
+ public function testCharsWhile()
+ {
+ $text = "abcdefffffffghi";
+ $s = new StringInputStream($text);
+
+ $this->assertEquals('ab', $s->charsWhile('ba'));
+
+ $this->assertEquals('', $s->charsWhile('a'));
+ $this->assertEquals('cde', $s->charsWhile('cdeba'));
+ $this->assertEquals('ff', $s->charsWhile('f', 2));
+ $this->assertEquals('fffff', $s->charsWhile('f'));
+ $this->assertEquals('g', $s->charsWhile('fg'));
+ $this->assertEquals('hi', $s->charsWhile('fghi', 99));
+ }
+
+ public function testBOM()
+ {
+ // Ignore in-text BOM.
+ $stream = new StringInputStream("a\xEF\xBB\xBF");
+ $this->assertEquals("a\xEF\xBB\xBF", $stream->remainingChars(), 'A non-leading U+FEFF (BOM/ZWNBSP) should remain');
+
+ // Strip leading BOM
+ $leading = new StringInputStream("\xEF\xBB\xBFa");
+ $this->assertEquals('a', $leading->current(), 'BOM should be stripped');
+ }
+
+ public function testCarriageReturn()
+ {
+ // Replace NULL with Unicode replacement.
+ $stream = new StringInputStream("\0\0\0");
+ $this->assertEquals("\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD", $stream->remainingChars(), 'Null character should be replaced by U+FFFD');
+ $this->assertEquals(3, count($stream->errors), 'Null character should set parse error: ' . print_r($stream->errors, TRUE));
+
+ // Remove CR when next to LF.
+ $stream = new StringInputStream("\r\n");
+ $this->assertEquals("\n", $stream->remainingChars(), 'CRLF should be replaced by LF');
+
+ // Convert CR to LF when on its own.
+ $stream = new StringInputStream("\r");
+ $this->assertEquals("\n", $stream->remainingChars(), 'CR should be replaced by LF');
+ }
+
+ public function invalidParseErrorTestHandler($input, $numErrors, $name)
+ {
+ $stream = new StringInputStream($input, 'UTF-8');
+ $this->assertEquals($input, $stream->remainingChars(), $name . ' (stream content)');
+ $this->assertEquals($numErrors, count($stream->errors), $name . ' (number of errors)');
+ }
+
+ public function testInvalidReplace()
+ {
+ $invalidTest = array(
+
+ // Min/max overlong
+ "\xC0\x80a" => 'Overlong representation of U+0000',
+ "\xE0\x80\x80a" => 'Overlong representation of U+0000',
+ "\xF0\x80\x80\x80a" => 'Overlong representation of U+0000',
+ "\xF8\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
+ "\xFC\x80\x80\x80\x80\x80a" => 'Overlong representation of U+0000',
+ "\xC1\xBFa" => 'Overlong representation of U+007F',
+ "\xE0\x9F\xBFa" => 'Overlong representation of U+07FF',
+ "\xF0\x8F\xBF\xBFa" => 'Overlong representation of U+FFFF',
+
+ "a\xDF" => 'Incomplete two byte sequence (missing final byte)',
+ "a\xEF\xBF" => 'Incomplete three byte sequence (missing final byte)',
+ "a\xF4\xBF\xBF" => 'Incomplete four byte sequence (missing final byte)',
+
+ // Min/max continuation bytes
+ "a\x80" => 'Lone 80 continuation byte',
+ "a\xBF" => 'Lone BF continuation byte',
+
+ // Invalid bytes (these can never occur)
+ "a\xFE" => 'Invalid FE byte',
+ "a\xFF" => 'Invalid FF byte'
+ );
+ foreach ($invalidTest as $test => $note) {
+ $stream = new StringInputStream($test);
+ $this->assertEquals('a', $stream->remainingChars(), $note);
+ }
+
+ // MPB:
+ // It appears that iconv just leaves these alone. Not sure what to
+ // do.
+ /*
+ * $converted = array( "a\xF5\x90\x80\x80" => 'U+110000, off unicode planes.', ); foreach ($converted as $test => $note) { $stream = new StringInputStream($test); $this->assertEquals(2, mb_strlen($stream->remainingChars()), $note); }
+ */
+ }
+
+ public function testInvalidParseError()
+ {
+ // C0 controls (except U+0000 and U+000D due to different handling)
+ $this->invalidParseErrorTestHandler("\x01", 1, 'U+0001 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x02", 1, 'U+0002 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x03", 1, 'U+0003 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x04", 1, 'U+0004 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x05", 1, 'U+0005 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x06", 1, 'U+0006 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x07", 1, 'U+0007 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x08", 1, 'U+0008 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x09", 0, 'U+0009 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0A", 0, 'U+000A (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0B", 1, 'U+000B (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0C", 0, 'U+000C (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0E", 1, 'U+000E (C0 control)');
+ $this->invalidParseErrorTestHandler("\x0F", 1, 'U+000F (C0 control)');
+ $this->invalidParseErrorTestHandler("\x10", 1, 'U+0010 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x11", 1, 'U+0011 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x12", 1, 'U+0012 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x13", 1, 'U+0013 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x14", 1, 'U+0014 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x15", 1, 'U+0015 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x16", 1, 'U+0016 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x17", 1, 'U+0017 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x18", 1, 'U+0018 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x19", 1, 'U+0019 (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1A", 1, 'U+001A (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1B", 1, 'U+001B (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1C", 1, 'U+001C (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1D", 1, 'U+001D (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1E", 1, 'U+001E (C0 control)');
+ $this->invalidParseErrorTestHandler("\x1F", 1, 'U+001F (C0 control)');
+
+ // DEL (U+007F)
+ $this->invalidParseErrorTestHandler("\x7F", 1, 'U+007F');
+
+ // C1 Controls
+ $this->invalidParseErrorTestHandler("\xC2\x80", 1, 'U+0080 (C1 control)');
+ $this->invalidParseErrorTestHandler("\xC2\x9F", 1, 'U+009F (C1 control)');
+ $this->invalidParseErrorTestHandler("\xC2\xA0", 0, 'U+00A0 (first codepoint above highest C1 control)');
+
+ // Characters surrounding surrogates
+ $this->invalidParseErrorTestHandler("\xED\x9F\xBF", 0, 'U+D7FF (one codepoint below lowest surrogate codepoint)');
+ $this->invalidParseErrorTestHandler("\xEF\xBF\xBD", 0, 'U+DE00 (one codepoint above highest surrogate codepoint)');
+
+ // Permanent noncharacters
+ $this->invalidParseErrorTestHandler("\xEF\xB7\x90", 1, 'U+FDD0 (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xEF\xB7\xAF", 1, 'U+FDEF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xEF\xBF\xBE", 1, 'U+FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xEF\xBF\xBF", 1, 'U+FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBE", 1, 'U+1FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\x9F\xBF\xBF", 1, 'U+1FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBE", 1, 'U+2FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xAF\xBF\xBF", 1, 'U+2FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBE", 1, 'U+3FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF0\xBF\xBF\xBF", 1, 'U+3FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBE", 1, 'U+4FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x8F\xBF\xBF", 1, 'U+4FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBE", 1, 'U+5FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\x9F\xBF\xBF", 1, 'U+5FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBE", 1, 'U+6FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xAF\xBF\xBF", 1, 'U+6FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBE", 1, 'U+7FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF1\xBF\xBF\xBF", 1, 'U+7FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBE", 1, 'U+8FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x8F\xBF\xBF", 1, 'U+8FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBE", 1, 'U+9FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\x9F\xBF\xBF", 1, 'U+9FFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBE", 1, 'U+AFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xAF\xBF\xBF", 1, 'U+AFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBE", 1, 'U+BFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF2\xBF\xBF\xBF", 1, 'U+BFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBE", 1, 'U+CFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x8F\xBF\xBF", 1, 'U+CFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBE", 1, 'U+DFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\x9F\xBF\xBF", 1, 'U+DFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBE", 1, 'U+EFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xAF\xBF\xBF", 1, 'U+EFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBE", 1, 'U+FFFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF3\xBF\xBF\xBF", 1, 'U+FFFFF (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)');
+ $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)');
+
+ // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the
+ // business of writing tests against iconv, I've just commented these out. Should revisit
+ // at a later point.
+ /*
+ * $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80", 1, 'U+DB80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF", 1, 'U+DBFF (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xB0\x80", 1, 'U+DC00 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBE\x80", 1, 'U+DF80 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xBF\xBF", 1, 'U+DFFF (UTF-16 surrogate character)'); // Paired UTF-16 surrogates $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xB0\x80", 2, 'U+D800 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xA0\x80\xED\xBF\xBF", 2, 'U+D800 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xB0\x80", 2, 'U+DB7F U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF\xED\xBF\xBF", 2, 'U+DB7F U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xB0\x80", 2, 'U+DB80 U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAE\x80\xED\xBF\xBF", 2, 'U+DB80 U+DFFF (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)');
+ */
+ }
}
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 27c3535..32f0fcb 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -6,593 +6,948 @@ use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
-class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase {
- // ================================================================
- // Additional assertions.
- // ================================================================
- /**
- * Tests that an event matches both the event type and the expected value.
- *
- * @param string $type
- * Expected event type.
- * @param string $expects
- * The value expected in $event['data'][0].
- */
- public function assertEventEquals($type, $expects, $event) {
- $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, TRUE));
- if (is_array($expects)) {
- $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, TRUE) . ": " . print_r($event, TRUE));
+class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
+{
+ // ================================================================
+ // Additional assertions.
+ // ================================================================
+ /**
+ * Tests that an event matches both the event type and the expected value.
+ *
+ * @param string $type
+ * Expected event type.
+ * @param string|array $expects
+ * The value expected in $event['data'][0], or an array expected to equal all of $event['data'].
+ */
+ public function assertEventEquals($type, $expects, $event)
+ {
+ $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, TRUE));
+ if (is_array($expects)) {
+ $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, TRUE) . ": " . print_r($event, TRUE));
+ } else {
+ $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . print_r($event, TRUE));
+ }
}
- else {
- $this->assertEquals($expects, $event['data'][0], "Event $type should equal $expects: " . print_r($event, TRUE));
+
+ /**
+ * Assert that a given event is 'error'.
+ */
+ public function assertEventError($event)
+ {
+ $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, TRUE));
+ }
+
+ /**
+ * Asserts that all of the tests are good.
+ *
+ * This loops through a map of tests/expectations and runs a few assertions on each test.
+ *
+ * Checks:
+ * - depth (if depth is > 0)
+ * - event name
+ * - matches on event 0.
+ */
+ protected function isAllGood($name, $depth, $tests, $debug = FALSE)
+ {
+ foreach ($tests as $try => $expects) {
+ if ($debug) {
+ fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, TRUE));
+ }
+ $e = $this->parse($try);
+ if ($depth > 0) {
+ $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, TRUE));
+ }
+ $this->assertEventEquals($name, $expects, $e->get(0));
+ }
+ }
+
+ // ================================================================
+ // Utility functions.
+ // ================================================================
+ public function testParse()
+ {
+ list ($tok, $events) = $this->createTokenizer('');
+
+ $tok->parse();
+ $e1 = $events->get(0);
+
+ $this->assertEquals(1, $events->Depth());
+ $this->assertEquals('eof', $e1['name']);
+ }
+
+ public function testWhitespace()
+ {
+ $spaces = ' ';
+ list ($tok, $events) = $this->createTokenizer($spaces);
+
+ $tok->parse();
+
+ $this->assertEquals(2, $events->depth());
+
+ $e1 = $events->get(0);
+
+ $this->assertEquals('text', $e1['name']);
+ $this->assertEquals($spaces, $e1['data'][0]);
}
- }
-
- /**
- * Assert that a given event is 'error'.
- */
- public function assertEventError($event) {
- $this->assertEquals('error', $event['name'], "Expected error for event: " . print_r($event, TRUE));
- }
-
- /**
- * Asserts that all of the tests are good.
- *
- * This loops through a map of tests/expectations and runs a few assertions on each test.
- *
- * Checks:
- * - depth (if depth is > 0)
- * - event name
- * - matches on event 0.
- */
- protected function isAllGood($name, $depth, $tests, $debug = FALSE) {
- foreach ($tests as $try => $expects) {
- if ($debug) {
- fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, TRUE));
- }
- $e = $this->parse($try);
- if ($depth > 0) {
- $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, TRUE));
- }
- $this->assertEventEquals($name, $expects, $e->get(0));
+
+ public function testCharacterReference()
+ {
+ $good = array(
+ '&amp;' => '&',
+ '&#x0003c;' => '<',
+ '&#38;' => '&',
+ '&' => '&'
+ );
+ $this->isAllGood('text', 2, $good);
+
+ // Test with broken charref
+ $str = '&foo';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('error', $e1['name']);
+
+ $str = '&#xfoo';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('error', $e1['name']);
+
+ $str = '&#foo';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('error', $e1['name']);
+
+ // FIXME: Once the text processor is done, need to verify that the
+ // tokens are transformed correctly into text.
}
- }
-
-
- // ================================================================
- // Utility functions.
- // ================================================================
-
- public function testParse() {
- list($tok, $events) = $this->createTokenizer('');
-
- $tok->parse();
- $e1 = $events->get(0);
-
- $this->assertEquals(1, $events->Depth());
- $this->assertEquals('eof', $e1['name']);
- }
-
- public function testWhitespace() {
- $spaces = ' ';
- list($tok, $events) = $this->createTokenizer($spaces);
-
- $tok->parse();
-
- $this->assertEquals(2, $events->depth());
-
- $e1 = $events->get(0);
-
- $this->assertEquals('text', $e1['name']);
- $this->assertEquals($spaces, $e1['data'][0]);
- }
-
- public function testCharacterReference() {
- $good = array(
- '&amp;' => '&',
- '&#x0003c;' => '<',
- '&#38;' => '&',
- '&' => '&',
- );
- $this->isAllGood('text', 2, $good);
-
- // Test with broken charref
- $str = '&foo';
- $events = $this->parse($str);
- $e1 = $events->get(0);
- $this->assertEquals('error', $e1['name']);
-
- $str = '&#xfoo';
- $events = $this->parse($str);
- $e1 = $events->get(0);
- $this->assertEquals('error', $e1['name']);
-
- $str = '&#foo';
- $events = $this->parse($str);
- $e1 = $events->get(0);
- $this->assertEquals('error', $e1['name']);
-
- // FIXME: Once the text processor is done, need to verify that the
- // tokens are transformed correctly into text.
- }
-
- public function testBogusComment() {
- $bogus = array(
- '</+this is a bogus comment. +>',
- '<!+this is a bogus comment. !>',
- '<!D OCTYPE foo bar>',
- '<!DOCTYEP foo bar>',
- '<![CADATA[ TEST ]]>',
- '<![CDATA Hello ]]>',
- '<![CDATA[ Hello [[>',
- '<!CDATA[[ test ]]>',
- '<![CDATA[',
- '<![CDATA[hellooooo hello',
- '<? Hello World ?>',
- '<? Hello World',
- );
- foreach ($bogus as $str) {
- $events = $this->parse($str);
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('comment', $str, $events->get(1));
+
+ public function testBogusComment()
+ {
+ $bogus = array(
+ '</+this is a bogus comment. +>',
+ '<!+this is a bogus comment. !>',
+ '<!D OCTYPE foo bar>',
+ '<!DOCTYEP foo bar>',
+ '<![CADATA[ TEST ]]>',
+ '<![CDATA Hello ]]>',
+ '<![CDATA[ Hello [[>',
+ '<!CDATA[[ test ]]>',
+ '<![CDATA[',
+ '<![CDATA[hellooooo hello',
+ '<? Hello World ?>',
+ '<? Hello World'
+ );
+ foreach ($bogus as $str) {
+ $events = $this->parse($str);
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('comment', $str, $events->get(1));
+ }
}
- }
- public function testEndTag() {
- $succeed = array(
- '</a>' => 'a',
- '</test>' => 'test',
- '</test
+ public function testEndTag()
+ {
+ $succeed = array(
+ '</a>' => 'a',
+ '</test>' => 'test',
+ '</test
>' => 'test',
- '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' =>
- 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
- // See 8.2.4.10, which requires this and does not say error.
- '</a<b>' => 'a<b',
- );
- $this->isAllGood('endTag', 2, $succeed);
-
- // Recoverable failures
- $fail = array(
- '</a class="monkey">' => 'a',
- '</a <b>' => 'a',
- '</a <b <c>' => 'a',
- '</a is the loneliest letter>' => 'a',
- '</a' => 'a',
- );
- foreach ($fail as $test => $result) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth());
- // Should have triggered an error.
- $this->assertEventError($events->get(0));
- // Should have tried to parse anyway.
- $this->assertEventEquals('endTag', $result, $events->get(1));
+ '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
+ // See 8.2.4.10, which requires this and does not say error.
+ '</a<b>' => 'a<b'
+ );
+ $this->isAllGood('endTag', 2, $succeed);
+
+ // Recoverable failures
+ $fail = array(
+ '</a class="monkey">' => 'a',
+ '</a <b>' => 'a',
+ '</a <b <c>' => 'a',
+ '</a is the loneliest letter>' => 'a',
+ '</a' => 'a'
+ );
+ foreach ($fail as $test => $result) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth());
+ // Should have triggered an error.
+ $this->assertEventError($events->get(0));
+ // Should have tried to parse anyway.
+ $this->assertEventEquals('endTag', $result, $events->get(1));
+ }
+
+ // Bogus comments: malformed end tags that are reported as an error followed by a comment event.
+ $comments = array(
+ '</>' => '</>',
+ '</ >' => '</ >',
+ '</ a>' => '</ a>'
+ );
+ foreach ($comments as $test => $result) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth());
+
+ // Should have triggered an error.
+ $this->assertEventError($events->get(0));
+
+ // Should have tried to parse anyway.
+ $this->assertEventEquals('comment', $result, $events->get(1));
+ }
}
- // BogoComments
- $comments = array(
- '</>' => '</>',
- '</ >' => '</ >',
- '</ a>' => '</ a>',
- );
- foreach ($comments as $test => $result) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth());
-
- // Should have triggered an error.
- $this->assertEventError($events->get(0));
-
- // Should have tried to parse anyway.
- $this->assertEventEquals('comment', $result, $events->get(1));
+ public function testComment()
+ {
+ $good = array(
+ '<!--easy-->' => 'easy',
+ '<!-- 1 > 0 -->' => ' 1 > 0 ',
+ '<!-- --$i -->' => ' --$i ',
+ '<!----$i-->' => '--$i',
+ '<!-- 1 > 0 -->' => ' 1 > 0 ',
+ "<!--\nHello World.\na-->" => "\nHello World.\na",
+ '<!-- <!-- -->' => ' <!-- '
+ );
+ foreach ($good as $test => $expected) {
+ $events = $this->parse($test);
+ $this->assertEventEquals('comment', $expected, $events->get(0));
+ }
+
+ $fail = array(
+ '<!-->' => '',
+ '<!--Hello' => 'Hello',
+ "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
+ '<!--' => ''
+ );
+ foreach ($fail as $test => $expected) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth());
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('comment', $expected, $events->get(1));
+ }
}
- }
-
- public function testComment() {
- $good = array(
- '<!--easy-->' => 'easy',
- '<!-- 1 > 0 -->' => ' 1 > 0 ',
- '<!-- --$i -->' => ' --$i ',
- '<!----$i-->' => '--$i',
- '<!-- 1 > 0 -->' => ' 1 > 0 ',
- "<!--\nHello World.\na-->" => "\nHello World.\na",
- '<!-- <!-- -->' => ' <!-- ',
- );
- foreach ($good as $test => $expected) {
- $events = $this->parse($test);
- $this->assertEventEquals('comment', $expected, $events->get(0));
+
+ public function testCDATASection()
+ {
+ $good = array(
+ '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
+ '<![CDATA[CDATA]]>' => 'CDATA',
+ '<![CDATA[ ]] > ]]>' => ' ]] > ',
+ '<![CDATA[ ]]>' => ' '
+ );
+ $this->isAllGood('cdata', 2, $good);
}
- $fail = array(
- '<!-->' => '',
- '<!--Hello' => 'Hello',
- "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
- '<!--' => '',
- );
- foreach ($fail as $test => $expected) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth());
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('comment', $expected, $events->get(1));
+ public function testDoctype()
+ {
+ $good = array(
+ '<!DOCTYPE html>' => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ '<!doctype html>' => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ '<!DocType html>' => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ "<!DOCTYPE\nhtml>" => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ "<!DOCTYPE\fhtml>" => array(
+ 'html',
+ 0,
+ NULL,
+ FALSE
+ ),
+ '<!DOCTYPE html PUBLIC "foo bar">' => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ "<!DOCTYPE html PUBLIC 'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ '<!DOCTYPE html PUBLIC "foo bar" >' => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_PUBLIC,
+ 'foo bar',
+ FALSE
+ ),
+ '<!DOCTYPE html SYSTEM "foo bar">' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ FALSE
+ ),
+ "<!DOCTYPE html SYSTEM 'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ FALSE
+ ),
+ '<!DOCTYPE html SYSTEM "foo/bar" >' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo/bar',
+ FALSE
+ ),
+ "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ FALSE
+ )
+ );
+ $this->isAllGood('doctype', 2, $good);
+
+ $bad = array(
+ '<!DOCTYPE>' => array(
+ NULL,
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE >' => array(
+ NULL,
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUB' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUB>' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUB "Looks good">' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo SYSTME "Looks good"' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+
+ // Can't tell whether these are ids or ID types, since the context is chopped.
+ '<!DOCTYPE foo PUBLIC' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo PUBLIC>' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo SYSTEM' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+ '<!DOCTYPE foo SYSTEM>' => array(
+ 'foo',
+ EventStack::DOCTYPE_NONE,
+ NULL,
+ TRUE
+ ),
+
+ '<!DOCTYPE html SYSTEM "foo bar"' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ TRUE
+ ),
+ '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array(
+ 'html',
+ EventStack::DOCTYPE_SYSTEM,
+ 'foo bar',
+ TRUE
+ )
+ );
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ // fprintf(STDOUT, $test . PHP_EOL);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('doctype', $expects, $events->get(1));
+ }
}
- }
-
- public function testCDATASection() {
- $good = array(
- '<![CDATA[ This is a test. ]]>' => ' This is a test. ',
- '<![CDATA[CDATA]]>' => 'CDATA',
- '<![CDATA[ ]] > ]]>' => ' ]] > ',
- '<![CDATA[ ]]>' => ' ',
- );
- $this->isAllGood('cdata', 2, $good);
- }
-
- public function testDoctype() {
- $good = array(
- '<!DOCTYPE html>' => array('html', 0, NULL, FALSE),
- '<!doctype html>' => array('html', 0, NULL, FALSE),
- '<!DocType html>' => array('html', 0, NULL, FALSE),
- "<!DOCTYPE\nhtml>" => array('html', 0, NULL, FALSE),
- "<!DOCTYPE\fhtml>" => array('html', 0, NULL, FALSE),
- '<!DOCTYPE html PUBLIC "foo bar">' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- "<!DOCTYPE html PUBLIC 'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- '<!DOCTYPE html PUBLIC "foo bar" >' => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', FALSE),
- '<!DOCTYPE html SYSTEM "foo bar">' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE),
- "<!DOCTYPE html SYSTEM 'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE),
- '<!DOCTYPE html SYSTEM "foo/bar" >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', FALSE),
- "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', FALSE),
- );
- $this->isAllGood('doctype', 2, $good);
-
- $bad = array(
- '<!DOCTYPE>' => array(NULL, EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE >' => array(NULL, EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUB' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUB>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUB "Looks good">' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo SYSTME "Looks good"' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
-
- // Can't tell whether these are ids or ID types, since the context is chopped.
- '<!DOCTYPE foo PUBLIC' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo PUBLIC>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo SYSTEM' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
- '<!DOCTYPE foo SYSTEM>' => array('foo', EventStack::DOCTYPE_NONE, NULL, TRUE),
-
- '<!DOCTYPE html SYSTEM "foo bar"' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE),
- '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', TRUE),
- );
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- //fprintf(STDOUT, $test . PHP_EOL);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('doctype', $expects, $events->get(1));
+ public function testProcessorInstruction()
+ {
+ $good = array(
+ '<?hph ?>' => 'hph',
+ '<?hph echo "Hello World"; ?>' => array(
+ 'hph',
+ 'echo "Hello World"; '
+ ),
+ "<?hph \necho 'Hello World';\n?>" => array(
+ 'hph',
+ "echo 'Hello World';\n"
+ )
+ );
+ $this->isAllGood('pi', 2, $good);
}
- }
-
- public function testProcessorInstruction() {
- $good = array(
- '<?hph ?>' => 'hph',
- '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '),
- "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"),
- );
- $this->isAllGood('pi', 2, $good);
- }
-
- /**
- * This tests just simple tags.
- */
- public function testSimpleTags() {
- $open = array(
- '<foo>' => 'foo',
- '<FOO>' => 'foo',
- '<fOO>' => 'foo',
- '<foo >' => 'foo',
- "<foo\n\n\n\n>" => 'foo',
- '<foo:bar>' => 'foo:bar',
- );
- $this->isAllGood('startTag', 2, $open);
-
- $selfClose= array(
- '<foo/>' => 'foo',
- '<FOO/>' => 'foo',
- '<foo />' => 'foo',
- "<foo\n\n\n\n/>" => 'foo',
- '<foo:bar/>' => 'foo:bar',
- );
- foreach ($selfClose as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, TRUE));
- $this->assertEventEquals('startTag', $expects, $events->get(0));
- $this->assertEventEquals('endTag', $expects, $events->get(1));
+
+ /**
+ * This tests just simple tags.
+ */
+ public function testSimpleTags()
+ {
+ $open = array(
+ '<foo>' => 'foo',
+ '<FOO>' => 'foo',
+ '<fOO>' => 'foo',
+ '<foo >' => 'foo',
+ "<foo\n\n\n\n>" => 'foo',
+ '<foo:bar>' => 'foo:bar'
+ );
+ $this->isAllGood('startTag', 2, $open);
+
+ $selfClose = array(
+ '<foo/>' => 'foo',
+ '<FOO/>' => 'foo',
+ '<foo />' => 'foo',
+ "<foo\n\n\n\n/>" => 'foo',
+ '<foo:bar/>' => 'foo:bar'
+ );
+ foreach ($selfClose as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test'" . print_r($events, TRUE));
+ $this->assertEventEquals('startTag', $expects, $events->get(0));
+ $this->assertEventEquals('endTag', $expects, $events->get(1));
+ }
+
+ $bad = array(
+ '<foo' => 'foo',
+ '<foo ' => 'foo',
+ '<foo/' => 'foo',
+ '<foo /' => 'foo'
+ );
+
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expects, $events->get(1));
+ }
}
- $bad = array(
- '<foo' => 'foo',
- '<foo ' => 'foo',
- '<foo/' => 'foo',
- '<foo /' => 'foo',
- );
-
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expects, $events->get(1));
+ public function testTagsWithAttributeAndMissingName()
+ {
+ $cases = array(
+ '<id="top_featured">' => 'id',
+ '<color="white">' => 'color',
+ "<class='neaktivni_stranka'>" => 'class',
+ '<bgcolor="white">' => 'bgcolor',
+ '<class="nom">' => 'class'
+ );
+
+ foreach ($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('startTag', $expected, $events->get(3));
+ $this->assertEventEquals('eof', NULL, $events->get(4));
+ }
}
- }
-
- public function testTagsWithAttributeAndMissingName() {
- $cases = array(
- '<id="top_featured">' => 'id',
- '<color="white">' => 'color',
- "<class='neaktivni_stranka'>" => 'class',
- '<bgcolor="white">' => 'bgcolor',
- '<class="nom">' => 'class',
- );
-
- foreach($cases as $html => $expected) {
- $events = $this->parse($html);
- $this->assertEventError($events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventError($events->get(2));
- $this->assertEventEquals('startTag', $expected, $events->get(3));
- $this->assertEventEquals('eof', NULL, $events->get(4));
+
+ public function testTagNotClosedAfterTagName()
+ {
+ $cases = array(
+ "<noscript<img>" => array(
+ 'noscript',
+ 'img'
+ ),
+ '<center<a>' => array(
+ 'center',
+ 'a'
+ ),
+ '<br<br>' => array(
+ 'br',
+ 'br'
+ )
+ );
+
+ foreach ($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expected[0], $events->get(1));
+ $this->assertEventEquals('startTag', $expected[1], $events->get(2));
+ $this->assertEventEquals('eof', NULL, $events->get(3));
+ }
+
+ $events = $this->parse('<span<>02</span>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'span', $events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('text', '>02', $events->get(3));
+ $this->assertEventEquals('endTag', 'span', $events->get(4));
+ $this->assertEventEquals('eof', NULL, $events->get(5));
+
+ $events = $this->parse('<p</p>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'p', $events->get(1));
+ $this->assertEventEquals('endTag', 'p', $events->get(2));
+ $this->assertEventEquals('eof', NULL, $events->get(3));
+
+ $events = $this->parse('<strong><WordPress</strong>');
+ $this->assertEventEquals('startTag', 'strong', $events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
+ $this->assertEventEquals('endTag', 'strong', $events->get(3));
+ $this->assertEventEquals('eof', NULL, $events->get(4));
+
+ $events = $this->parse('<src=<a>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('startTag', 'src', $events->get(3));
+ $this->assertEventEquals('startTag', 'a', $events->get(4));
+ $this->assertEventEquals('eof', NULL, $events->get(5));
+
+ $events = $this->parse('<br...<a>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'br', $events->get(1));
+ $this->assertEventEquals('eof', NULL, $events->get(2));
}
- }
-
- public function testTagNotClosedAfterTagName() {
- $cases = array(
- "<noscript<img>" => array('noscript', 'img'),
- '<center<a>' => array('center', 'a'),
- '<br<br>' => array('br', 'br'),
- );
-
- foreach($cases as $html => $expected) {
- $events = $this->parse($html);
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expected[0], $events->get(1));
- $this->assertEventEquals('startTag', $expected[1], $events->get(2));
- $this->assertEventEquals('eof', NULL, $events->get(3));
+
+ public function testIllegalTagNames()
+ {
+ $cases = array(
+ '<li">' => 'li',
+ '<p">' => 'p',
+ '<b&nbsp; >' => 'b',
+ '<static*all>' => 'static',
+ '<h*0720/>' => 'h',
+ '<st*ATTRIBUTE />' => 'st',
+ '<a-href="http://url.com/">' => 'a'
+ );
+
+ foreach ($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expected, $events->get(1));
+ }
}
- $events = $this->parse('<span<>02</span>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', 'span', $events->get(1));
- $this->assertEventError($events->get(2));
- $this->assertEventEquals('text', '>02', $events->get(3));
- $this->assertEventEquals('endTag', 'span', $events->get(4));
- $this->assertEventEquals('eof', NULL, $events->get(5));
-
- $events = $this->parse('<p</p>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', 'p', $events->get(1));
- $this->assertEventEquals('endTag', 'p', $events->get(2));
- $this->assertEventEquals('eof', NULL, $events->get(3));
-
- $events = $this->parse('<strong><WordPress</strong>');
- $this->assertEventEquals('startTag', 'strong', $events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
- $this->assertEventEquals('endTag', 'strong', $events->get(3));
- $this->assertEventEquals('eof', NULL, $events->get(4));
-
- $events = $this->parse('<src=<a>');
- $this->assertEventError($events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventError($events->get(2));
- $this->assertEventEquals('startTag', 'src', $events->get(3));
- $this->assertEventEquals('startTag', 'a', $events->get(4));
- $this->assertEventEquals('eof', NULL, $events->get(5));
-
- $events = $this->parse('<br...<a>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', 'br', $events->get(1));
- $this->assertEventEquals('eof', NULL, $events->get(2));
- }
-
- public function testIllegalTagNames() {
- $cases = array(
- '<li">' => 'li',
- '<p">' => 'p',
- '<b&nbsp; >' => 'b',
- '<static*all>' => 'static',
- '<h*0720/>' => 'h',
- '<st*ATTRIBUTE />' => 'st',
- '<a-href="http://url.com/">' => 'a',
- );
-
- foreach($cases as $html => $expected) {
- $events = $this->parse($html);
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expected, $events->get(1));
+ /**
+ * @depends testCharacterReference
+ */
+ public function testTagAttributes()
+ {
+ // Opening tags.
+ $good = array(
+ '<foo bar="baz">' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+ '<foo bar=" baz ">' => array(
+ 'foo',
+ array(
+ 'bar' => ' baz '
+ ),
+ FALSE
+ ),
+ "<foo bar=\"\nbaz\n\">" => array(
+ 'foo',
+ array(
+ 'bar' => "\nbaz\n"
+ ),
+ FALSE
+ ),
+ "<foo bar='baz'>" => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+ '<foo bar="A full sentence.">' => array(
+ 'foo',
+ array(
+ 'bar' => 'A full sentence.'
+ ),
+ FALSE
+ ),
+ "<foo a='1' b=\"2\">" => array(
+ 'foo',
+ array(
+ 'a' => '1',
+ 'b' => '2'
+ ),
+ FALSE
+ ),
+ "<foo ns:bar='baz'>" => array(
+ 'foo',
+ array(
+ 'ns:bar' => 'baz'
+ ),
+ FALSE
+ ),
+ "<foo a='blue&amp;red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&red'
+ ),
+ FALSE
+ ),
+ "<foo a='blue&&amp;red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&&red'
+ ),
+ FALSE
+ ),
+ "<foo\nbar='baz'\n>" => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+ '<doe a deer>' => array(
+ 'doe',
+ array(
+ 'a' => NULL,
+ 'deer' => NULL
+ ),
+ FALSE
+ ),
+ '<foo bar=baz>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+
+ // Updated for 8.1.2.3
+ '<foo bar = "baz" >' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ FALSE
+ ),
+
+ // The spec allows an unquoted value '/'. This will not be a closing
+ // tag.
+ '<foo bar=/>' => array(
+ 'foo',
+ array(
+ 'bar' => '/'
+ ),
+ FALSE
+ ),
+ '<foo bar=baz/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz/'
+ ),
+ FALSE
+ )
+ );
+ $this->isAllGood('startTag', 2, $good);
+
+ // Self-closing tags.
+ $withEnd = array(
+ '<foo bar="baz"/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ TRUE
+ ),
+ '<foo BAR="baz"/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'baz'
+ ),
+ TRUE
+ ),
+ '<foo BAR="BAZ"/>' => array(
+ 'foo',
+ array(
+ 'bar' => 'BAZ'
+ ),
+ TRUE
+ ),
+ "<foo a='1' b=\"2\" c=3 d/>" => array(
+ 'foo',
+ array(
+ 'a' => '1',
+ 'b' => '2',
+ 'c' => '3',
+ 'd' => NULL
+ ),
+ TRUE
+ )
+ );
+ $this->isAllGood('startTag', 3, $withEnd);
+
+ // Cause a parse error.
+ $bad = array(
+ // This will emit an entity lookup failure for &red.
+ "<foo a='blue&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&red'
+ ),
+ FALSE
+ ),
+ "<foo a='blue&&amp;&red'>" => array(
+ 'foo',
+ array(
+ 'a' => 'blue&&&red'
+ ),
+ FALSE
+ ),
+ '<foo bar=>' => array(
+ 'foo',
+ array(
+ 'bar' => NULL
+ ),
+ FALSE
+ ),
+ '<foo bar="oh' => array(
+ 'foo',
+ array(
+ 'bar' => 'oh'
+ ),
+ FALSE
+ ),
+ '<foo bar=oh">' => array(
+ 'foo',
+ array(
+ 'bar' => 'oh"'
+ ),
+ FALSE
+ ),
+
+ // these attributes are ignored because of current implementation
+ // of method "DOMElement::setAttribute"
+ // see issue #23: https://github.com/Masterminds/html5-php/issues/23
+ '<foo b"="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ ),
+ '<foo 2abc="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ ),
+ '<foo ?="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ ),
+ '<foo foo?bar="baz">' => array(
+ 'foo',
+ array(),
+ FALSE
+ )
+ )
+ ;
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', $expects, $events->get(1));
+ }
+
+ // Cause multiple parse errors.
+ $reallyBad = array(
+ '<foo ="bar">' => array(
+ 'foo',
+ array(
+ '=' => NULL,
+ '"bar"' => NULL
+ ),
+ FALSE
+ ),
+ '<foo////>' => array(
+ 'foo',
+ array(),
+ TRUE
+ ),
+ // character "&" in unquoted attribute shouldn't cause an infinite loop
+ '<foo bar=index.php?str=1&amp;id=29>' => array(
+ 'foo',
+ array(
+ 'bar' => 'index.php?str=1&id=29'
+ ),
+ FALSE
+ )
+ );
+ foreach ($reallyBad as $test => $expects) {
+ $events = $this->parse($test);
+ // fprintf(STDOUT, $test . print_r($events, TRUE));
+ $this->assertEventError($events->get(0));
+ $this->assertEventError($events->get(1));
+ // $this->assertEventEquals('startTag', $expects, $events->get(1));
+ }
+
+ // Regression: Malformed elements should be detected.
+ // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE),
+ $events = $this->parse('<foo baz="1" <bar></foo>');
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', array(
+ 'foo',
+ array(
+ 'baz' => '1'
+ ),
+ FALSE
+ ), $events->get(1));
+ $this->assertEventEquals('startTag', array(
+ 'bar',
+ array(),
+ FALSE
+ ), $events->get(2));
+ $this->assertEventEquals('endTag', array(
+ 'foo'
+ ), $events->get(3));
}
- }
-
- /**
- * @depends testCharacterReference
- */
- public function testTagAttributes() {
- // Opening tags.
- $good = array(
- '<foo bar="baz">' => array('foo', array('bar' => 'baz'), FALSE),
- '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), FALSE),
- "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), FALSE),
- "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), FALSE),
- '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), FALSE),
- "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), FALSE),
- "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), FALSE),
- "<foo a='blue&amp;red'>" => array('foo', array('a' => 'blue&red'), FALSE),
- "<foo a='blue&&amp;red'>" => array('foo', array('a' => 'blue&&red'), FALSE),
- "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), FALSE),
- '<doe a deer>' => array('doe', array('a' => NULL, 'deer' => NULL), FALSE),
- '<foo bar=baz>' => array('foo', array('bar' => 'baz'), FALSE),
-
- // Updated for 8.1.2.3
- '<foo bar = "baz" >' => array('foo', array('bar' => 'baz'), FALSE),
-
- // The spec allows an unquoted value '/'. This will not be a closing
- // tag.
- '<foo bar=/>' => array('foo', array('bar' => '/'), FALSE),
- '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), FALSE),
- );
- $this->isAllGood('startTag', 2, $good);
-
- // Self-closing tags.
- $withEnd = array(
- '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), TRUE),
- '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), TRUE),
- '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), TRUE),
- "<foo a='1' b=\"2\" c=3 d/>" => array('foo', array('a' => '1', 'b' => '2', 'c' => '3', 'd' => NULL), TRUE),
- );
- $this->isAllGood('startTag', 3, $withEnd);
-
- // Cause a parse error.
- $bad = array(
- // This will emit an entity lookup failure for &red.
- "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), FALSE),
- "<foo a='blue&&amp;&red'>" => array('foo', array('a' => 'blue&&&red'), FALSE),
- '<foo bar=>' => array('foo', array('bar' => NULL), FALSE),
- '<foo bar="oh' => array('foo', array('bar' => 'oh'), FALSE),
- '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), FALSE),
-
- // these attributes are ignored because of current implementation
- // of method "DOMElement::setAttribute"
- // see issue #23: https://github.com/Masterminds/html5-php/issues/23
- '<foo b"="baz">' => array('foo', array(), FALSE),
- '<foo 2abc="baz">' => array('foo', array(), FALSE),
- '<foo ?="baz">' => array('foo', array(), FALSE),
- '<foo foo?bar="baz">' => array('foo', array(), FALSE),
-
- );
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', $expects, $events->get(1));
+
+ public function testRawText()
+ {
+ $good = array(
+ '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
+ '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
+ '<script><<<<<<<<</script>' => '<<<<<<<<',
+ '<script>hello</script</script>' => 'hello</script',
+ "<script>\nhello</script\n</script>" => "\nhello</script\n",
+ '<script>&amp;</script>' => '&amp;',
+ '<script><!--not a comment--></script>' => '<!--not a comment-->',
+ '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>'
+ );
+ foreach ($good as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEventEquals('startTag', 'script', $events->get(0));
+ $this->assertEventEquals('text', $expects, $events->get(1));
+ $this->assertEventEquals('endTag', 'script', $events->get(2));
+ }
+
+ $bad = array(
+ '<script>&amp;</script' => '&amp;</script',
+ '<script>Hello world' => 'Hello world'
+ );
+ foreach ($bad as $test => $expects) {
+ $events = $this->parse($test);
+ $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
+ $this->assertEventEquals('startTag', 'script', $events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventEquals('text', $expects, $events->get(2));
+ }
+
+ // Testing case sensitivity
+ $events = $this->parse('<TITLE>a test</TITLE>');
+ $this->assertEventEquals('startTag', 'title', $events->get(0));
+ $this->assertEventEquals('text', 'a test', $events->get(1));
+ $this->assertEventEquals('endTag', 'title', $events->get(2));
}
- // Cause multiple parse errors.
- $reallyBad = array(
- '<foo ="bar">' => array('foo', array('=' => NULL, '"bar"' => NULL), FALSE),
- '<foo////>' => array('foo', array(), TRUE),
- // character "&" in unquoted attribute shouldn't cause an infinite loop
- '<foo bar=index.php?str=1&amp;id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), FALSE),
- );
- foreach ($reallyBad as $test => $expects) {
- $events = $this->parse($test);
- //fprintf(STDOUT, $test . print_r($events, TRUE));
- $this->assertEventError($events->get(0));
- $this->assertEventError($events->get(1));
- //$this->assertEventEquals('startTag', $expects, $events->get(1));
+ public function testRcdata()
+ {
+ list ($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
+ $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
+ $tok->parse();
+ $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
}
- // Regression: Malformed elements should be detected.
- // '<foo baz="1" <bar></foo>' => array('foo', array('baz' => '1'), FALSE),
- $events = $this->parse('<foo baz="1" <bar></foo>');
- $this->assertEventError($events->get(0));
- $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), FALSE), $events->get(1));
- $this->assertEventEquals('startTag', array('bar', array(), FALSE), $events->get(2));
- $this->assertEventEquals('endTag', array('foo'), $events->get(3));
- }
-
- public function testRawText() {
- $good = array(
- '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
- '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
- '<script><<<<<<<<</script>' => '<<<<<<<<',
- '<script>hello</script</script>' => 'hello</script',
- "<script>\nhello</script\n</script>" => "\nhello</script\n",
- '<script>&amp;</script>' => '&amp;',
- '<script><!--not a comment--></script>' => '<!--not a comment-->',
- '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>',
- );
- foreach ($good as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEventEquals('startTag', 'script', $events->get(0));
- $this->assertEventEquals('text', $expects, $events->get(1));
- $this->assertEventEquals('endTag', 'script', $events->get(2));
+ public function testText()
+ {
+ $events = $this->parse('a<br>b');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a', $events->get(0));
+ $this->assertEventEquals('startTag', 'br', $events->get(1));
+ $this->assertEventEquals('text', 'b', $events->get(2));
+
+ $events = $this->parse('<a>Test</a>');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('startTag', 'a', $events->get(0));
+ $this->assertEventEquals('text', 'Test', $events->get(1));
+ $this->assertEventEquals('endTag', 'a', $events->get(2));
+
+ $events = $this->parse('a<![CDATA[test]]>b');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a', $events->get(0));
+ $this->assertEventEquals('cdata', 'test', $events->get(1));
+ $this->assertEventEquals('text', 'b', $events->get(2));
+
+ $events = $this->parse('a<!--test-->b');
+ $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a', $events->get(0));
+ $this->assertEventEquals('comment', 'test', $events->get(1));
+ $this->assertEventEquals('text', 'b', $events->get(2));
+
+ $events = $this->parse('a&amp;b');
+ $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, TRUE));
+ $this->assertEventEquals('text', 'a&b', $events->get(0));
}
- $bad = array(
- '<script>&amp;</script' => '&amp;</script',
- '<script>Hello world' => 'Hello world',
- );
- foreach ($bad as $test => $expects) {
- $events = $this->parse($test);
- $this->assertEquals(4, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE));
- $this->assertEventEquals('startTag', 'script', $events->get(0));
- $this->assertEventError($events->get(1));
- $this->assertEventEquals('text', $expects, $events->get(2));
+ // ================================================================
+ // Utility functions.
+ // ================================================================
+ protected function createTokenizer($string, $debug = FALSE)
+ {
+ $eventHandler = new EventStack();
+ $stream = new StringInputStream($string);
+ $scanner = new Scanner($stream);
+
+ $scanner->debug = $debug;
+
+ return array(
+ new Tokenizer($scanner, $eventHandler),
+ $eventHandler
+ );
}
- // Testing case sensitivity
- $events = $this->parse('<TITLE>a test</TITLE>');
- $this->assertEventEquals('startTag', 'title', $events->get(0));
- $this->assertEventEquals('text', 'a test', $events->get(1));
- $this->assertEventEquals('endTag', 'title', $events->get(2));
-
- }
-
- public function testRcdata() {
- list($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
- $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
- $tok->parse();
- $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
- }
-
- public function testText() {
-
- $events = $this->parse('a<br>b');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a', $events->get(0));
- $this->assertEventEquals('startTag', 'br', $events->get(1));
- $this->assertEventEquals('text', 'b', $events->get(2));
-
- $events = $this->parse('<a>Test</a>');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('startTag', 'a', $events->get(0));
- $this->assertEventEquals('text', 'Test', $events->get(1));
- $this->assertEventEquals('endTag', 'a', $events->get(2));
-
- $events = $this->parse('a<![CDATA[test]]>b');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a', $events->get(0));
- $this->assertEventEquals('cdata', 'test', $events->get(1));
- $this->assertEventEquals('text', 'b', $events->get(2));
-
- $events = $this->parse('a<!--test-->b');
- $this->assertEquals(4, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a', $events->get(0));
- $this->assertEventEquals('comment', 'test', $events->get(1));
- $this->assertEventEquals('text', 'b', $events->get(2));
-
- $events = $this->parse('a&amp;b');
- $this->assertEquals(2, $events->depth(), "Events: " . print_r($events, TRUE));
- $this->assertEventEquals('text', 'a&b', $events->get(0));
- }
-
- // ================================================================
- // Utility functions.
- // ================================================================
- protected function createTokenizer($string, $debug = FALSE) {
- $eventHandler = new EventStack();
- $stream = new StringInputStream($string);
- $scanner = new Scanner($stream);
-
- $scanner->debug = $debug;
-
- return array(
- new Tokenizer($scanner, $eventHandler),
- $eventHandler,
- );
- }
-
- public function parse($string, $debug = FALSE) {
- list($tok, $events) = $this->createTokenizer($string, $debug);
- $tok->parse();
-
- return $events;
- }
+ public function parse($string, $debug = FALSE)
+ {
+ list ($tok, $events) = $this->createTokenizer($string, $debug);
+ $tok->parse();
+
+ return $events;
+ }
}
diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php
index ff07a0a..cb9b1e4 100644
--- a/test/HTML5/Parser/TreeBuildingRulesTest.php
+++ b/test/HTML5/Parser/TreeBuildingRulesTest.php
@@ -5,77 +5,78 @@
*/
namespace Masterminds\HTML5\Tests\Parser;
-use Masterminds\HTML5\Elements;
use Masterminds\HTML5\Parser\TreeBuildingRules;
use Masterminds\HTML5\Parser\Tokenizer;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\DOMTreeBuilder;
-
-
/**
* These tests are functional, not necessarily unit tests.
*/
-class TreeBuildingRulesTest extends \Masterminds\HTML5\Tests\TestCase {
+class TreeBuildingRulesTest extends \Masterminds\HTML5\Tests\TestCase
+{
- const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>';
+ const HTML_STUB = '<!DOCTYPE html><html><head><title>test</title></head><body>%s</body></html>';
- /**
- * Convenience function for parsing.
- */
- protected function parse($string) {
- $treeBuilder = new DOMTreeBuilder();
- $input = new StringInputStream($string);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $treeBuilder);
+ /**
+ * Convenience function for parsing.
+ */
+ protected function parse($string)
+ {
+ $treeBuilder = new DOMTreeBuilder();
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
- $parser->parse();
+ $parser->parse();
- return $treeBuilder->document();
- }
+ return $treeBuilder->document();
+ }
- public function testHasRules() {
- $doc = new \DOMDocument('1.0');
- $engine = new TreeBuildingRules($doc);
+ public function testHasRules()
+ {
+ $doc = new \DOMDocument('1.0');
+ $engine = new TreeBuildingRules($doc);
- $this->assertTrue($engine->hasRules('li'));
- $this->assertFalse($engine->hasRules('imaginary'));
- }
+ $this->assertTrue($engine->hasRules('li'));
+ $this->assertFalse($engine->hasRules('imaginary'));
+ }
- public function testHandleLI() {
- $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>');
- $doc = $this->parse($html);
+ public function testHandleLI()
+ {
+ $html = sprintf(self::HTML_STUB, '<ul id="a"><li>test<li>test2</ul><a></a>');
+ $doc = $this->parse($html);
- $list = $doc->getElementById('a');
+ $list = $doc->getElementById('a');
- $this->assertEquals(2, $list->childNodes->length);
- foreach($list->childNodes as $ele) {
- $this->assertEquals('li', $ele->tagName);
+ $this->assertEquals(2, $list->childNodes->length);
+ foreach ($list->childNodes as $ele) {
+ $this->assertEquals('li', $ele->tagName);
+ }
}
- }
+ public function testHandleDT()
+ {
+ $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>');
+ $doc = $this->parse($html);
- public function testHandleDT() {
- $html = sprintf(self::HTML_STUB, '<dl id="a"><dt>Hello<dd>Hi</dl><a></a>');
- $doc = $this->parse($html);
+ $list = $doc->getElementById('a');
- $list = $doc->getElementById('a');
-
- $this->assertEquals(2, $list->childNodes->length);
- $this->assertEquals('dt', $list->firstChild->tagName);
- $this->assertEquals('dd', $list->lastChild->tagName);
- }
-
- public function testTable() {
- $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz');
- $doc = $this->parse($html);
+ $this->assertEquals(2, $list->childNodes->length);
+ $this->assertEquals('dt', $list->firstChild->tagName);
+ $this->assertEquals('dd', $list->lastChild->tagName);
+ }
- $list = $doc->getElementById('a');
+ public function testTable()
+ {
+ $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz');
+ $doc = $this->parse($html);
- $this->assertEquals(3, $list->childNodes->length);
- $this->assertEquals('th', $list->firstChild->tagName);
- $this->assertEquals('td', $list->lastChild->tagName);
- }
+ $list = $doc->getElementById('a');
+ $this->assertEquals(3, $list->childNodes->length);
+ $this->assertEquals('th', $list->firstChild->tagName);
+ $this->assertEquals('td', $list->lastChild->tagName);
+ }
}
diff --git a/test/HTML5/Serializer/OutputRulesTest.php b/test/HTML5/Serializer/OutputRulesTest.php
index daae43a..27c66c4 100644
--- a/test/HTML5/Serializer/OutputRulesTest.php
+++ b/test/HTML5/Serializer/OutputRulesTest.php
@@ -3,11 +3,11 @@ namespace Masterminds\HTML5\Tests\Serializer;
use Masterminds\HTML5\Serializer\OutputRules;
use Masterminds\HTML5\Serializer\Traverser;
-use Masterminds\HTML5\Parser;
-class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase {
+class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase
+{
- protected $markup = '<!doctype html>
+ protected $markup = '<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
@@ -17,69 +17,82 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase {
<p>This is a test.</p>
</body>
</html>';
- public function setUp()
- {
- $this->html5 = $this->getInstance();
- }
- /**
- * Using reflection we make a protected method accessible for testing.
- *
- * @param string $name
- * The name of the method on the Traverser class to test.
- *
- * @return \ReflectionMethod
- * \ReflectionMethod for the specified method
- */
- function getProtectedMethod($name) {
- $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\OutputRules');
- $method = $class->getMethod($name);
- $method->setAccessible(true);
- return $method;
- }
-
- function getTraverserProtectedProperty($name) {
- $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\Traverser');
- $property = $class->getProperty($name);
- $property->setAccessible(true);
- return $property;
- }
-
- function getOutputRules($options = array()) {
- $options = $options + $this->html5->getOptions();
- $stream = fopen('php://temp', 'w');
- $dom = $this->html5->loadHTML($this->markup);
- $r = new OutputRules($stream, $options);
- $t = new Traverser($dom, $stream, $r, $options);
-
- return array($r, $stream);
- }
-
- function testDocument() {
- $dom = $this->html5->loadHTML('<!doctype html><html lang="en"><body>foo</body></html>');
-
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
-
- $r->document($dom);
- $expected = '<!DOCTYPE html>' . PHP_EOL . '<html lang="en"><body>foo</body></html>' . PHP_EOL;
- $this->assertEquals($expected, stream_get_contents($stream, -1, 0));
- }
-
- function testDoctype() {
- $dom = $this->html5->loadHTML('<!doctype html><html lang="en"><body>foo</body></html>');
-
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
-
- $m = $this->getProtectedMethod('doctype');
- $m->invoke($r, 'foo');
- $this->assertEquals("<!DOCTYPE html>" . PHP_EOL, stream_get_contents($stream, -1, 0));
- }
-
- function testElement() {
- $dom = $this->html5->loadHTML('<!doctype html>
+
+ public function setUp()
+ {
+ $this->html5 = $this->getInstance();
+ }
+
+ /**
+ * Using reflection we make a protected method accessible for testing.
+ *
+ * @param string $name
+ * The name of the method on the OutputRules class to test.
+ *
+ * @return \ReflectionMethod The reflection of the specified method, made accessible.
+ */
+ public function getProtectedMethod($name)
+ {
+ $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\OutputRules');
+ $method = $class->getMethod($name);
+ $method->setAccessible(true);
+
+ return $method;
+ }
+
+ public function getTraverserProtectedProperty($name)
+ {
+ $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\Traverser');
+ $property = $class->getProperty($name);
+ $property->setAccessible(true);
+
+ return $property;
+ }
+
+ public function getOutputRules($options = array())
+ {
+ $options = $options + $this->html5->getOptions();
+ $stream = fopen('php://temp', 'w');
+ $dom = $this->html5->loadHTML($this->markup);
+ $r = new OutputRules($stream, $options);
+ $t = new Traverser($dom, $stream, $r, $options);
+
+ return array(
+ $r,
+ $stream
+ );
+ }
+
+ public function testDocument()
+ {
+ $dom = $this->html5->loadHTML('<!doctype html><html lang="en"><body>foo</body></html>');
+
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+
+ $r->document($dom);
+ $expected = '<!DOCTYPE html>' . PHP_EOL . '<html lang="en"><body>foo</body></html>' . PHP_EOL;
+ $this->assertEquals($expected, stream_get_contents($stream, - 1, 0));
+ }
+
+ public function testDoctype()
+ {
+ $dom = $this->html5->loadHTML('<!doctype html><html lang="en"><body>foo</body></html>');
+
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+
+ $m = $this->getProtectedMethod('doctype');
+ $m->invoke($r, 'foo');
+ $this->assertEquals("<!DOCTYPE html>" . PHP_EOL, stream_get_contents($stream, - 1, 0));
+ }
+
+ public function testElement()
+ {
+ $dom = $this->html5->loadHTML(
+ '<!doctype html>
<html lang="en">
<body>
<div id="foo" class="bar baz">foo bar baz</div>
@@ -91,23 +104,25 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $list = $dom->getElementsByTagName('div');
- $r->element($list->item(0));
- $this->assertEquals('<div id="foo" class="bar baz">foo bar baz</div>', stream_get_contents($stream, -1, 0));
- }
+ $list = $dom->getElementsByTagName('div');
+ $r->element($list->item(0));
+ $this->assertEquals('<div id="foo" class="bar baz">foo bar baz</div>', stream_get_contents($stream, - 1, 0));
+ }
- function testElementWithScript() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testElementWithScript()
+ {
+ $dom = $this->html5->loadHTML(
+ '<!doctype html>
<html lang="en">
<head>
<script>
var $jQ = jQuery.noConflict();
// Use jQuery via $jQ(...)
- $jQ(document).ready(function(){
+ $jQ(document).ready(function () {
$jQ("#mktFrmSubmit").wrap("<div class=\'buttonSubmit\'></div>");
$jQ(".buttonSubmit").prepend("<span></span>");
});
@@ -118,24 +133,27 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $script = $dom->getElementsByTagName('script');
- $r->element($script->item(0));
- $this->assertEquals('<script>
+ $script = $dom->getElementsByTagName('script');
+ $r->element($script->item(0));
+ $this->assertEquals(
+ '<script>
var $jQ = jQuery.noConflict();
// Use jQuery via $jQ(...)
- $jQ(document).ready(function(){
+ $jQ(document).ready(function () {
$jQ("#mktFrmSubmit").wrap("<div class=\'buttonSubmit\'></div>");
$jQ(".buttonSubmit").prepend("<span></span>");
});
- </script>', stream_get_contents($stream, -1, 0));
- }
+ </script>', stream_get_contents($stream, - 1, 0));
+ }
- function testElementWithStyle() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testElementWithStyle()
+ {
+ $dom = $this->html5->loadHTML(
+ '<!doctype html>
<html lang="en">
<head>
<style>
@@ -149,220 +167,265 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $style = $dom->getElementsByTagName('style');
- $r->element($style->item(0));
- $this->assertEquals('<style>
+ $style = $dom->getElementsByTagName('style');
+ $r->element($style->item(0));
+ $this->assertEquals('<style>
body > .bar {
display: none;
}
- </style>', stream_get_contents($stream, -1, 0));
- }
+ </style>', stream_get_contents($stream, - 1, 0));
+ }
- function testOpenTag() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testOpenTag()
+ {
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<body>
<div id="foo" class="bar baz">foo bar baz</div>
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $list = $dom->getElementsByTagName('div');
- $m = $this->getProtectedMethod('openTag');
- $m->invoke($r, $list->item(0));
- $this->assertEquals('<div id="foo" class="bar baz">', stream_get_contents($stream, -1, 0));
- }
+ $list = $dom->getElementsByTagName('div');
+ $m = $this->getProtectedMethod('openTag');
+ $m->invoke($r, $list->item(0));
+ $this->assertEquals('<div id="foo" class="bar baz">', stream_get_contents($stream, - 1, 0));
+ }
- function testCData() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testCData()
+ {
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<body>
<div><![CDATA[bar]]></div>
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $list = $dom->getElementsByTagName('div');
- $r->cdata($list->item(0)->childNodes->item(0));
- $this->assertEquals('<![CDATA[bar]]>', stream_get_contents($stream, -1, 0));
+ $list = $dom->getElementsByTagName('div');
+ $r->cdata($list->item(0)->childNodes->item(0));
+ $this->assertEquals('<![CDATA[bar]]>', stream_get_contents($stream, - 1, 0));
- $dom = $this->html5->loadHTML('<!doctype html>
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<body>
<div id="foo"></div>
</body>
</html>');
+ $dom->getElementById('foo')->appendChild(new \DOMCdataSection("]]>Foo<[![CDATA test ]]>"));
- $dom->getElementById('foo')->appendChild(new \DOMCdataSection("]]>Foo<[![CDATA test ]]>"));
-
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $list = $dom->getElementsByTagName('div');
- $r->cdata($list->item(0)->childNodes->item(0));
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $list = $dom->getElementsByTagName('div');
+ $r->cdata($list->item(0)->childNodes->item(0));
- $this->assertEquals('<![CDATA[]]]]><![CDATA[>Foo<[![CDATA test ]]]]><![CDATA[>]]>', stream_get_contents($stream, -1, 0));
- }
+ $this->assertEquals('<![CDATA[]]]]><![CDATA[>Foo<[![CDATA test ]]]]><![CDATA[>]]>', stream_get_contents($stream, - 1, 0));
+ }
- function testComment() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testComment()
+ {
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<body>
<div><!-- foo --></div>
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
-
- $list = $dom->getElementsByTagName('div');
- $r->comment($list->item(0)->childNodes->item(0));
- $this->assertEquals('<!-- foo -->', stream_get_contents($stream, -1, 0));
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $list = $dom->getElementsByTagName('div');
+ $r->comment($list->item(0)->childNodes->item(0));
+ $this->assertEquals('<!-- foo -->', stream_get_contents($stream, - 1, 0));
- $dom = $this->html5->loadHTML('<!doctype html>
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<body>
<div id="foo"></div>
</body>
</html>');
- $dom->getElementById('foo')->appendChild(new \DOMComment('<!-- --> --> Foo -->'));
+ $dom->getElementById('foo')->appendChild(new \DOMComment('<!-- --> --> Foo -->'));
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $list = $dom->getElementsByTagName('div');
- $r->comment($list->item(0)->childNodes->item(0));
+ $list = $dom->getElementsByTagName('div');
+ $r->comment($list->item(0)->childNodes->item(0));
- // Could not find more definitive guidelines on what this should be. Went with
- // what the HTML5 spec says and what \DOMDocument::saveXML() produces.
- $this->assertEquals('<!--<!-- --> --> Foo -->-->', stream_get_contents($stream, -1, 0));
- }
+ // Could not find more definitive guidelines on what this should be. Went with
+ // what the HTML5 spec says and what \DOMDocument::saveXML() produces.
+ $this->assertEquals('<!--<!-- --> --> Foo -->-->', stream_get_contents($stream, - 1, 0));
+ }
- function testText() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testText()
+ {
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<head>
<script>baz();</script>
</head>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $list = $dom->getElementsByTagName('script');
- $r->text($list->item(0)->childNodes->item(0));
- $this->assertEquals('baz();', stream_get_contents($stream, -1, 0));
+ $list = $dom->getElementsByTagName('script');
+ $r->text($list->item(0)->childNodes->item(0));
+ $this->assertEquals('baz();', stream_get_contents($stream, - 1, 0));
- $dom = $this->html5->loadHTML('<!doctype html>
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<head id="foo"></head>
</html>');
- $dom->getElementById('foo')->appendChild(new \DOMText('<script>alert("hi");</script>'));
-
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
-
- $item = $dom->getElementById('foo');
- $r->text($item->firstChild);
- $this->assertEquals('&lt;script&gt;alert("hi");&lt;/script&gt;', stream_get_contents($stream, -1, 0));
- }
-
- function testNl() {
- list($o, $s) = $this->getOutputRules();
-
- $m = $this->getProtectedMethod('nl');
- $m->invoke($o);
- $this->assertEquals(PHP_EOL, stream_get_contents($s, -1, 0));
- }
-
- function testWr() {
- list($o, $s) = $this->getOutputRules();
-
- $m = $this->getProtectedMethod('wr');
- $m->invoke($o, 'foo');
- $this->assertEquals('foo', stream_get_contents($s, -1, 0));
- }
-
- function getEncData(){
- return array(
- array(FALSE, '&\'<>"', '&amp;\'&lt;&gt;"', '&amp;&apos;&lt;&gt;&quot;'),
- array(FALSE, 'This + is. a < test', 'This + is. a &lt; test', 'This &plus; is&period; a &lt; test'),
- array(FALSE, '.+#', '.+#', '&period;&plus;&num;'),
-
- array(TRUE, '.+#\'', '.+#\'', '&period;&plus;&num;&apos;'),
- array(TRUE, '&".<', '&amp;&quot;.<', '&amp;&quot;&period;&lt;'),
- array(TRUE, '&\'<>"', '&amp;\'<>&quot;', '&amp;&apos;&lt;&gt;&quot;'),
- array(TRUE, "\xc2\xa0\"'", '&nbsp;&quot;\'', '&nbsp;&quot;&apos;'),
- );
- }
-
- /**
- * Test basic encoding of text.
- * @dataProvider getEncData
- */
- function testEnc($isAttribute, $test, $expected, $expectedEncoded) {
-
- list($o, $s) = $this->getOutputRules();
- $m = $this->getProtectedMethod('enc');
-
- $this->assertEquals($expected, $m->invoke($o, $test, $isAttribute));
-
- list($o, $s) = $this->getOutputRules(array('encode_entities' => TRUE));
- $m = $this->getProtectedMethod('enc');
- $this->assertEquals($expectedEncoded, $m->invoke($o, $test, $isAttribute));
- }
-
- /**
- * Test basic encoding of text.
- * @dataProvider getEncData
- */
- function testEscape($isAttribute, $test, $expected, $expectedEncoded) {
-
- list($o, $s) = $this->getOutputRules();
- $m = $this->getProtectedMethod('escape');
-
- $this->assertEquals($expected, $m->invoke($o, $test, $isAttribute));
- }
-
- function testAttrs() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ $dom->getElementById('foo')->appendChild(new \DOMText('<script>alert("hi");</script>'));
+
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+
+ $item = $dom->getElementById('foo');
+ $r->text($item->firstChild);
+ $this->assertEquals('&lt;script&gt;alert("hi");&lt;/script&gt;', stream_get_contents($stream, - 1, 0));
+ }
+
+ public function testNl()
+ {
+ list ($o, $s) = $this->getOutputRules();
+
+ $m = $this->getProtectedMethod('nl');
+ $m->invoke($o);
+ $this->assertEquals(PHP_EOL, stream_get_contents($s, - 1, 0));
+ }
+
+ public function testWr()
+ {
+ list ($o, $s) = $this->getOutputRules();
+
+ $m = $this->getProtectedMethod('wr');
+ $m->invoke($o, 'foo');
+ $this->assertEquals('foo', stream_get_contents($s, - 1, 0));
+ }
+
+ public function getEncData()
+ {
+ return array(
+ array(
+ FALSE,
+ '&\'<>"',
+ '&amp;\'&lt;&gt;"',
+ '&amp;&apos;&lt;&gt;&quot;'
+ ),
+ array(
+ FALSE,
+ 'This + is. a < test',
+ 'This + is. a &lt; test',
+ 'This &plus; is&period; a &lt; test'
+ ),
+ array(
+ FALSE,
+ '.+#',
+ '.+#',
+ '&period;&plus;&num;'
+ ),
+
+ array(
+ TRUE,
+ '.+#\'',
+ '.+#\'',
+ '&period;&plus;&num;&apos;'
+ ),
+ array(
+ TRUE,
+ '&".<',
+ '&amp;&quot;.<',
+ '&amp;&quot;&period;&lt;'
+ ),
+ array(
+ TRUE,
+ '&\'<>"',
+ '&amp;\'<>&quot;',
+ '&amp;&apos;&lt;&gt;&quot;'
+ ),
+ array(
+ TRUE,
+ "\xc2\xa0\"'",
+ '&nbsp;&quot;\'',
+ '&nbsp;&quot;&apos;'
+ )
+ );
+ }
+
+ /**
+ * Test basic encoding of text.
+ * @dataProvider getEncData
+ */
+ public function testEnc($isAttribute, $test, $expected, $expectedEncoded)
+ {
+ list ($o, $s) = $this->getOutputRules();
+ $m = $this->getProtectedMethod('enc');
+
+ $this->assertEquals($expected, $m->invoke($o, $test, $isAttribute));
+
+ list ($o, $s) = $this->getOutputRules(array(
+ 'encode_entities' => TRUE
+ ));
+ $m = $this->getProtectedMethod('enc');
+ $this->assertEquals($expectedEncoded, $m->invoke($o, $test, $isAttribute));
+ }
+
+ /**
+ * Test basic encoding of text.
+ * @dataProvider getEncData
+ */
+ public function testEscape($isAttribute, $test, $expected, $expectedEncoded)
+ {
+ list ($o, $s) = $this->getOutputRules();
+ $m = $this->getProtectedMethod('escape');
+
+ $this->assertEquals($expected, $m->invoke($o, $test, $isAttribute));
+ }
+
+ public function testAttrs()
+ {
+ $dom = $this->html5->loadHTML('<!doctype html>
<html lang="en">
<body>
<div id="foo" class="bar baz" disabled>foo bar baz</div>
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $list = $dom->getElementsByTagName('div');
+ $list = $dom->getElementsByTagName('div');
- $m = $this->getProtectedMethod('attrs');
- $m->invoke($r, $list->item(0));
+ $m = $this->getProtectedMethod('attrs');
+ $m->invoke($r, $list->item(0));
- $content = stream_get_contents($stream, -1, 0);
- $this->assertEquals(' id="foo" class="bar baz" disabled', $content);
- }
+ $content = stream_get_contents($stream, - 1, 0);
+ $this->assertEquals(' id="foo" class="bar baz" disabled', $content);
+ }
- function testSvg() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ public function testSvg()
+ {
+ $dom = $this->html5->loadHTML(
+ '<!doctype html>
<html lang="en">
<body>
<div id="foo" class="bar baz">foo bar baz</div>
@@ -377,20 +440,22 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
-
- $list = $dom->getElementsByTagName('svg');
- $r->element($list->item(0));
- $contents = stream_get_contents($stream, -1, 0);
- $this->assertRegExp('|<svg width="150" height="100" viewBox="0 0 3 2">|', $contents);
- $this->assertRegExp('|<rect width="1" height="2" x="0" fill="#008d46" />|', $contents);
- $this->assertRegExp('|<rect id="Bar" x="300" y="100" width="300" height="100" fill="rgb\(255,255,0\)">|', $contents);
- }
-
- function testMath() {
- $dom = $this->html5->loadHTML('<!doctype html>
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+
+ $list = $dom->getElementsByTagName('svg');
+ $r->element($list->item(0));
+ $contents = stream_get_contents($stream, - 1, 0);
+ $this->assertRegExp('|<svg width="150" height="100" viewBox="0 0 3 2">|', $contents);
+ $this->assertRegExp('|<rect width="1" height="2" x="0" fill="#008d46" />|', $contents);
+ $this->assertRegExp('|<rect id="Bar" x="300" y="100" width="300" height="100" fill="rgb\(255,255,0\)">|', $contents);
+ }
+
+ public function testMath()
+ {
+ $dom = $this->html5->loadHTML(
+ '<!doctype html>
<html lang="en">
<body>
<div id="foo" class="bar baz">foo bar baz</div>
@@ -404,26 +469,27 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase {
</body>
</html>');
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
-
- $list = $dom->getElementsByTagName('math');
- $r->element($list->item(0));
- $content = stream_get_contents($stream, -1, 0);
- $this->assertRegExp('|<math>|', $content);
- $this->assertRegExp('|<csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">|', $content);
- }
-
- function testProcessorInstruction() {
- $dom = $this->html5->loadHTMLFragment('<?foo bar ?>');
-
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
-
- $r->processorInstruction($dom->firstChild);
- $content = stream_get_contents($stream, -1, 0);
- $this->assertRegExp('|<\?foo bar \?>|', $content);
- }
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+
+ $list = $dom->getElementsByTagName('math');
+ $r->element($list->item(0));
+ $content = stream_get_contents($stream, - 1, 0);
+ $this->assertRegExp('|<math>|', $content);
+ $this->assertRegExp('|<csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">|', $content);
+ }
+
+ public function testProcessorInstruction()
+ {
+ $dom = $this->html5->loadHTMLFragment('<?foo bar ?>');
+
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+
+ $r->processorInstruction($dom->firstChild);
+ $content = stream_get_contents($stream, - 1, 0);
+ $this->assertRegExp('|<\?foo bar \?>|', $content);
+ }
}
diff --git a/test/HTML5/Serializer/TraverserTest.php b/test/HTML5/Serializer/TraverserTest.php
index 2fecc9c..c914633 100644
--- a/test/HTML5/Serializer/TraverserTest.php
+++ b/test/HTML5/Serializer/TraverserTest.php
@@ -5,9 +5,10 @@ use Masterminds\HTML5\Serializer\OutputRules;
use Masterminds\HTML5\Serializer\Traverser;
use Masterminds\HTML5\Parser;
-class TraverserTest extends \Masterminds\HTML5\Tests\TestCase {
+class TraverserTest extends \Masterminds\HTML5\Tests\TestCase
+{
- protected $markup = '<!doctype html>
+ protected $markup = '<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
@@ -17,79 +18,88 @@ class TraverserTest extends \Masterminds\HTML5\Tests\TestCase {
<p>This is a test.</p>
</body>
</html>';
+
public function setUp()
{
$this->html5 = $this->getInstance();
}
- /**
- * Using reflection we make a protected method accessible for testing.
- *
- * @param string $name
- * The name of the method on the Traverser class to test.
- *
- * @return \ReflectionMethod
- * \ReflectionMethod for the specified method
- */
- function getProtectedMethod($name) {
- $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\Traverser');
- $method = $class->getMethod($name);
- $method->setAccessible(true);
- return $method;
- }
- function getTraverser() {
- $stream = fopen('php://temp', 'w');
+ /**
+ * Using reflection we make a protected method accessible for testing.
+ *
+ * @param string $name
+ * The name of the method on the Traverser class to test.
+ *
+ * @return \ReflectionMethod \ReflectionMethod for the specified method
+ */
+ public function getProtectedMethod($name)
+ {
+ $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\Traverser');
+ $method = $class->getMethod($name);
+ $method->setAccessible(true);
+
+ return $method;
+ }
- $dom = $this->html5->loadHTML($this->markup);
- $t = new Traverser($dom, $stream, $html5->getOptions());
+ public function getTraverser()
+ {
+ $stream = fopen('php://temp', 'w');
- // We return both the traverser and stream so we can pull from it.
- return array($t, $stream);
- }
+ $dom = $this->html5->loadHTML($this->markup);
+ $t = new Traverser($dom, $stream, $html5->getOptions());
- function testConstruct() {
+ // We return both the traverser and stream so we can pull from it.
+ return array(
+ $t,
+ $stream
+ );
+ }
- // The traverser needs a place to write the output to. In our case we
- // use a stream in temp space.
- $stream = fopen('php://temp', 'w');
+ public function testConstruct()
+ {
+ // The traverser needs a place to write the output to. In our case we
+ // use a stream in temp space.
+ $stream = fopen('php://temp', 'w');
- $html5 = $this->getInstance();
+ $html5 = $this->getInstance();
- $r = new OutputRules($stream, $this->html5->getOptions());
- $dom = $this->html5->loadHTML($this->markup);
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $dom = $this->html5->loadHTML($this->markup);
- $t = new Traverser($dom, $stream, $r, $html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $html5->getOptions());
- $this->assertInstanceOf('\Masterminds\HTML5\Serializer\Traverser', $t);
- }
+ $this->assertInstanceOf('\Masterminds\HTML5\Serializer\Traverser', $t);
+ }
- function testFragment() {
- $html = '<span class="bar">foo</span><span></span><div>bar</div>';
- $input = new \Masterminds\HTML5\Parser\StringInputStream($html);
- $dom = $this->html5->parseFragment($input);
+ public function testFragment()
+ {
+ $html = '<span class="bar">foo</span><span></span><div>bar</div>';
+ $input = new \Masterminds\HTML5\Parser\StringInputStream($html);
+ $dom = $this->html5->parseFragment($input);
- $this->assertInstanceOf('\DOMDocumentFragment', $dom);
+ $this->assertInstanceOf('\DOMDocumentFragment', $dom);
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $out = $t->walk();
- $this->assertEquals($html, stream_get_contents($stream, -1, 0));
- }
+ $out = $t->walk();
+ $this->assertEquals($html, stream_get_contents($stream, - 1, 0));
+ }
- function testProcessorInstruction() {
- $html = '<?foo bar ?>';
- $input = new \Masterminds\HTML5\Parser\StringInputStream($html);
- $dom = $this->html5->parseFragment($input);
+ public function testProcessorInstruction()
+ {
+ $html = '<?foo bar ?>';
+ $input = new \Masterminds\HTML5\Parser\StringInputStream($html);
+ $dom = $this->html5->parseFragment($input);
- $this->assertInstanceOf('\DOMDocumentFragment', $dom);
+ $this->assertInstanceOf('\DOMDocumentFragment', $dom);
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, $this->html5->getOptions());
- $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
- $out = $t->walk();
- $this->assertEquals($html, stream_get_contents($stream, -1, 0));
- }
+ $out = $t->walk();
+ $this->assertEquals($html, stream_get_contents($stream, - 1, 0));
+ }
}
diff --git a/test/HTML5/TestCase.php b/test/HTML5/TestCase.php
index 56d2eae..3cb8645 100644
--- a/test/HTML5/TestCase.php
+++ b/test/HTML5/TestCase.php
@@ -2,21 +2,26 @@
namespace Masterminds\HTML5\Tests;
use Masterminds\HTML5;
-class TestCase extends \PHPUnit_Framework_TestCase {
- const DOC_OPEN = '<!DOCTYPE html><html><head><title>test</title></head><body>';
- const DOC_CLOSE = '</body></html>';
- public function testFoo() {
- // Placeholder. Why is PHPUnit emitting warnings about no tests?
- }
+class TestCase extends \PHPUnit_Framework_TestCase
+{
- public function getInstance(array $options = array())
- {
- return new HTML5($options);
- }
+ const DOC_OPEN = '<!DOCTYPE html><html><head><title>test</title></head><body>';
- protected function wrap($fragment) {
- return self::DOC_OPEN . $fragment . self::DOC_CLOSE;
- }
+ const DOC_CLOSE = '</body></html>';
+ public function testFoo()
+ {
+ // Placeholder. Why is PHPUnit emitting warnings about no tests?
+ }
+
+ public function getInstance(array $options = array())
+ {
+ return new HTML5($options);
+ }
+
+ protected function wrap($fragment)
+ {
+ return self::DOC_OPEN . $fragment . self::DOC_CLOSE;
+ }
}