path: root/src/HTML5
diff options
Diffstat (limited to 'src/HTML5')
2 files changed, 371 insertions, 114 deletions
diff --git a/src/HTML5/Elements.php b/src/HTML5/Elements.php
index 30e3dc9..fe55d42 100644
--- a/src/HTML5/Elements.php
+++ b/src/HTML5/Elements.php
@@ -1,120 +1,341 @@
+namespace HTML5;
-class Element {
+ * Provide general element functions.
+ *
+ * @todo consider using a bitmask table lookup. There is enough overlap in
+ * naming that this could significantly shrink the size and maybe make it
+ * faster. See the Go team's implementation at
+ */
+class Elements {
const TEXT_RAW = 0x01;
const TEXT_RCDATA = 0x02;
const OMIT_START = 0x0a;
const OMIT_END = 0x0b;
- public static $properties = array(
- "a" => 0,
- "abbr" => 0,
- "address" => 0,
- "area" => 0,
- "article" => 0,
- "aside" => 0,
- "audio" => 0,
- "b" => 0,
- "base" => 0,
- "bdi" => 0,
- "bdo" => 0,
- "blockquote" => 0,
- "body" => 0,
- "br" => 0,
- "button" => 0,
- "canvas" => 0,
- "caption" => 0,
- "cite" => 0,
- "code" => 0,
- "col" => 0,
- "colgroup" => 0,
- "command" => 0,
- "datalist" => 0,
- "dd" => 0,
- "del" => 0,
- "details" => 0,
- "dfn" => 0,
- "dialog" => 0,
- "div" => 0,
- "dl" => 0,
- "dt" => 0,
- "em" => 0,
- "embed" => 0,
- "fieldset" => 0,
- "figcaption" => 0,
- "figure" => 0,
- "footer" => 0,
- "form" => 0,
- "h1" => 0,
- "h2" => 0,
- "h3" => 0,
- "h4" => 0,
- "h5" => 0,
- "h6" => 0,
- "head" => 0,
- "header" => 0,
- "hgroup" => 0,
- "hr" => 0,
- "html" => 0,
- "i" => 0,
- "iframe" => 0,
- "img" => 0,
- "input" => 0,
- "kbd" => 0,
- "ins" => 0,
- "keygen" => 0,
- "label" => 0,
- "legend" => 0,
- "li" => 0,
- "link" => 0,
- "map" => 0,
- "mark" => 0,
- "menu" => 0,
- "meta" => 0,
- "meter" => 0,
- "nav" => 0,
- "noscript" => 0,
- "object" => 0,
- "ol" => 0,
- "optgroup" => 0,
- "option" => 0,
- "output" => 0,
- "p" => 0,
- "param" => 0,
- "pre" => 0,
- "progress" => 0,
- "q" => 0,
- "rp" => 0,
- "rt" => 0,
- "ruby" => 0,
- "s" => 0,
- "samp" => 0,
- "script" => 0,
- "section" => 0,
- "select" => 0,
- "small" => 0,
- "source" => 0,
- "span" => 0,
- "strong" => 0,
- "style" => 0,
- "sub" => 0,
- "summary" => 0,
- "sup" => 0,
- "table" => 0,
- "tbody" => 0,
- "td" => 0,
- "textarea" => 0,
- "tfoot" => 0,
- "th" => 0,
- "thead" => 0,
- "time" => 0,
- "title" => 0,
- "tr" => 0,
- "track" => 0,
- "u" => 0,
- "ul" => 0,
- "var" => 0,
- "wbr" => 0,
+ /**
+ * The HTML5 elements as defined in
+ * @var array
+ */
+ public static $elements = array(
+ "a" => 1,
+ "abbr" => 1,
+ "address" => 1,
+ "area" => 1,
+ "article" => 1,
+ "aside" => 1,
+ "audio" => 1,
+ "b" => 1,
+ "base" => 1,
+ "bdi" => 1,
+ "bdo" => 1,
+ "blockquote" => 1,
+ "body" => 1,
+ "br" => 1,
+ "button" => 1,
+ "canvas" => 1,
+ "caption" => 1,
+ "cite" => 1,
+ "code" => 1,
+ "col" => 1,
+ "colgroup" => 1,
+ "command" => 1,
+ //"data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). See
+ "datalist" => 1,
+ "dd" => 1,
+ "del" => 1,
+ "details" => 1,
+ "dfn" => 1,
+ "dialog" => 1,
+ "div" => 1,
+ "dl" => 1,
+ "dt" => 1,
+ "em" => 1,
+ "embed" => 1,
+ "fieldset" => 1,
+ "figcaption" => 1,
+ "figure" => 1,
+ "footer" => 1,
+ "form" => 1,
+ "h1" => 1,
+ "h2" => 1,
+ "h3" => 1,
+ "h4" => 1,
+ "h5" => 1,
+ "h6" => 1,
+ "head" => 1,
+ "header" => 1,
+ "hgroup" => 1,
+ "hr" => 1,
+ "html" => 1,
+ "i" => 1,
+ "iframe" => 1,
+ "img" => 1,
+ "input" => 1,
+ "kbd" => 1,
+ "ins" => 1,
+ "keygen" => 1,
+ "label" => 1,
+ "legend" => 1,
+ "li" => 1,
+ "link" => 1,
+ "map" => 1,
+ "mark" => 1,
+ "menu" => 1,
+ "meta" => 1,
+ "meter" => 1,
+ "nav" => 1,
+ "noscript" => 1,
+ "object" => 1,
+ "ol" => 1,
+ "optgroup" => 1,
+ "option" => 1,
+ "output" => 1,
+ "p" => 1,
+ "param" => 1,
+ "pre" => 1,
+ "progress" => 1,
+ "q" => 1,
+ "rp" => 1,
+ "rt" => 1,
+ "ruby" => 1,
+ "s" => 1,
+ "samp" => 1,
+ "script" => 1,
+ "section" => 1,
+ "select" => 1,
+ "small" => 1,
+ "source" => 1,
+ "span" => 1,
+ "strong" => 1,
+ "style" => 1,
+ "sub" => 1,
+ "summary" => 1,
+ "sup" => 1,
+ "table" => 1,
+ "tbody" => 1,
+ "td" => 1,
+ "textarea" => 1,
+ "tfoot" => 1,
+ "th" => 1,
+ "thead" => 1,
+ "time" => 1,
+ "title" => 1,
+ "tr" => 1,
+ "track" => 1,
+ "u" => 1,
+ "ul" => 1,
+ "var" => 1,
+ "video" => 1,
+ "wbr" => 1,
+ /**
+ * The MathML elements. See
+ *
+ * In our case we are only concerned with presentation MathML and not content
+ * MathML. There is a nice list of this subset at
+ *
+ * @var array
+ */
+ public static $mathml = array(
+ "maction" => 1,
+ "maligngroup" => 1,
+ "malignmark" => 1,
+ "math" => 1,
+ "menclose" => 1,
+ "merror" => 1,
+ "mfenced" => 1,
+ "mfrac" => 1,
+ "mglyph" => 1,
+ "mi" => 1,
+ "mlabeledtr" => 1,
+ "mlongdiv" => 1,
+ "mmultiscripts" => 1,
+ "mn" => 1,
+ "mo" => 1,
+ "mover" => 1,
+ "mpadded" => 1,
+ "mphantom" => 1,
+ "mroot" => 1,
+ "mrow" => 1,
+ "ms" => 1,
+ "mscarries" => 1,
+ "mscarry" => 1,
+ "msgroup" => 1,
+ "msline" => 1,
+ "mspace" => 1,
+ "msqrt" => 1,
+ "msrow" => 1,
+ "mstack" => 1,
+ "mstyle" => 1,
+ "msub" => 1,
+ "msup" => 1,
+ "msubsup" => 1,
+ "mtable" => 1,
+ "mtd" => 1,
+ "mtext" => 1,
+ "mtr" => 1,
+ "munder" => 1,
+ "munderover" => 1,
+ );
+ /**
+ * The svg elements.
+ *
+ * The Mozilla documentation has a good list at
+ * The w3c list appears to be lacking in some areas like filter effect elements.
+ * That list can be found at
+ *
+ * Note, Firefox appears to do a better job rendering filter effects than Chrome.
+ * While they are in the spec I'm not sure how widely implemented they are.
+ *
+ * @var array
+ */
+ public static $svg = array(
+ "a" => 1,
+ "altGlyph" => 1,
+ "altGlyphDef" => 1,
+ "altGlyphItem" => 1,
+ "animate" => 1,
+ "animateColor" => 1,
+ "animateMotion" => 1,
+ "animateTransform" => 1,
+ "circle" => 1,
+ "clipPath" => 1,
+ "color-profile" => 1,
+ "cursor" => 1,
+ "defs" => 1,
+ "desc" => 1,
+ "ellipse" => 1,
+ "feBlend" => 1,
+ "feColorMatrix" => 1,
+ "feComponentTransfer" => 1,
+ "feComposite" => 1,
+ "feConvolveMatrix" => 1,
+ "feDiffuseLighting" => 1,
+ "feDisplacementMap" => 1,
+ "feDistantLight" => 1,
+ "feFlood" => 1,
+ "feFuncA" => 1,
+ "feFuncB" => 1,
+ "feFuncG" => 1,
+ "feFuncR" => 1,
+ "feGaussianBlur" => 1,
+ "feImage" => 1,
+ "feMerge" => 1,
+ "feMergeNode" => 1,
+ "feMorphology" => 1,
+ "feOffset" => 1,
+ "fePointLight" => 1,
+ "feSpecularLighting" => 1,
+ "feSpotLight" => 1,
+ "feTile" => 1,
+ "feTurbulence" => 1,
+ "filter" => 1,
+ "font" => 1,
+ "font-face" => 1,
+ "font-face-format" => 1,
+ "font-face-name" => 1,
+ "font-face-src" => 1,
+ "font-face-uri" => 1,
+ "foreignObject" => 1,
+ "g" => 1,
+ "glyph" => 1,
+ "glyphRef" => 1,
+ "hkern" => 1,
+ "image" => 1,
+ "line" => 1,
+ "linearGradient" => 1,
+ "marker" => 1,
+ "mask" => 1,
+ "metadata" => 1,
+ "missing-glyph" => 1,
+ "mpath" => 1,
+ "path" => 1,
+ "pattern" => 1,
+ "polygon" => 1,
+ "polyline" => 1,
+ "radialGradient" => 1,
+ "rect" => 1,
+ "script" => 1,
+ "set" => 1,
+ "stop" => 1,
+ "style" => 1,
+ "svg" => 1,
+ "switch" => 1,
+ "symbol" => 1,
+ "text" => 1,
+ "textPath" => 1,
+ "title" => 1,
+ "tref" => 1,
+ "tspan" => 1,
+ "use" => 1,
+ "view" => 1,
+ "vkern" => 1,
+ );
+ /**
+ * Check whether a tag name is one of the known html5 elements.
+ *
+ * @param string $name
+ *   The tag name to look up, in any letter case.
+ *
+ * @return bool
+ *   True when the lowercased name is a key of self::$elements, false otherwise.
+ */
+ public static function isHtml5Element($name) {
+   // html5 element names are case-insensitive, so normalise before the lookup.
+   // Open question: callers may already pass lowercase names, which would make
+   // this normalisation redundant.
+   $normalized = strtolower($name);
+   return isset(self::$elements[$normalized]);
+ }
+ /**
+ * Check whether a name is a known MathML presentation element.
+ *
+ * @param string $name
+ *   The element name, compared exactly as given.
+ *
+ * @return bool
+ *   True when the name is a key of self::$mathml, false otherwise.
+ */
+ public static function isMathMLElement($name) {
+   // MathML names are case-sensitive (unlike html5), so no normalisation here.
+   $known = isset(self::$mathml[$name]);
+   return $known;
+ }
+ /**
+ * Check whether a name is a known SVG element.
+ *
+ * @param string $name
+ *   The element name, compared exactly as given (e.g. "clipPath").
+ *
+ * @return bool
+ *   True when the name is a key of self::$svg, false otherwise.
+ */
+ public static function isSvgElement($name) {
+   // SVG names are case-sensitive (unlike html5), so no normalisation here.
+   $known = isset(self::$svg[$name]);
+   return $known;
+ }
+ /**
+ * Test whether an element name is valid anywhere in an html5 document.
+ *
+ * Accepts html5 elements as well as the embedded MathML and SVG vocabularies.
+ * The html5 check lowercases the name first; the MathML and SVG checks
+ * compare the name exactly as given.
+ *
+ * Declared static to match the three helpers it delegates to, so it can be
+ * called as Elements::isElement($name) without an instance. Calls made on
+ * an instance keep working.
+ *
+ * @param string $name
+ *   The name of the element.
+ *
+ * @return bool
+ *   True if valid and false otherwise.
+ */
+ public static function isElement($name) {
+   return self::isHtml5Element($name)
+     || self::isMathMLElement($name)
+     || self::isSvgElement($name);
+ }
diff --git a/src/HTML5/Serializer/Traverser.php b/src/HTML5/Serializer/Traverser.php
index a066517..735e413 100644
--- a/src/HTML5/Serializer/Traverser.php
+++ b/src/HTML5/Serializer/Traverser.php
@@ -12,7 +12,31 @@ namespace HTML5\Serializer;
class Traverser {
- static $block_elements = 'html|body|head|p|div|h[1-6]|title|script|link|meta|section|article|table|tbody|tr|th|td';
+ // Lookup table of block-level tag names, keyed by lowercase name.
+ // isBlock() only tests key presence with isset(), so the value 1 is a placeholder.
+ static $block_elements = array(
+ 'html' => 1,
+ 'body' => 1,
+ 'head' => 1,
+ 'p' => 1,
+ 'div' => 1,
+ 'h1' => 1,
+ 'h2' => 1,
+ 'h3' => 1,
+ 'h4' => 1,
+ 'h5' => 1,
+ 'h6' => 1,
+ 'title' => 1,
+ 'script' => 1,
+ 'link' => 1,
+ 'meta' => 1,
+ 'section' => 1,
+ 'article' => 1,
+ 'table' => 1,
+ 'tbody' => 1,
+ 'tr' => 1,
+ 'th' => 1,
+ 'td' => 1,
+ //'form' => 1,
+ );
// TODO: Refactor this into an element mask.
static $literal_elements = array(
@@ -33,7 +57,19 @@ class Traverser {
* input, keygen, link, meta, param, source, track or wbr element, then
* continue on to the next child node at this point.
- static $unary_elements = 'area|base|basefont|bgsound|br|col|command|embed|frame|hr|img';
+ // Lookup table of elements that are serialized without an end tag, keyed by
+ // lowercase tag name; only key presence is tested, the value 1 is a placeholder.
+ // The list previously stopped at "img" and left out the rest of the elements
+ // named in the comment above (input ... wbr), so those were not treated as unary.
+ static $unary_elements = array(
+ 'area' => 1,
+ 'base' => 1,
+ 'basefont' => 1,
+ 'bgsound' => 1,
+ 'br' => 1,
+ 'col' => 1,
+ 'command' => 1,
+ 'embed' => 1,
+ 'frame' => 1,
+ 'hr' => 1,
+ 'img' => 1,
+ 'input' => 1,
+ 'keygen' => 1,
+ 'link' => 1,
+ 'meta' => 1,
+ 'param' => 1,
+ 'source' => 1,
+ 'track' => 1,
+ 'wbr' => 1,
+ );
/** Namespaces that should be treated as "local" to HTML5. */
static $local_ns = array(
@@ -264,7 +300,7 @@ class Traverser {
* True if Unary and false otherwise.
protected function isUnary($name) {
- return (bool)preg_match('/^(' . self::$unary_elements . ')$/i', $name);
+ // The regex this replaces matched with the /i flag; lowercase the name so the
+ // array lookup stays case-insensitive (table keys are all lowercase).
+ return isset(self::$unary_elements[strtolower($name)]);
@@ -277,7 +313,7 @@ class Traverser {
* If the element is block level or not.
protected function isBlock($name) {
- return (bool)preg_match('/^(' . self::$block_elements . ')$/i', $name);
+ // The regex this replaces matched with the /i flag; lowercase the name so the
+ // array lookup stays case-insensitive (table keys are all lowercase).
+ return isset(self::$block_elements[strtolower($name)]);