diff options
-rw-r--r-- | src/HTML5/Elements.php | 441 | ||||
-rw-r--r-- | src/HTML5/Serializer/Traverser.php | 44 | ||||
-rw-r--r-- | test/HTML5/ElementsTest.php | 325 | ||||
-rw-r--r-- | test/HTML5/Serializer/TraverserTest.php | 12 |
4 files changed, 696 insertions, 126 deletions
diff --git a/src/HTML5/Elements.php b/src/HTML5/Elements.php index 30e3dc9..fe55d42 100644 --- a/src/HTML5/Elements.php +++ b/src/HTML5/Elements.php @@ -1,120 +1,341 @@ <?php +namespace HTML5; -class Element { +/** + * Provide general element functions. + * + * @todo consider using a bitmask table lookup. There is enought overlap in + * naming that this could significantly shrink the size and maybe make it + * faster. See the Go teams implementation at https://code.google.com/p/go/source/browse/html/atom. + */ +class Elements { const TEXT_RAW = 0x01; const TEXT_RCDATA = 0x02; const OMIT_START = 0x0a; const OMIT_END = 0x0b; - public static $properties = array( - "a" => 0, - "abbr" => 0, - "address" => 0, - "area" => 0, - "article" => 0, - "aside" => 0, - "audio" => 0, - "b" => 0, - "base" => 0, - "bdi" => 0, - "bdo" => 0, - "blockquote" => 0, - "body" => 0, - "br" => 0, - "button" => 0, - "canvas" => 0, - "caption" => 0, - "cite" => 0, - "code" => 0, - "col" => 0, - "colgroup" => 0, - "command" => 0, - "datalist" => 0, - "dd" => 0, - "del" => 0, - "details" => 0, - "dfn" => 0, - "dialog" => 0, - "div" => 0, - "dl" => 0, - "dt" => 0, - "em" => 0, - "embed" => 0, - "fieldset" => 0, - "figcaption" => 0, - "figure" => 0, - "footer" => 0, - "form" => 0, - "h1" => 0, - "h2" => 0, - "h3" => 0, - "h4" => 0, - "h5" => 0, - "h6" => 0, - "head" => 0, - "header" => 0, - "hgroup" => 0, - "hr" => 0, - "html" => 0, - "i" => 0, - "iframe" => 0, - "img" => 0, - "input" => 0, - "kbd" => 0, - "ins" => 0, - "keygen" => 0, - "label" => 0, - "legend" => 0, - "li" => 0, - "link" => 0, - "map" => 0, - "mark" => 0, - "menu" => 0, - "meta" => 0, - "meter" => 0, - "nav" => 0, - "noscript" => 0, - "object" => 0, - "ol" => 0, - "optgroup" => 0, - "option" => 0, - "output" => 0, - "p" => 0, - "param" => 0, - "pre" => 0, - "progress" => 0, - "q" => 0, - "rp" => 0, - "rt" => 0, - "ruby" => 0, - "s" => 0, - "samp" => 0, - "script" => 0, - "section" => 0, - "select" => 0, - "small" => 0, - "source" => 0, - "span" => 0, - "strong" => 0, - "style" => 0, - "sub" => 0, - "summary" => 0, - "sup" => 0, - "table" => 0, - "tbody" => 0, - "td" => 0, - "textarea" => 0, - "tfoot" => 0, - "th" => 0, - "thead" => 0, - "time" => 0, - "title" => 0, - "tr" => 0, - "track" => 0, - "u" => 0, - "ul" => 0, - "var" => 0, - "wbr" => 0, + /** + * The HTML5 elements as defined in http://dev.w3.org/html5/markup/elements.html. + * @var array + */ + public static $elements = array( + "a" => 1, + "abbr" => 1, + "address" => 1, + "area" => 1, + "article" => 1, + "aside" => 1, + "audio" => 1, + "b" => 1, + "base" => 1, + "bdi" => 1, + "bdo" => 1, + "blockquote" => 1, + "body" => 1, + "br" => 1, + "button" => 1, + "canvas" => 1, + "caption" => 1, + "cite" => 1, + "code" => 1, + "col" => 1, + "colgroup" => 1, + "command" => 1, + //"data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). See https://developer.mozilla.org/en-US/docs/HTML/Element/data + "datalist" => 1, + "dd" => 1, + "del" => 1, + "details" => 1, + "dfn" => 1, + "dialog" => 1, + "div" => 1, + "dl" => 1, + "dt" => 1, + "em" => 1, + "embed" => 1, + "fieldset" => 1, + "figcaption" => 1, + "figure" => 1, + "footer" => 1, + "form" => 1, + "h1" => 1, + "h2" => 1, + "h3" => 1, + "h4" => 1, + "h5" => 1, + "h6" => 1, + "head" => 1, + "header" => 1, + "hgroup" => 1, + "hr" => 1, + "html" => 1, + "i" => 1, + "iframe" => 1, + "img" => 1, + "input" => 1, + "kbd" => 1, + "ins" => 1, + "keygen" => 1, + "label" => 1, + "legend" => 1, + "li" => 1, + "link" => 1, + "map" => 1, + "mark" => 1, + "menu" => 1, + "meta" => 1, + "meter" => 1, + "nav" => 1, + "noscript" => 1, + "object" => 1, + "ol" => 1, + "optgroup" => 1, + "option" => 1, + "output" => 1, + "p" => 1, + "param" => 1, + "pre" => 1, + "progress" => 1, + "q" => 1, + "rp" => 1, + "rt" => 1, + "ruby" => 1, + "s" => 1, + "samp" => 1, + "script" => 1, + "section" => 1, + "select" => 1, + "small" => 1, + "source" => 1, + "span" => 1, + "strong" => 1, + "style" => 1, + "sub" => 1, + "summary" => 1, + "sup" => 1, + "table" => 1, + "tbody" => 1, + "td" => 1, + "textarea" => 1, + "tfoot" => 1, + "th" => 1, + "thead" => 1, + "time" => 1, + "title" => 1, + "tr" => 1, + "track" => 1, + "u" => 1, + "ul" => 1, + "var" => 1, + "video" => 1, + "wbr" => 1, ); + + /** + * The MathML elements. See http://www.w3.org/wiki/MathML/Elements. + * + * In our case we are only concerned with presentation MathML and not content + * MathML. There is a nice list of this subset at https://developer.mozilla.org/en-US/docs/MathML/Element. + * + * @var array + */ + public static $mathml = array( + "maction" => 1, + "maligngroup" => 1, + "malignmark" => 1, + "math" => 1, + "menclose" => 1, + "merror" => 1, + "mfenced" => 1, + "mfrac" => 1, + "mglyph" => 1, + "mi" => 1, + "mlabeledtr" => 1, + "mlongdiv" => 1, + "mmultiscripts" => 1, + "mn" => 1, + "mo" => 1, + "mover" => 1, + "mpadded" => 1, + "mphantom" => 1, + "mroot" => 1, + "mrow" => 1, + "ms" => 1, + "mscarries" => 1, + "mscarry" => 1, + "msgroup" => 1, + "msline" => 1, + "mspace" => 1, + "msqrt" => 1, + "msrow" => 1, + "mstack" => 1, + "mstyle" => 1, + "msub" => 1, + "msup" => 1, + "msubsup" => 1, + "mtable" => 1, + "mtd" => 1, + "mtext" => 1, + "mtr" => 1, + "munder" => 1, + "munderover" => 1, + ); + + /** + * The svg elements. + * + * The Mozilla documentation has a good list at https://developer.mozilla.org/en-US/docs/SVG/Element. + * The w3c list appears to be lacking in some areas like filter effect elements. + * That list can be found at http://www.w3.org/wiki/SVG/Elements. + * + * Note, FireFox appears to do a better job rendering filter effects than chrome. + * While they are in the spec I'm not sure how widely implemented they are. + * + * @var array + */ + public static $svg = array( + "a" => 1, + "altGlyph" => 1, + "altGlyphDef" => 1, + "altGlyphItem" => 1, + "animate" => 1, + "animateColor" => 1, + "animateMotion" => 1, + "animateTransform" => 1, + "circle" => 1, + "clipPath" => 1, + "color-profile" => 1, + "cursor" => 1, + "defs" => 1, + "desc" => 1, + "ellipse" => 1, + "feBlend" => 1, + "feColorMatrix" => 1, + "feComponentTransfer" => 1, + "feComposite" => 1, + "feConvolveMatrix" => 1, + "feDiffuseLighting" => 1, + "feDisplacementMap" => 1, + "feDistantLight" => 1, + "feFlood" => 1, + "feFuncA" => 1, + "feFuncB" => 1, + "feFuncG" => 1, + "feFuncR" => 1, + "feGaussianBlur" => 1, + "feImage" => 1, + "feMerge" => 1, + "feMergeNode" => 1, + "feMorphology" => 1, + "feOffset" => 1, + "fePointLight" => 1, + "feSpecularLighting" => 1, + "feSpotLight" => 1, + "feTile" => 1, + "feTurbulence" => 1, + "filter" => 1, + "font" => 1, + "font-face" => 1, + "font-face-format" => 1, + "font-face-name" => 1, + "font-face-src" => 1, + "font-face-uri" => 1, + "foreignObject" => 1, + "g" => 1, + "glyph" => 1, + "glyphRef" => 1, + "hkern" => 1, + "image" => 1, + "line" => 1, + "linearGradient" => 1, + "marker" => 1, + "mask" => 1, + "metadata" => 1, + "missing-glyph" => 1, + "mpath" => 1, + "path" => 1, + "pattern" => 1, + "polygon" => 1, + "polyline" => 1, + "radialGradient" => 1, + "rect" => 1, + "script" => 1, + "set" => 1, + "stop" => 1, + "style" => 1, + "svg" => 1, + "switch" => 1, + "symbol" => 1, + "text" => 1, + "textPath" => 1, + "title" => 1, + "tref" => 1, + "tspan" => 1, + "use" => 1, + "view" => 1, + "vkern" => 1, + ); + + /** + * Test if an element is a valid html5 element. + * + * @param string $name + * The name of the element. + * + * @return bool + * True if a html5 element and false otherwise. + */ + public static function isHtml5Element($name) { + + // html5 element names are case insensetitive. Forcing lowercase for the check. + // Do we need this check or will all data passed here already be lowercase? + return isset(self::$elements[strtolower($name)]); + } + + /** + * Test if an element name is a valid MathML presentation element. + * + * @param string $name + * The name of the element. + * + * @return bool + * True if a MathML name and false otherwise. + */ + public static function isMathMLElement($name) { + + // MathML is case-sensetitive unlike html5 elements. + return isset(self::$mathml[$name]); + } + + /** + * Test if an element is a valid SVG element. + * + * @param string $name + * The name of the element. + * + * @return boolean + * True if a SVG element and false otherise. + */ + public static function isSvgElement($name) { + + // SVG is case-sensetitive unlike html5 elements. + return isset(self::$svg[$name]); + } + + /** + * Is an element name valid in an html5 document. + * + * This includes html5 elements along with other allowed embedded content + * such as svg and mathml. + * + * @param string $name + * The name of the element. + * + * @return bool + * True if valid and false otherwise. + */ + public function isElement($name) { + return self::isHtml5Element($name) || self::isMathMLElement($name) || self::isSvgElement($name); + } } diff --git a/src/HTML5/Serializer/Traverser.php b/src/HTML5/Serializer/Traverser.php index a066517..735e413 100644 --- a/src/HTML5/Serializer/Traverser.php +++ b/src/HTML5/Serializer/Traverser.php @@ -12,7 +12,31 @@ namespace HTML5\Serializer; */ class Traverser { - static $block_elements = 'html|body|head|p|div|h[1-6]|title|script|link|meta|section|article|table|tbody|tr|th|td'; + static $block_elements = array( + 'html' => 1, + 'body' => 1, + 'head' => 1, + 'p' => 1, + 'div' => 1, + 'h1' => 1, + 'h2' => 1, + 'h3' => 1, + 'h4' => 1, + 'h5' => 1, + 'h6' => 1, + 'title' => 1, + 'script' => 1, + 'link' => 1, + 'meta' => 1, + 'section' => 1, + 'article' => 1, + 'table' => 1, + 'tbody' => 1, + 'tr' => 1, + 'th' => 1, + 'td' => 1, + //'form' => 1, + ); // TODO: Refactor this into an element mask. static $literal_elements = array( @@ -33,7 +57,19 @@ class Traverser { * input, keygen, link, meta, param, source, track or wbr element, then * continue on to the next child node at this point. */ - static $unary_elements = 'area|base|basefont|bgsound|br|col|command|embed|frame|hr|img'; + static $unary_elements = array( + 'area' => 1, + 'base' => 1, + 'basefont' => 1, + 'bgsound' => 1, + 'br' => 1, + 'col' => 1, + 'command' => 1, + 'embed' => 1, + 'frame' => 1, + 'hr' => 1, + 'img' => 1, + ); /** Namespaces that should be treated as "local" to HTML5. */ static $local_ns = array( @@ -264,7 +300,7 @@ class Traverser { * True if Unary and false otherwise. */ protected function isUnary($name) { - return (bool)preg_match('/^(' . self::$unary_elements . ')$/i', $name); + return isset(self::$unary_elements[$name]); } /** @@ -277,7 +313,7 @@ class Traverser { * If the element is block level or not. */ protected function isBlock($name) { - return (bool)preg_match('/^(' . self::$block_elements . ')$/i', $name); + return isset(self::$block_elements[$name]); } /** diff --git a/test/HTML5/ElementsTest.php b/test/HTML5/ElementsTest.php new file mode 100644 index 0000000..20161bb --- /dev/null +++ b/test/HTML5/ElementsTest.php @@ -0,0 +1,325 @@ +<?php +namespace HTML5\Tests; + +use \HTML5\Elements; + +require_once 'TestCase.php'; + +class ElementsTest extends TestCase { + + public $html5Elements = array( + "a", + "abbr", + "address", + "area", + "article", + "aside", + "audio", + "b", + "base", + "bdi", + "bdo", + "blockquote", + "body", + "br", + "button", + "canvas", + "caption", + "cite", + "code", + "col", + "colgroup", + "command", + //"data", + "datalist", + "dd", + "del", + "details", + "dfn", + "dialog", + "div", + "dl", + "dt", + "em", + "embed", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hgroup", + "hr", + "html", + "i", + "iframe", + "img", + "input", + "ins", + "kbd", + "keygen", + "label", + "legend", + "li", + "link", + "map", + "mark", + "menu", + "meta", + "meter", + "nav", + "noscript", + "object", + "ol", + "optgroup", + "option", + "output", + "p", + "param", + "pre", + "progress", + "q", + "rp", + "rt", + "ruby", + "s", + "samp", + "script", + "section", + "select", + "small", + "source", + "span", + "strong", + "style", + "sub", + "summary", + "sup", + "table", + "tbody", + "td", + "textarea", + "tfoot", + "th", + "thead", + "time", + "title", + "tr", + "track", + "u", + "ul", + "var", + "video", + "wbr", + ); + + public $mathmlElements = array( + "maction", + "maligngroup", + "malignmark", + "math", + "menclose", + "merror", + "mfenced", + "mfrac", + "mglyph", + "mi", + "mlabeledtr", + "mlongdiv", + "mmultiscripts", + "mn", + "mo", + "mover", + "mpadded", + "mphantom", + "mroot", + "mrow", + "ms", + "mscarries", + "mscarry", + "msgroup", + "msline", + "mspace", + "msqrt", + "msrow", + "mstack", + "mstyle", + "msub", + "msup", + "msubsup", + "mtable", + "mtd", + "mtext", + "mtr", + "munder", + "munderover", + ); + + public $svgElements = array( + "a", + "altGlyph", + "altGlyphDef", + "altGlyphItem", + "animate", + "animateColor", + "animateMotion", + "animateTransform", + "circle", + "clipPath", + "color-profile", + "cursor", + "defs", + "desc", + "ellipse", + "feBlend", + "feColorMatrix", + "feComponentTransfer", + "feComposite", + "feConvolveMatrix", + "feDiffuseLighting", + "feDisplacementMap", + "feDistantLight", + "feFlood", + "feFuncA", + "feFuncB", + "feFuncG", + "feFuncR", + "feGaussianBlur", + "feImage", + "feMerge", + "feMergeNode", + "feMorphology", + "feOffset", + "fePointLight", + "feSpecularLighting", + "feSpotLight", + "feTile", + "feTurbulence", + "filter", + "font", + "font-face", + "font-face-format", + "font-face-name", + "font-face-src", + "font-face-uri", + "foreignObject", + "g", + "glyph", + "glyphRef", + "hkern", + "image", + "line", + "linearGradient", + "marker", + "mask", + "metadata", + "missing-glyph", + "mpath", + "path", + "pattern", + "polygon", + "polyline", + "radialGradient", + "rect", + "script", + "set", + "stop", + "style", + "svg", + "switch", + "symbol", + "text", + "textPath", + "title", + "tref", + "tspan", + "use", + "view", + "vkern", + ); + + public function testIsHtml5Element() { + + foreach ($this->html5Elements as $element) { + $this->assertTrue(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element); + + $this->assertTrue(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element)); + } + + $nonhtml5 = array('foo', 'bar', 'baz'); + foreach ($nonhtml5 as $element) { + $this->assertFalse(Elements::isHtml5Element($element), 'html5 element test failed on: ' . $element); + + $this->assertFalse(Elements::isHtml5Element(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element)); + } + } + + public function testIsMathMLElement() { + foreach ($this->mathmlElements as $element) { + $this->assertTrue(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element); + + // MathML is case sensetitive so these should all fail. + $this->assertFalse(Elements::isMathMLElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element)); + } + + $nonMathML = array('foo', 'bar', 'baz'); + foreach ($nonMathML as $element) { + $this->assertFalse(Elements::isMathMLElement($element), 'MathML element test failed on: ' . $element); + } + } + + public function testIsSvgElement() { + foreach ($this->svgElements as $element) { + $this->assertTrue(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element); + + // SVG is case sensetitive so these should all fail. + $this->assertFalse(Elements::isSvgElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element)); + } + + $nonSVG = array('foo', 'bar', 'baz'); + foreach ($nonSVG as $element) { + $this->assertFalse(Elements::isSvgElement($element), 'SVG element test failed on: ' . $element); + } + } + + public function testIsElement() { + foreach ($this->html5Elements as $element) { + $this->assertTrue(Elements::isElement($element), 'html5 element test failed on: ' . $element); + + $this->assertTrue(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element)); + } + + foreach ($this->mathmlElements as $element) { + $this->assertTrue(Elements::isElement($element), 'MathML element test failed on: ' . $element); + + // MathML is case sensetitive so these should all fail. + $this->assertFalse(Elements::isElement(strtoupper($element)), 'MathML element test failed on: ' . strtoupper($element)); + } + + foreach ($this->svgElements as $element) { + $this->assertTrue(Elements::isElement($element), 'SVG element test failed on: ' . $element); + + // SVG is case sensetitive so these should all fail. But, there is duplication + // html5 and SVG. Since html5 is case insensetitive we need to make sure + // it's not a html5 element first. + if (!in_array($element, $this->html5Elements)) { + $this->assertFalse(Elements::isElement(strtoupper($element)), 'SVG element test failed on: ' . strtoupper($element)); + } + } + + $nonhtml5 = array('foo', 'bar', 'baz'); + foreach ($nonhtml5 as $element) { + $this->assertFalse(Elements::isElement($element), 'html5 element test failed on: ' . $element); + + $this->assertFalse(Elements::isElement(strtoupper($element)), 'html5 element test failed on: ' . strtoupper($element)); + } + } + +}
\ No newline at end of file diff --git a/test/HTML5/Serializer/TraverserTest.php b/test/HTML5/Serializer/TraverserTest.php index 059b314..665f318 100644 --- a/test/HTML5/Serializer/TraverserTest.php +++ b/test/HTML5/Serializer/TraverserTest.php @@ -37,17 +37,11 @@ class TraverserTest extends \HTML5\Tests\TestCase { foreach ($blocks as $block) { $this->assertTrue($method->invoke($t, $block), 'Block test failed on: ' . $block); - - // Also test the uppercase version. - $this->assertTrue($method->invoke($t, strtoupper($block)), 'Block test failed on: ' . strtoupper($block)); } $nonblocks = array('span', 'a', 'img'); foreach ($nonblocks as $tag) { $this->assertFalse($method->invoke($t, $tag), 'Block test failed on: ' . $tag); - - // Also test the uppercase version. - $this->assertFalse($method->invoke($t, strtoupper($tag)), 'Block test failed on: ' . strtoupper($tag)); } } @@ -62,17 +56,11 @@ class TraverserTest extends \HTML5\Tests\TestCase { foreach ($elements as $element) { $this->assertTrue($method->invoke($t, $element), 'Unary test failed on: ' . $element); - - // Also test the uppercase version. - $this->assertTrue($method->invoke($t, strtoupper($element)), 'Unary test failed on: ' . strtoupper($element)); } $nonblocks = array('span', 'a', 'div'); foreach ($nonblocks as $tag) { $this->assertFalse($method->invoke($t, $tag), 'Unary test failed on: ' . $tag); - - // Also test the uppercase version. - $this->assertFalse($method->invoke($t, strtoupper($tag)), 'Unary test failed on: ' . strtoupper($tag)); } } |