summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Butcher <[email protected]>2013-04-04 17:45:58 -0500
committerMatt Butcher <[email protected]>2013-04-04 17:45:58 -0500
commitd04664262ac03de9a0a614de32a90b78a7fbcb13 (patch)
tree48b562790760d522b995491d35bc2f98597ec834
parent5a05b6485ee6f63bbcff31758e4fe9c45f057fca (diff)
Serializing now roughly follows 8.9 of the spec.
There are some namespace issues left to solve, but otherwise it works.
-rw-r--r--example.php2
-rw-r--r--src/HTML5/Traverser.php69
2 files changed, 68 insertions, 3 deletions
diff --git a/example.php b/example.php
index 361369b..5ba4dbc 100644
--- a/example.php
+++ b/example.php
@@ -18,7 +18,9 @@ $html = <<< 'HERE'
<hr>
&amp; Nobody nowhere.
</section>
+ <test xmlns:foo="http://example.com/foo">TEST</test>
<![CDATA[Because we can.]]>
+ &copy;
</body></html>
HERE;
diff --git a/src/HTML5/Traverser.php b/src/HTML5/Traverser.php
index a4ce5fe..b0258eb 100644
--- a/src/HTML5/Traverser.php
+++ b/src/HTML5/Traverser.php
@@ -50,6 +50,35 @@ class Traverser {
'plaintext' => 1,
);
+ /**
+  * Unary elements.
+  *
+  * HTML5 section 8.3:
+  * If current node is an
+  * area, base, basefont, bgsound, br, col, command, embed, frame, hr, img,
+  * input, keygen, link, meta, param, source, track or wbr element, then
+  * continue on to the next child node at this point.
+  *
+  * Lookup table keyed by tag name; isUnary() tests key presence with
+  * isset(), so the values themselves are irrelevant. Every element named
+  * in the spec text above must have an entry here.
+  */
+ static $unary_elements = array(
+   'area' => 1,
+   'base' => 1,
+   'basefont' => 1,
+   'bgsound' => 1,
+   'br' => 1,
+   'col' => 1,
+   'command' => 1,
+   'embed' => 1,
+   'frame' => 1,
+   'hr' => 1,
+   'img' => 1,
+   'input' => 1,
+   'keygen' => 1,
+   'link' => 1,
+   'meta' => 1,
+   'param' => 1,
+   'source' => 1,
+   'track' => 1,
+   'wbr' => 1,
+ );
+
+ /**
+  * Namespaces that should be treated as "local" to HTML5.
+  *
+  * Keyed by namespace URI. isLocalElement() only checks whether an
+  * element's namespaceURI is present as a key; the short names stored as
+  * values are not read by any code visible in this change.
+  */
+ static $local_ns = array(
+ 'http://www.w3.org/1999/xhtml' => 'html',
+ 'http://www.w3.org/1998/Math/MathML' => 'mathml',
+ 'http://www.w3.org/2000/svg' => 'svg',
+ );
+
protected $dom;
protected $out;
protected $pretty = TRUE;
@@ -133,6 +162,13 @@ class Traverser {
$name = $ele->tagName;
$block = $this->pretty && $this->isBlock($name);
+ // Per spec:
+ // If the element has a declared namespace in the HTML, MathML or
+ // SVG namespaces, we use the lname instead of the tagName.
+ if ($this->isLocalElement($ele)) {
+ $name = $ele->localName;
+ }
+
// TODO: Really need to fix the spacing.
// Add a newline for a block element.
if ($block) $this->nl();
@@ -158,7 +194,7 @@ class Traverser {
}
// FIXME: This probably needs some flags set.
- $this->wr(htmlentities($ele->wholeText));
+ $this->wr($this->enc($ele->wholeText));
}
@@ -199,7 +235,13 @@ class Traverser {
$len = $map->length;
for ($i = 0; $i < $len; ++$i) {
$node = $map->item($i);
- $this->wr(' ')->wr($node->name)->wr('="')->wr($node->value)->wr('"');
+ $val = $this->enc($node->value);
+
+ // XXX: The spec says that we need to ensure that anything in
+ // the XML, XMLNS, or XLink NS's should use the canonical
+ // prefix. It seems that DOM does this for us already, but there
+ // may be exceptions.
+ $this->wr(' ')->wr($node->name)->wr('="')->wr($val)->wr('"');
}
}
@@ -218,11 +260,23 @@ class Traverser {
return $this;
}
+ /**
+  * Encode text for inclusion in serialized HTML.
+  *
+  * Used for both text node content and attribute values. Attribute
+  * values are written inside double quotes, so quote characters must
+  * always be converted to entities.
+  *
+  * @param string $text
+  *   The raw (UTF-8) text to encode.
+  *
+  * @return string
+  *   The text with HTML entities substituted.
+  */
+ protected function enc($text) {
+   // Quotes must be escaped on every PHP version.
+   $flags = ENT_QUOTES;
+
+   // ENT_HTML5 and ENT_SUBSTITUTE only exist on PHP 5.4+. They are OR'ed
+   // into the base flags rather than replacing them: without a quote flag
+   // htmlentities() behaves as ENT_NOQUOTES and leaves '"' unescaped.
+   // TODO: Verify on PHP 5.4 that this works as desired.
+   if (defined('ENT_HTML5')) {
+     $flags |= ENT_HTML5 | ENT_SUBSTITUTE;
+   }
+
+   return htmlentities($text, $flags, 'UTF-8');
+ }
+
/**
* Is an unary tag.
*/
protected function isUnary($name) {
- return FALSE;
+ return isset(self::$unary_elements[$name]);
}
/**
@@ -240,4 +294,13 @@ class Traverser {
}
+ /**
+  * Check whether an element lives in one of the "local" namespaces.
+  *
+  * @param object $ele
+  *   The element to inspect; only its namespaceURI property is read.
+  *
+  * @return bool
+  *   TRUE if the element has a non-empty namespace URI that is a key of
+  *   self::$local_ns, FALSE otherwise.
+  */
+ protected function isLocalElement($ele) {
+   $ns = $ele->namespaceURI;
+
+   // An element without a namespace can never be local.
+   return !empty($ns) && isset(self::$local_ns[$ns]);
+ }
+
}