summaryrefslogtreecommitdiff
path: root/src/HTML5
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5')
-rw-r--r--src/HTML5/Serializer/OutputRules.php203
-rw-r--r--src/HTML5/Serializer/Serializer.php16
-rw-r--r--src/HTML5/Serializer/Traverser.php254
3 files changed, 230 insertions, 243 deletions
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php
new file mode 100644
index 0000000..5780d61
--- /dev/null
+++ b/src/HTML5/Serializer/OutputRules.php
@@ -0,0 +1,203 @@
+<?php
+namespace HTML5\Serializer;
+
+use \HTML5\Elements;
+
+/**
+ * Output rules for serializing a DOM tree as HTML5.
+ *
+ * A Traverser walks the tree and calls one method of this class per node
+ * (document, element, text, CDATA, comment, processing instruction). Each
+ * method writes the matching markup to the output stream and, for elements,
+ * asks the traverser to continue with the child nodes.
+ */
+class OutputRules {
+
+  /**
+   * The traverser that calls into this object and handles descent.
+   */
+  protected $traverser;
+
+  /**
+   * If FALSE, enc() only escapes &'<>"; otherwise it converts every
+   * character that has a named character reference.
+   */
+  protected $encode = FALSE;
+
+  /**
+   * The stream resource all output is written to.
+   */
+  protected $out;
+
+  const DOCTYPE = '<!DOCTYPE html>';
+
+  /**
+   * Create the output rules.
+   *
+   * @param mixed $traverser
+   *   The traverser driving the output. Its node(), children() and
+   *   isLocalElement() methods are called from this class.
+   * @param resource $output
+   *   A writable stream. Everything is written to it with fwrite().
+   * @param array $options
+   *   Key/value options. Only 'encode' is read here; see enc().
+   */
+  public function __construct($traverser, $output, $options = array()) {
+    $this->traverser = $traverser;
+    $this->out = $output;
+
+    if (isset($options['encode'])) {
+      $this->encode = $options['encode'];
+    }
+  }
+
+  /**
+   * Write a whole document: the doctype, the root element and a newline.
+   *
+   * @param \DOMDocument $dom
+   *   The document to write.
+   */
+  public function document($dom) {
+    $this->doctype();
+
+    // An empty DOMDocument has no root element, so there is nothing to walk.
+    if ($dom->documentElement !== NULL) {
+      $this->traverser->node($dom->documentElement);
+    }
+
+    $this->nl();
+  }
+
+  /**
+   * Write the HTML5 doctype followed by a newline.
+   */
+  protected function doctype() {
+    $this->wr(self::DOCTYPE);
+    $this->nl();
+  }
+
+  /**
+   * Write an element: opening tag, children and, unless void, closing tag.
+   *
+   * @param \DOMElement $ele
+   *   The element to write.
+   */
+  public function element($ele) {
+    // Per spec:
+    // If the element has a declared namespace in the HTML, MathML or
+    // SVG namespaces, we use the lname instead of the tagName.
+    $name = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
+
+    $this->openTag($ele);
+
+    // Handle children.
+    if ($ele->hasChildNodes()) {
+      $this->traverser->children($ele->childNodes);
+    }
+
+    // If not unary, add a closing tag.
+    if (!Elements::isA($name, Elements::VOID_TAG)) {
+      $this->closeTag($ele);
+    }
+  }
+
+  /**
+   * Write a text node.
+   *
+   * Text whose parent element is flagged Elements::TEXT_RAW is written
+   * verbatim; all other text goes through enc().
+   *
+   * @param \DOMText $ele
+   *   The text node to write.
+   */
+  public function text($ele) {
+    $parent = $ele->parentNode;
+
+    // Use this node's own data. wholeText also includes logically adjacent
+    // text and CDATA nodes, which the traverser visits separately, so it
+    // would write their content more than once.
+    $data = $ele->data;
+
+    // Only elements carry a tag name; the parent may also be a document
+    // fragment or absent altogether.
+    if ($parent instanceof \DOMElement && Elements::isA($parent->tagName, Elements::TEXT_RAW)) {
+      $this->wr($data);
+      return;
+    }
+
+    // FIXME: This probably needs some flags set.
+    $this->wr($this->enc($data));
+  }
+
+  /**
+   * Write a CDATA section.
+   *
+   * @param \DOMCdataSection $ele
+   *   The CDATA node to write. Its own data is written unescaped.
+   */
+  public function cdata($ele) {
+    // See text(): data is this node only, wholeText spans adjacent nodes.
+    $this->wr('<![CDATA[')->wr($ele->data)->wr(']]>');
+  }
+
+  /**
+   * Write a comment.
+   *
+   * @param \DOMComment $ele
+   *   The comment node to write. Its data is written unescaped.
+   */
+  public function comment($ele) {
+    $this->wr('<!--')->wr($ele->data)->wr('-->');
+  }
+
+  /**
+   * Write a processing instruction.
+   *
+   * @param \DOMProcessingInstruction $ele
+   *   The processing instruction to write as "<?target data ?>".
+   */
+  public function processorInstruction($ele) {
+    $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr(' ?>');
+  }
+
+  /**
+   * Write the opening tag.
+   *
+   * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
+   * qualified name (8.3).
+   *
+   * @param \DOMNode $ele
+   *   The element being written.
+   */
+  protected function openTag($ele) {
+    // FIXME: Needs support for SVG, MathML, and namespaced XML.
+    $this->wr('<')->wr($ele->tagName);
+    $this->attrs($ele);
+    $this->wr('>');
+  }
+
+  /**
+   * Write all attributes of an element as name="value" pairs.
+   *
+   * @param \DOMNode $ele
+   *   The element whose attributes are written.
+   *
+   * @return HTML5\Serializer\OutputRules
+   *   $this so it can be used in chaining.
+   */
+  protected function attrs($ele) {
+    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
+    if (!$ele->hasAttributes()) {
+      return $this;
+    }
+
+    // TODO: Currently, this always writes name="value", and does not do
+    // value-less attributes.
+    foreach ($ele->attributes as $node) {
+      $val = $this->enc($node->value);
+
+      // XXX: The spec says that we need to ensure that anything in
+      // the XML, XMLNS, or XLink NS's should use the canonical
+      // prefix. It seems that DOM does this for us already, but there
+      // may be exceptions.
+      $this->wr(' ')->wr($node->name)->wr('="')->wr($val)->wr('"');
+    }
+
+    return $this;
+  }
+
+  /**
+   * Write the closing tag.
+   *
+   * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
+   * qualified name (8.3).
+   *
+   * @param \DOMNode $ele
+   *   The element being written.
+   */
+  protected function closeTag($ele) {
+    // FIXME: Needs support for SVG, MathML, and namespaced XML.
+    $this->wr('</')->wr($ele->tagName)->wr('>');
+  }
+
+  /**
+   * Write to the output.
+   *
+   * @param string $text
+   *   The string to put into the output.
+   *
+   * @return HTML5\Serializer\OutputRules
+   *   $this so it can be used in chaining.
+   */
+  protected function wr($text) {
+    fwrite($this->out, $text);
+    return $this;
+  }
+
+  /**
+   * Write a new line character.
+   *
+   * @return HTML5\Serializer\OutputRules
+   *   $this so it can be used in chaining.
+   */
+  protected function nl() {
+    return $this->wr(PHP_EOL);
+  }
+
+  /**
+   * Encode text.
+   *
+   * True encoding will turn all named character references into their entities.
+   * This includes such characters as +.# and many other common ones. By default
+   * encoding here will just escape &'<>".
+   *
+   * On PHP 5.4+ the native HTML5 entity table is used. On earlier versions the
+   * replacement map in HTML5Entities::$map is applied instead.
+   *
+   * @param string $text
+   *   text to encode.
+   *
+   * @return string
+   *   The encoded text.
+   */
+  protected function enc($text) {
+    // Escape rather than encode all entities.
+    if (!$this->encode) {
+      return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
+    }
+
+    // If we are in PHP 5.4+ we can use the native html5 entity functionality.
+    // The final FALSE leaves entities already present in the text untouched.
+    if (defined('ENT_HTML5')) {
+      return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE);
+    }
+
+    // If a version earlier than 5.4 html5 entities are not entirely handled.
+    // This manually handles them.
+    return strtr($text, \HTML5\Serializer\HTML5Entities::$map);
+  }
+
+} \ No newline at end of file
diff --git a/src/HTML5/Serializer/Serializer.php b/src/HTML5/Serializer/Serializer.php
index 3f9187f..7053df5 100644
--- a/src/HTML5/Serializer/Serializer.php
+++ b/src/HTML5/Serializer/Serializer.php
@@ -15,8 +15,7 @@ namespace HTML5\Serializer;
*/
class Serializer {
protected $dom;
- protected $pretty = TRUE;
- protected $encode = FALSE;
+ protected $options = array();
/**
* Create a serializer.
@@ -36,14 +35,7 @@ class Serializer {
*/
public function __construct($dom, $options = array()) {
$this->dom = $dom;
-
- if (isset($options['format']) && is_bool($options['format'])) {
- $this->pretty = $options['format'];
- }
-
- if (isset($options['encode']) && is_bool($options['encode'])) {
- $this->encode = $options['encode'];
- }
+ $this->options = $options;
}
/**
@@ -63,9 +55,7 @@ class Serializer {
else {
$file = fopen($filename, 'w');
}
- $trav = new Traverser($this->dom, $file);
- $trav->formatOutput($this->pretty);
- $trav->encodeOutput($this->encode);
+ $trav = new Traverser($this->dom, $file, $this->options);
$trav->walk();
diff --git a/src/HTML5/Serializer/Traverser.php b/src/HTML5/Serializer/Traverser.php
index 68dea82..bd9d1ce 100644
--- a/src/HTML5/Serializer/Traverser.php
+++ b/src/HTML5/Serializer/Traverser.php
@@ -1,8 +1,6 @@
<?php
namespace HTML5\Serializer;
-use \HTML5\Elements;
-
/**
* Traverser for walking a DOM tree.
*
@@ -22,11 +20,10 @@ class Traverser {
);
protected $dom;
- protected $out;
- protected $pretty = TRUE;
+ protected $options;
protected $encode = FALSE;
-
- const DOCTYPE = '<!DOCTYPE html>';
+ protected $rules;
+ protected $out;
/**
* Create a traverser.
@@ -36,44 +33,19 @@ class Traverser {
* @param resource $out
* A stream that allows writing. The traverser will output into this
* stream.
+ * @param array $options
+ * An array of options for the traverser as key/value pairs. These include:
+ * - encode: A bool to specify if full encoding should happen for all named
+ * character references. Defaults to FALSE which escapes &'<>".
+ * - rules: The name of the class handling the output rules.
*/
- public function __construct($dom, $out) {
+ public function __construct($dom, $out, $options = array()) {
$this->dom = $dom;
$this->out = $out;
- }
+ $this->options = $options;
- /**
- * Determine whether output should be formatted.
- *
- * IMPORTANT: Neither option will GUARANTEE that the spacing of the output
- * will exactly match the spacing of an origin document. The HTML5 specification
- * does not require any such behavior.
- *
- * Semantically (according to the HTML5 spec's definition), either flag
- * will produce an identical document. (Insignificant
- * whitespace does not impact semantics).
- *
- * @param boolean $useFormatting
- * If TRUE (default) output will be formatted. If FALSE,
- * the little or no formatting is done.
- */
- public function formatOutput($useFormatting = TRUE) {
- $this->pretty = $useFormatting;
- }
-
- /**
- * Set whether encoding should encode all html5 entities.
- *
- * True encoding will turn all named character references into their entities.
- * This includes such characters as +.# and many other common ones. By default
- * encoding here will just escape &'<>". which is what most users expect.
- *
- * @param bool $encode
- * Whether to encode all html5 entities. Defaults to FALSE where only
- * &'<>". are escaped.
- */
- public function encodeOutput($encode = FALSE) {
- $this->encode = $encode;
+
+ // $options defaults to an empty array, so 'rules' may be missing. Fall back
+ // to the bundled output rules instead of instantiating an undefined index.
+ $rulesClass = isset($this->options['rules']) ? $this->options['rules'] : 'HTML5\Serializer\OutputRules';
+ $this->rules = new $rulesClass($this, $out, $this->options);
}
/**
@@ -85,8 +57,7 @@ class Traverser {
public function walk() {
// If DOMDocument, start with the DOCTYPE and travers.
if ($this->dom instanceof \DOMDocument) {
- $this->doctype();
- $this->document($this->dom);
+ $this->rules->document($this->dom);
}
// If NodeList, loop
elseif ($this->dom instanceof \DOMNodeList) {
@@ -100,40 +71,30 @@ class Traverser {
return $this->out;
}
- protected function doctype() {
- $this->wr(self::DOCTYPE);
- $this->nl();
- }
-
- protected function document($node) {
- $this->node($node->documentElement);
- $this->nl();
- }
-
/**
* Process a node in the DOM.
*
* @param mixed $node
* A node implementing \DOMNode.
*/
- protected function node($node) {
+ public function node($node) {
// A listing of types is at http://php.net/manual/en/dom.constants.php
switch ($node->nodeType) {
case XML_ELEMENT_NODE:
- $this->element($node);
+ $this->rules->element($node);
break;
case XML_TEXT_NODE:
- $this->text($node);
+ $this->rules->text($node);
break;
case XML_CDATA_SECTION_NODE:
- $this->cdata($node);
+ $this->rules->cdata($node);
break;
// FIXME: It appears that the parser doesn't do PI's.
case XML_PI_NODE:
- $this->processorInstruction($ele);
+ // The node being visited is $node; no $ele exists in this method.
+ $this->rules->processorInstruction($node);
break;
case XML_COMMENT_NODE:
- $this->comment($node);
+ $this->rules->comment($node);
break;
// Currently we don't support embedding DTDs.
default:
@@ -142,186 +103,19 @@ class Traverser {
}
}
- protected function element($ele) {
- $name = $ele->tagName;
- $block = $this->pretty && Elements::isA($name, Elements::BLOCK_TAG);
-
- // Per spec:
- // If the element has a declared namespace in the HTML, MathML or
- // SVG namespaces, we use the lname instead of the tagName.
- if ($this->isLocalElement($ele)) {
- $name = $ele->localName;
- }
-
- // TODO: Really need to fix the spacing.
- // Add a newline for a block element.
- if ($block) $this->nl();
-
- $this->openTag($ele);
-
- // Handle children.
- if ($ele->hasChildNodes()) {
- $this->children($ele->childNodes);
- }
-
- // If not unary, add a closing tag.
- if (!Elements::isA($name, Elements::VOID_TAG)) {
- $this->closeTag($ele);
- if ($block) $this->nl();
- }
- }
-
/**
- * Write a text node.
+ * Walk through all the nodes on a node list.
*
- * @param \DOMText $ele
- * The text node to write.
+ * @param \DOMNodeList $nl
+ * A list of child elements to walk through.
*/
- protected function text($ele) {
- if (isset($ele->parentNode) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) {
- $this->wr($ele->wholeText);
- return;
- }
-
- // FIXME: This probably needs some flags set.
- $this->wr($this->enc($ele->wholeText));
-
- }
-
- protected function cdata($ele) {
- $this->wr('<![CDATA[')->wr($ele->wholeText)->wr(']]>');
- }
-
- protected function comment($ele) {
- $this->wr('<!--')->wr($ele->data)->wr('-->');
- }
-
- protected function processorInstruction($ele) {
- $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr(' ?>');
- }
-
- protected function children($nl) {
+ public function children($nl) {
foreach ($nl as $node) {
$this->node($node);
}
}
/**
- * Write the opening tag.
- *
- * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
- * qualified name (8.3).
- *
- * @param \DOMNode $ele
- * The element being written.
- */
- protected function openTag($ele) {
- // FIXME: Needs support for SVG, MathML, and namespaced XML.
- $this->wr('<')->wr($ele->tagName);
- $this->attrs($ele);
- $this->wr('>');
- }
-
- protected function attrs($ele) {
- // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
- if (!$ele->hasAttributes()) {
- return $this;
- }
-
- // TODO: Currently, this always writes name="value", and does not do
- // value-less attributes.
- $map = $ele->attributes;
- $len = $map->length;
- for ($i = 0; $i < $len; ++$i) {
- $node = $map->item($i);
- $val = $this->enc($node->value);
-
- // XXX: The spec says that we need to ensure that anything in
- // the XML, XMLNS, or XLink NS's should use the canonical
- // prefix. It seems that DOM does this for us already, but there
- // may be exceptions.
- $this->wr(' ')->wr($node->name)->wr('="')->wr($val)->wr('"');
- }
- }
-
- /**
- * Write the closing tag.
- *
- * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
- * qualified name (8.3).
- *
- * @param \DOMNode $ele
- * The element being written.
- */
- protected function closeTag($ele) {
- // FIXME: Needs support for SVG, MathML, and namespaced XML.
- $this->wr('</')->wr($ele->tagName)->wr('>');
- }
-
- /**
- * Write to the output.
- *
- * @param string $text
- * The string to put into the output.
- *
- * @return HTML5\Serializer\Traverser
- * $this so it can be used in chaining.
- */
- protected function wr($text) {
- fwrite($this->out, $text);
- return $this;
- }
-
- /**
- * Write a new line character.
- *
- * @return HTML5\Serializer\Traverser
- * $this so it can be used in chaining.
- */
- protected function nl() {
- fwrite($this->out, PHP_EOL);
- return $this;
- }
-
- /**
- * Encode text.
- *
- * True encoding will turn all named character references into their entities.
- * This includes such characters as +.# and many other common ones. By default
- * encoding here will just escape &'<>".
- *
- * Note, PHP 5.4+ has better html5 encoding.
- *
- * @todo Use the Entities class in php 5.3 to have html5 entities.
- *
- * @param string $text
- * text to encode.
- *
- * @return string
- * The encoded text.
- */
- protected function enc($text) {
- $flags = ENT_QUOTES;
-
- // Escape rather than encode all entities.
- if (!$this->encode) {
- return htmlspecialchars($text, $flags, 'UTF-8');
- }
-
- // If we are in PHP 5.4+ we can use the native html5 entity functionality.
- if (defined('ENT_HTML5')) {
- $flags = ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES;
- $ret = htmlentities($text, $flags, 'UTF-8', FALSE);
- }
- // If a version earlier than 5.4 html5 entities are not entirely handled.
- // This manually handles them.
- else {
- $ret = strtr($text, \HTML5\Serializer\HTML5Entities::$map);
- }
- return $ret;
- }
-
- /**
* Is an element local?
*
* @param mixed $ele
@@ -330,7 +124,7 @@ class Traverser {
* @return bool
* True if local and false otherwise.
*/
- protected function isLocalElement($ele) {
+ public function isLocalElement($ele) {
$uri = $ele->namespaceURI;
if (empty($uri)) {
return FALSE;