summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAsmir Mustafic <[email protected]>2014-06-17 09:24:47 +0200
committerAsmir Mustafic <[email protected]>2014-06-17 09:24:47 +0200
commit44f07f1f7ec5b995a5279eb4f61ea0dbb0cb616e (patch)
tree52f84b9fec10607cad971c87f674e3b9182b958a
parent03a67b6434dbb1dd97b5f1608cb19d3ab84c01fc (diff)
Custom namespaces (XML style)
-rw-r--r--src/HTML5.php4
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php157
-rw-r--r--src/HTML5/Serializer/OutputRules.php18
-rw-r--r--test/HTML5/Parser/DOMTreeBuilderTest.php103
-rw-r--r--test/HTML5/Serializer/OutputRulesTest.php38
5 files changed, 275 insertions, 45 deletions
diff --git a/src/HTML5.php b/src/HTML5.php
index 6d6d4e4..959c3d2 100644
--- a/src/HTML5.php
+++ b/src/HTML5.php
@@ -158,7 +158,7 @@ class HTML5
public function parse(\Masterminds\HTML5\Parser\InputStream $input)
{
$this->errors = array();
- $events = new DOMTreeBuilder();
+ $events = new DOMTreeBuilder(FALSE, $this->options);
$scanner = new Scanner($input);
$parser = new Tokenizer($scanner, $events);
@@ -181,7 +181,7 @@ class HTML5
*/
public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input)
{
- $events = new DOMTreeBuilder(TRUE);
+ $events = new DOMTreeBuilder(TRUE, $this->options);
$scanner = new Scanner($input);
$parser = new Tokenizer($scanner, $events);
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 9e77312..a1723de 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -23,6 +23,56 @@ use Masterminds\HTML5\Elements;
*/
class DOMTreeBuilder implements EventHandler
{
+ /**
+ * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
+ */
+ const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
+
+ const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
+
+ const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
+
+ const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
+
+ const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
+
+ const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
+
+ /**
+ * Holds the HTML5 element names that cause a namespace switch
+ *
+ * @var array
+ */
+ protected $nsRoots = array(
+ 'html' => self::NAMESPACE_HTML,
+ 'svg' => self::NAMESPACE_SVG,
+ 'math' => self::NAMESPACE_MATHML
+ );
+
+ /**
+ * Holds the always available namespaces (which do not require an XMLNS declaration).
+ *
+ * @var array
+ */
+ protected $implicitNamespaces = array(
+ 'xml' => self::NAMESPACE_XML,
+ 'xmlns' => self::NAMESPACE_XMLNS,
+ 'xlink' => self::NAMESPACE_XLINK
+ );
+
+ /**
+ * Holds a stack of currently active namespaces.
+ *
+ * @var array
+ */
+ protected $nsStack = array();
+
+ /**
+ * Holds the number of namespaces declared by a node.
+ *
+ * @var array
+ */
+ protected $pushes = array();
/**
* Defined in 8.2.5.
@@ -75,11 +125,15 @@ class DOMTreeBuilder implements EventHandler
const IM_IN_MATHML = 23;
+ protected $options = array();
+
protected $stack = array();
protected $current; // Pointer in the tag hierarchy.
protected $doc;
+ protected $frag;
+
protected $processor;
protected $insertMode = 0;
@@ -91,10 +145,10 @@ class DOMTreeBuilder implements EventHandler
*/
protected $quirks = TRUE;
- public $isFragment = FALSE;
-
- public function __construct($isFragment = FALSE)
+ public function __construct($isFragment = FALSE, array $options = array())
{
+ $this->options = $options;
+
$impl = new \DOMImplementation();
// XXX:
// Create the doctype. For now, we are always creating HTML5
@@ -104,18 +158,20 @@ class DOMTreeBuilder implements EventHandler
$this->doc = $impl->createDocument(NULL, NULL, $dt);
$this->doc->errors = array();
- // $this->current = $this->doc->documentElement;
$this->current = $this->doc; // ->documentElement;
// Create a rules engine for tags.
$this->rules = new TreeBuildingRules($this->doc);
+ // Fill $nsStack with the default HTML5 namespaces, plus the "implicitNamespaces" array taken from $options
+ array_unshift($this->nsStack, (isset($this->options["implicitNamespaces"]) ? $this->options["implicitNamespaces"] : array()) + array(
+ '' => self::NAMESPACE_HTML
+ ) + $this->implicitNamespaces);
+
if ($isFragment) {
- $this->isFragment = TRUE;
$this->insertMode = static::IM_IN_BODY;
- $ele = $this->doc->createElement('html');
- $this->doc->appendChild($ele);
- $this->current = $ele;
+ $this->frag = $this->doc->createDocumentFragment();
+ $this->current = $this->frag;
}
}
@@ -139,24 +195,8 @@ class DOMTreeBuilder implements EventHandler
*/
public function fragment()
{
- $append = $this->doc->documentElement->childNodes;
- $frag = $this->doc->createDocumentFragment();
-
- // appendChild() modifies the DOMNodeList, so we
- // have to buffer up the items first, then use the
- // array buffer and loop twice.
- $buffer = array();
- foreach ($append as $node) {
- $buffer[] = $node;
- }
-
- foreach ($buffer as $node) {
- $frag->appendChild($node);
- }
-
- $frag->errors = $this->doc->errors;
-
- return $frag;
+ $this->frag->errors = $this->doc->errors;
+ return $this->frag;
}
/**
@@ -198,7 +238,7 @@ class DOMTreeBuilder implements EventHandler
$lname = $this->normalizeTagName($name);
// Make sure we have an html element.
- if (! $this->doc->documentElement && $name !== 'html') {
+ if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) {
$this->startTag('html');
}
@@ -252,13 +292,53 @@ class DOMTreeBuilder implements EventHandler
$lname = Elements::normalizeSvgElement($lname);
}
+ $pushes = 0;
+ // when we find a tag that appears inside $nsRoots, we have to switch the default namespace
+ if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
+ array_unshift($this->nsStack, array(
+ '' => $this->nsRoots[$lname]
+ ) + $this->nsStack[0]);
+ $pushes ++;
+ }
+ if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
+ // when xmlNamespaces is TRUE and we find an 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack
+ foreach ($attributes as $aName => $aVal) {
+ if ($aName === 'xmlns') {
+ array_unshift($this->nsStack, array(
+ '' => $aVal
+ ) + $this->nsStack[0]);
+ $pushes ++;
+ } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
+ array_unshift($this->nsStack, array(
+ substr($aName, $pos + 1) => $aVal
+ ) + $this->nsStack[0]);
+ $pushes ++;
+ }
+ }
+ }
+
try {
- $ele = $this->doc->createElement($lname);
+ $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
+
+ if (isset($this->nsStack[0][$prefix])) {
+ $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
+ } else {
+ $ele = $this->doc->createElement($lname);
+ }
} catch (\DOMException $e) {
$this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
$ele = $this->doc->createElement('invalid');
}
+
+ // when we add some namespaces, we have to track them. Later, when "endElement" is invoked, we have to remove them
+ if ($pushes > 0) {
+ // PHP tends to free the memory used by DOM nodes;
+ // to avoid spl_object_hash collisions we have to prevent garbage collection of $ele by storing it in $pushes
+ // see https://bugs.php.net/bug.php?id=67459
+ $this->pushes[spl_object_hash($ele)] = array($pushes, $ele);
+ }
+
foreach ($attributes as $aName => $aVal) {
if ($this->insertMode == static::IM_IN_SVG) {
@@ -268,7 +348,15 @@ class DOMTreeBuilder implements EventHandler
}
try {
- $ele->setAttribute($aName, $aVal);
+ $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
+ if ($prefix!==false && $prefix !== 'xmlns' && isset($this->nsStack[0][$prefix])) {
+ $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
+ } elseif ($aName === 'xmlns') {
+ // setAttribute('xmlns', '..') is not possible, so we have to add a fake attribute
+ $ele->setAttribute("xmlns:x___xmlns__x", $aVal);
+ } else {
+ $ele->setAttribute($aName, $aVal);
+ }
} catch (\DOMException $e) {
$this->parseError("Illegal attribute name for tag $name. Ignoring: $aName");
continue;
@@ -350,7 +438,15 @@ class DOMTreeBuilder implements EventHandler
return;
}
- // $this->current = $this->current->parentNode;
+ $cid = spl_object_hash($this->current);
+ // remove the namespaces defined by the current node
+ if (isset($this->pushes[$cid])) {
+ for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
+ $extr = array_shift($this->nsStack);
+ }
+ unset($this->pushes[$cid]);
+ }
+
if (! $this->autoclose($lname)) {
$this->parseError('Could not find closing tag for ' . $lname);
}
@@ -480,7 +576,6 @@ class DOMTreeBuilder implements EventHandler
return TRUE;
}
} while ($working = $working->parentNode);
-
return FALSE;
}
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php
index 168c65c..6f02956 100644
--- a/src/HTML5/Serializer/OutputRules.php
+++ b/src/HTML5/Serializer/OutputRules.php
@@ -108,9 +108,8 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*/
public function text($ele)
{
- if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->tagName, Elements::TEXT_RAW)) {
+ if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
$this->wr($ele->data);
-
return;
}
@@ -151,7 +150,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*/
protected function openTag($ele)
{
- $this->wr('<')->wr($ele->tagName);
+ $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);
$this->attrs($ele);
if ($this->outputMode == static::IM_IN_HTML) {
@@ -187,6 +186,9 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// prefix. It seems that DOM does this for us already, but there
// may be exceptions.
$name = $node->name;
+ if ($name == "xmlns:x___xmlns__x") {
+ $name = "xmlns";
+ }
// Special handling for attributes in SVG and MathML.
// Using if/elseif instead of switch because it's faster in PHP.
@@ -215,7 +217,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
protected function closeTag($ele)
{
if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
- $this->wr('</')->wr($ele->tagName)->wr('>');
+ $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
}
}
@@ -225,24 +227,22 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
* @param string $text
* The string to put into the output.
*
- * @return Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
+ * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
*/
protected function wr($text)
{
fwrite($this->out, $text);
-
return $this;
}
/**
* Write a new line character.
*
- * @return Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
+ * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
*/
protected function nl()
{
fwrite($this->out, PHP_EOL);
-
return $this;
}
@@ -277,6 +277,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*/
protected function enc($text, $attribute = FALSE)
{
+
// Escape the text rather than convert to named character references.
if (! $this->encode) {
return $this->escape($text, $attribute);
@@ -314,6 +315,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*/
protected function escape($text, $attribute = FALSE)
{
+
// Not using htmlspecialchars because, while it does escaping, it doesn't
// match the requirements of section 8.5. For example, it doesn't handle
// non-breaking spaces.
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index d8b686c..266ca98 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -19,9 +19,9 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
/**
* Convenience function for parsing.
*/
- protected function parse($string)
+ protected function parse($string, array $options = array())
{
- $treeBuilder = new DOMTreeBuilder();
+ $treeBuilder = new DOMTreeBuilder(FALSE, $options);
$input = new StringInputStream($string);
$scanner = new Scanner($input);
$parser = new Tokenizer($scanner, $treeBuilder);
@@ -89,6 +89,101 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertEquals('title', $head->childNodes->item(0)->tagName);
}
+ public function testImplicitNamespaces()
+ {
+ $dom = $this->parse('<!DOCTYPE html><html><body><a xlink:href="bar">foo</a></body></html>');
+ $a = $dom->getElementsByTagName('a')->item(0);
+ $attr = $a->getAttributeNode('xlink:href');
+ $this->assertEquals('http://www.w3.org/1999/xlink', $attr->namespaceURI);
+
+ $dom = $this->parse('<!DOCTYPE html><html><body><a xml:base="bar">foo</a></body></html>');
+ $a = $dom->getElementsByTagName('a')->item(0);
+ $attr = $a->getAttributeNode('xml:base');
+ $this->assertEquals('http://www.w3.org/XML/1998/namespace', $attr->namespaceURI);
+ }
+
+ public function testCustomImplicitNamespaces()
+ {
+ $dom = $this->parse('<!DOCTYPE html><html><body><a t:href="bar">foo</a></body></html>', array(
+ 'implicitNamespaces' => array(
+ 't' => 'http://www.example.com'
+ )
+ ));
+ $a = $dom->getElementsByTagName('a')->item(0);
+ $attr = $a->getAttributeNode('t:href');
+ $this->assertEquals('http://www.example.com', $attr->namespaceURI);
+
+ $dom = $this->parse('<!DOCTYPE html><html><body><t:a>foo</t:a></body></html>', array(
+ 'implicitNamespaces' => array(
+ 't' => 'http://www.example.com'
+ )
+ ));
+ $list = $dom->getElementsByTagNameNS('http://www.example.com', 'a');
+ $this->assertEquals(1, $list->length);
+ }
+
+ public function testXmlNamespaces()
+ {
+ $dom = $this->parse(
+ '<!DOCTYPE html><html>
+ <t:body xmlns:t="http://www.example.com">
+ <a t:href="bar">foo</a>
+ </body>
+ <div>foo</div>
+ </html>', array(
+ 'xmlNamespaces' => true
+ ));
+ $a = $dom->getElementsByTagName('a')->item(0);
+ $attr = $a->getAttributeNode('t:href');
+ $this->assertEquals('http://www.example.com', $attr->namespaceURI);
+
+ $list = $dom->getElementsByTagNameNS('http://www.example.com', 'body');
+ $this->assertEquals(1, $list->length);
+ }
+
+ public function testXmlNamespaceNesting()
+ {
+ $dom = $this->parse(
+ '<!DOCTYPE html><html>
+ <body xmlns:x="http://www.prefixed.com" id="body">
+ <a id="bar1" xmlns="bar1">
+ <b id="bar4" xmlns="bar4"><x:prefixed id="prefixed"/></b>
+ </a>
+ <svg id="svg"></svg>
+ <c id="bar2" xmlns="bar2"></c>
+ <div id="div"></div>
+ <d id="bar3"></d>
+
+ </body>
+ </html>', array(
+ 'xmlNamespaces' => true
+ ));
+
+ $div = $dom->getElementById('div');
+ $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI);
+
+ $body = $dom->getElementById('body');
+ $this->assertEquals('http://www.w3.org/1999/xhtml', $body->namespaceURI);
+
+ $bar1 = $dom->getElementById('bar1');
+ $this->assertEquals('bar1', $bar1->namespaceURI);
+
+ $bar2 = $dom->getElementById('bar2');
+ $this->assertEquals("bar2", $bar2->namespaceURI);
+
+ $bar3 = $dom->getElementById('bar3');
+ $this->assertEquals("http://www.w3.org/1999/xhtml", $bar3->namespaceURI);
+
+ $bar4 = $dom->getElementById('bar4');
+ $this->assertEquals("bar4", $bar4->namespaceURI);
+
+ $svg = $dom->getElementById('svg');
+ $this->assertEquals("http://www.w3.org/2000/svg", $svg->namespaceURI);
+
+ $prefixed = $dom->getElementById('prefixed');
+ $this->assertEquals("http://www.prefixed.com", $prefixed->namespaceURI);
+ }
+
public function testAttributes()
{
$html = "<!DOCTYPE html>
@@ -290,7 +385,7 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertEquals('math', $math->tagName);
$this->assertEquals('math', $math->nodeName);
$this->assertEquals('math', $math->localName);
- $this->assertEmpty($math->namespaceURI);
+ $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI);
}
public function testSVG()
@@ -314,7 +409,7 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertEquals('svg', $svg->tagName);
$this->assertEquals('svg', $svg->nodeName);
$this->assertEquals('svg', $svg->localName);
- $this->assertEmpty($svg->namespaceURI);
+ $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);
$textPath = $doc->getElementsByTagName('textPath')->item(0);
$this->assertEquals('textPath', $textPath->tagName);
diff --git a/test/HTML5/Serializer/OutputRulesTest.php b/test/HTML5/Serializer/OutputRulesTest.php
index 27c66c4..bf200e3 100644
--- a/test/HTML5/Serializer/OutputRulesTest.php
+++ b/test/HTML5/Serializer/OutputRulesTest.php
@@ -113,6 +113,44 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertEquals('<div id="foo" class="bar baz">foo bar baz</div>', stream_get_contents($stream, - 1, 0));
}
+ function testSerializeWithNamespaces()
+ {
+ $this->html5 = $this->getInstance(array(
+ 'xmlNamespaces' => true
+ ));
+
+ $source = '<!DOCTYPE html>
+<html><body xmlns:x="http://www.prefixed.com" id="body">
+ <a id="bar1" xmlns="bar1">
+ <b id="bar4" xmlns="bar4"><x:prefixed id="prefixed">x</x:prefixed></b>
+ </a>
+ <svg id="svg">xx</svg>
+ <c id="bar2" xmlns="bar2">xx</c>
+ <div id="div">xx</div>
+ <d id="bar3">xx</d></body>
+</html>
+';
+
+ $dom = $this->html5->loadHTML($source, array(
+ 'xmlNamespaces' => true
+ ));
+
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, $this->html5->getOptions());
+ $t = new Traverser($dom, $stream, $r, $this->html5->getOptions());
+
+ $t->walk();
+ $rendered = stream_get_contents($stream, - 1, 0);
+
+ $this->assertEquals(str_replace(array(
+ "\n",
+ "\r"
+ ), "", $rendered), str_replace(array(
+ "\n",
+ "\r"
+ ), "", $source));
+ }
+
public function testElementWithScript()
{
$dom = $this->html5->loadHTML(