diff options
-rw-r--r-- | src/HTML5/Parser/DOMTreeBuilder.php | 34 | ||||
-rw-r--r-- | src/HTML5/Serializer/OutputRules.php | 68 | ||||
-rw-r--r-- | test/HTML5/Parser/DOMTreeBuilderTest.php | 23 | ||||
-rw-r--r-- | test/HTML5/Serializer/OutputRulesTest.php | 37 |
4 files changed, 127 insertions, 35 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index d2c8020..59504f5 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -301,10 +301,12 @@ class DOMTreeBuilder implements EventHandler ) + $this->nsStack[0]); $pushes ++; } + $needsWorkaround = false; if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) { // when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack foreach ($attributes as $aName => $aVal) { if ($aName === 'xmlns') { + $needsWorkaround = $aVal; array_unshift($this->nsStack, array( '' => $aVal ) + $this->nsStack[0]); @@ -321,17 +323,29 @@ class DOMTreeBuilder implements EventHandler try { $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : ''; - if (isset($this->nsStack[0][$prefix])) { - $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname); + + if ($needsWorkaround!==false) { + + $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>"; + + $frag = new \DOMDocument('1.0', 'UTF-8'); + $frag->loadXML($xml); + + $ele = $this->doc->importNode($frag->documentElement, true); + } else { - $ele = $this->doc->createElement($lname); + if (isset($this->nsStack[0][$prefix])) { + $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname); + } else { + $ele = $this->doc->createElement($lname); + } } + } catch (\DOMException $e) { $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); $ele = $this->doc->createElement('invalid'); } - // when we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them if ($pushes > 0) { // PHP tends to free the memory used by DOM, @@ -346,6 +360,10 @@ class DOMTreeBuilder implements EventHandler } foreach ($attributes as $aName => $aVal) { + // xmlns attributes can't be set + if ($aName === 'xmlns') { + continue; + } if ($this->insertMode == static::IM_IN_SVG) { $aName = Elements::normalizeSvgAttribute($aName); @@ -355,11 +373,11 @@ class DOMTreeBuilder implements EventHandler try { $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false; - if ($prefix!==false && $prefix !== 'xmlns' && isset($this->nsStack[0][$prefix])) { + + if ($prefix==='xmlns') { + $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal); + } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) { $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal); - } elseif ($aName === 'xmlns') { - // setAttribute('xmlns', '..') is not possible, so we have to add a fake attribute - $ele->setAttribute("xmlns:x___xmlns__x", $aVal); } else { $ele->setAttribute($aName, $aVal); } diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php index ff8341e..1ab5c76 100644 --- a/src/HTML5/Serializer/OutputRules.php +++ b/src/HTML5/Serializer/OutputRules.php @@ -15,6 +15,32 @@ use Masterminds\HTML5\Elements; */ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface { + /** + * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0 + */ + const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; + + const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; + + const NAMESPACE_SVG = 'http://www.w3.org/2000/svg'; + + const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink'; + + const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace'; + + const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/'; + + /** + * Holds the HTML5 element names that causes a namespace switch + * + * @var array + */ + protected $nsRoots = array( + 'html' => self::NAMESPACE_HTML, + 'svg' => self::NAMESPACE_SVG, + 'math' => self::NAMESPACE_MATHML + ); + const IM_IN_HTML = 1; @@ -28,6 +54,8 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface protected $encode = false; + protected $xpath; + protected $out; protected $outputMode; @@ -148,6 +176,39 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface ->wr($ele->data) ->wr('?>'); } + /** + * Write the namespace attributes + * + * + * @param \DOMNode $ele + * The element being written. + */ + protected function namespaceAttrs($ele) + { + $this->xpath = new \DOMXPath($ele->ownerDocument); + $declared = array(); + + $declared["xmlns:xml"] = "http://www.w3.org/XML/1998/namespace"; + + if ($ele->parentNode) { + foreach( $this->xpath->query('namespace::*', $ele->parentNode ) as $nsNode ) { + $declared[$nsNode->nodeName] = $nsNode->nodeValue; + } + } + foreach( $this->xpath->query('namespace::*', $ele ) as $nsNode ) { + if (isset($declared[$nsNode->nodeName]) && $declared[$nsNode->nodeName] === $nsNode->nodeValue) { + unset($declared[$nsNode->nodeName]); + } else { + $declared[$nsNode->nodeName] = $nsNode->nodeValue; + } + } + + foreach( $declared as $aName => $aValue ) { + if (!in_array($aValue, $this->nsRoots)) { + $this->wr(' ')->wr($aName)->wr('="')->wr($aValue)->wr('"'); + } + } + } /** * Write the opening tag. @@ -161,7 +222,11 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface protected function openTag($ele) { $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName); + + $this->attrs($ele); + $this->namespaceAttrs($ele); + if ($this->outputMode == static::IM_IN_HTML) { $this->wr('>'); @@ -196,9 +261,6 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface // prefix. It seems that DOM does this for us already, but there // may be exceptions. $name = $node->name; - if ($name == "xmlns:x___xmlns__x") { - $name = "xmlns"; - } // Special handling for attributes in SVG and MathML. // Using if/elseif instead of switch because it's faster in PHP. diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index c1b19e5..5227c40 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -157,17 +157,22 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase $dom = $this->parse( '<!DOCTYPE html><html> <body xmlns:x="http://www.prefixed.com" id="body"> - <a id="bar1" xmlns="bar1"> - <b id="bar4" xmlns="bar4"><x:prefixed id="prefixed"/></b> + <a id="bar1" xmlns="http://www.prefixed.com/bar1"> + <b id="bar4" xmlns="http://www.prefixed.com/bar4"><x:prefixed id="prefixed"/></b> </a> <svg id="svg"></svg> - <c id="bar2" xmlns="bar2"></c> + <c id="bar2" xmlns="http://www.prefixed.com/bar2"></c> <div id="div"></div> <d id="bar3"></d> + <xn:d xmlns:xn="http://www.prefixed.com/xn" xmlns="http://www.prefixed.com/bar5_x" id="bar5"><x id="bar5_x"/></xn:d> </body> </html>', array( 'xmlNamespaces' => true )); + + + $this->assertEmpty($this->errors); + $div = $dom->getElementById('div'); $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI); @@ -175,22 +180,28 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase $this->assertEquals('http://www.w3.org/1999/xhtml', $body->namespaceURI); $bar1 = $dom->getElementById('bar1'); - $this->assertEquals('bar1', $bar1->namespaceURI); + $this->assertEquals('http://www.prefixed.com/bar1', $bar1->namespaceURI); $bar2 = $dom->getElementById('bar2'); - $this->assertEquals("bar2", $bar2->namespaceURI); + $this->assertEquals("http://www.prefixed.com/bar2", $bar2->namespaceURI); $bar3 = $dom->getElementById('bar3'); $this->assertEquals("http://www.w3.org/1999/xhtml", $bar3->namespaceURI); $bar4 = $dom->getElementById('bar4'); - $this->assertEquals("bar4", $bar4->namespaceURI); + $this->assertEquals("http://www.prefixed.com/bar4", $bar4->namespaceURI); $svg = $dom->getElementById('svg'); $this->assertEquals("http://www.w3.org/2000/svg", $svg->namespaceURI); $prefixed = $dom->getElementById('prefixed'); $this->assertEquals("http://www.prefixed.com", $prefixed->namespaceURI); + + $prefixed = $dom->getElementById('bar5'); + $this->assertEquals("http://www.prefixed.com/xn", $prefixed->namespaceURI); + + $prefixed = $dom->getElementById('bar5_x'); + $this->assertEquals("http://www.prefixed.com/bar5_x", $prefixed->namespaceURI); } public function testAttributes() diff --git a/test/HTML5/Serializer/OutputRulesTest.php b/test/HTML5/Serializer/OutputRulesTest.php index f12acbb..df33faa 100644 --- a/test/HTML5/Serializer/OutputRulesTest.php +++ b/test/HTML5/Serializer/OutputRulesTest.php @@ -119,21 +119,24 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase 'xmlNamespaces' => true )); - $source = '<!DOCTYPE html> -<html><body xmlns:x="http://www.prefixed.com" id="body"> - <a id="bar1" xmlns="bar1"> - <b id="bar4" xmlns="bar4"><x:prefixed id="prefixed">x</x:prefixed></b> - </a> - <svg id="svg">xx</svg> - <c id="bar2" xmlns="bar2">xx</c> - <div id="div">xx</div> - <d id="bar3">xx</d></body> -</html> -'; + $source = ' + <!DOCTYPE html> + <html><body id="body" xmlns:x="http://www.prefixed.com"> + <a id="bar1" xmlns="http://www.prefixed.com/bar1"> + <b id="bar4" xmlns="http://www.prefixed.com/bar4"><x:prefixed id="prefixed">xy</x:prefixed></b> + </a> + <svg id="svg">svg</svg> + <c id="bar2" xmlns="http://www.prefixed.com/bar2"></c> + <div id="div"></div> + <d id="bar3"></d> + <xn:d id="bar5" xmlns="http://www.prefixed.com/bar5_x" xmlns:xn="http://www.prefixed.com/xn"><x id="bar5_x">y</x></xn:d> + </body> + </html>'; $dom = $this->html5->loadHTML($source, array( 'xmlNamespaces' => true )); + $this->assertFalse($this->html5->hasErrors(), print_r($this->html5->getErrors(), 1)); $stream = fopen('php://temp', 'w'); $r = new OutputRules($stream, $this->html5->getOptions()); @@ -142,13 +145,11 @@ class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase $t->walk(); $rendered = stream_get_contents($stream, - 1, 0); - $this->assertEquals(str_replace(array( - "\n", - "\r" - ), "", $rendered), str_replace(array( - "\n", - "\r" - ), "", $source)); + $clear = function($s){ + return trim(preg_replace('/[\s]+/', " ", $s)); + }; + + $this->assertEquals($clear($source), $clear($rendered)); } public function testElementWithScript() |