summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAsmir Mustafic <[email protected]>2015-02-09 17:22:51 +0100
committerAsmir Mustafic <[email protected]>2015-02-09 17:22:51 +0100
commit236faa2648f4e89e7f5fce60cfc5f3ed95243f8c (patch)
tree9c4c443314eedb7d17365e78783d5c99470fa680
parentdda3253aa29800d12795c291e55578bbabbc4cb4 (diff)
parenta50e919341b310b30b617a2e1ea5945d94856017 (diff)
Merge pull request #74 from Masterminds/feature/html-parsing-options
HTML parsing options
-rw-r--r--README.md29
-rw-r--r--RELEASE.md4
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php41
-rw-r--r--test/HTML5/Parser/DOMTreeBuilderTest.php24
4 files changed, 84 insertions, 14 deletions
diff --git a/README.md b/README.md
index 65d6889..1a2a96b 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,35 @@ $html5->save($dom, 'out.html');
The `$dom` created by the parser is a full `DOMDocument` object. And the
`save()` and `saveHTML()` methods will take any DOMDocument.
+### Options
+
+It is possible to pass in an array of configuration options when loading
+an HTML5 document.
+
+```php
+// An associative array of options
+$options = array(
+ 'option_name' => 'option_value',
+);
+
+// Provide the options to the constructor
+$html5 = new HTML5($options);
+
+$dom = $html5->loadHTML($html);
+```
+
+The following options are supported:
+
+* `encode_entities` (boolean): Indicates that the serializer should aggressively
+ encode characters as entities. Without this, it only encodes the bare
+ minimum.
+* `disable_html_ns` (boolean): Prevents the parser from automatically
+ assigning the HTML5 namespace to the DOM document. This is for
+ non-namespace aware DOM tools.
+* `target_document` (\DOMDocument): A DOM document that will be used as the
+ destination for the parsed nodes.
+* `implicit_namespaces` (array): An associative array of namespaces that should be
+ used by the parser. Each key is a tag prefix; each value is the namespace URI.
## The Low-Level API
diff --git a/RELEASE.md b/RELEASE.md
index f87bad5..36d1630 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,6 +1,8 @@
# Release Notes
-X.X.X (XXXX-XX-XX)
+2.1.0 (2015-02-01)
+- #74: Added `disable_html_ns` and `target_document` DOM parsing options
+- Unified option names
- #73: Fixed alphabet, &szlig; now can be detected
- #75 and #76: Allow whitespace in RCDATA tags
- #77: Fixed parsing blunder for json embeds
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 0349d60..ccad229 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -38,6 +38,12 @@ class DOMTreeBuilder implements EventHandler
const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
+ const OPT_DISABLE_HTML_NS = 'disable_html_ns';
+
+ const OPT_TARGET_DOC = 'target_document';
+
+ const OPT_IMPLICIT_NS = 'implicit_namespaces';
+
/**
* Holds the HTML5 element names that cause a namespace switch
*
@@ -157,13 +163,17 @@ class DOMTreeBuilder implements EventHandler
{
$this->options = $options;
- $impl = new \DOMImplementation();
- // XXX:
- // Create the doctype. For now, we are always creating HTML5
- // documents, and attempting to up-convert any older DTDs to HTML5.
- $dt = $impl->createDocumentType('html');
- // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
- $this->doc = $impl->createDocument(null, null, $dt);
+ if (isset($options[self::OPT_TARGET_DOC])) {
+ $this->doc = $options[self::OPT_TARGET_DOC];
+ } else {
+ $impl = new \DOMImplementation();
+ // XXX:
+ // Create the doctype. For now, we are always creating HTML5
+ // documents, and attempting to up-convert any older DTDs to HTML5.
+ $dt = $impl->createDocumentType('html');
+ // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
+ $this->doc = $impl->createDocument(null, null, $dt);
+ }
$this->errors = array();
$this->current = $this->doc; // ->documentElement;
@@ -171,8 +181,15 @@ class DOMTreeBuilder implements EventHandler
// Create a rules engine for tags.
$this->rules = new TreeBuildingRules($this->doc);
+ $implicitNS = array();
+ if (isset($this->options[self::OPT_IMPLICIT_NS])) {
+ $implicitNS = $this->options[self::OPT_IMPLICIT_NS];
+ } elseif (isset($this->options["implicitNamespaces"])) {
+ $implicitNS = $this->options["implicitNamespaces"];
+ }
+
// Fill $nsStack with the default HTML5 namespaces, plus the "implicitNamespaces" array taken from $options
- array_unshift($this->nsStack, (isset($this->options["implicitNamespaces"]) ? $this->options["implicitNamespaces"] : array()) + array(
+ array_unshift($this->nsStack, $implicitNS + array(
'' => self::NAMESPACE_HTML
) + $this->implicitNamespaces);
@@ -345,10 +362,10 @@ class DOMTreeBuilder implements EventHandler
$ele = $this->doc->importNode($frag->documentElement, true);
} else {
- if (isset($this->nsStack[0][$prefix])) {
- $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
- } else {
+ if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
$ele = $this->doc->createElement($lname);
+ } else {
+ $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
}
}
@@ -664,4 +681,4 @@ class DOMTreeBuilder implements EventHandler
{
return $this->current->tagName == $tagname;
}
-}
+} \ No newline at end of file
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 5bba7cc..b2a2d39 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -55,6 +55,7 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertInstanceOf('\DOMDocument', $doc);
$this->assertEquals('html', $doc->documentElement->tagName);
+ $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
}
public function testStrangeCapitalization()
@@ -78,6 +79,28 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertEquals("foo", $xpath->query( "//x:script" )->item( 0 )->nodeValue);
}
+ public function testDocumentWithDisabledNamespaces()
+ {
+ $html = "<!DOCTYPE html><html></html>";
+ $doc = $this->parse($html, array('disable_html_ns' => true));
+
+ $this->assertInstanceOf('\DOMDocument', $doc);
+ $this->assertEquals('html', $doc->documentElement->tagName);
+ $this->assertNull($doc->documentElement->namespaceURI);
+ }
+
+ public function testDocumentWithATargetDocument()
+ {
+ $targetDom = new \DOMDocument();
+
+ $html = "<!DOCTYPE html><html></html>";
+ $doc = $this->parse($html, array('target_document' => $targetDom));
+
+ $this->assertInstanceOf('\DOMDocument', $doc);
+ $this->assertSame($doc, $targetDom);
+ $this->assertEquals('html', $doc->documentElement->tagName);
+ }
+
public function testDocumentFakeAttrAbsence()
{
$html = "<!DOCTYPE html><html xmlns=\"http://www.w3.org/1999/xhtml\"><body>foo</body></html>";
@@ -85,7 +108,6 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$xp = new \DOMXPath($doc);
$this->assertEquals(0, $xp->query("//@html5-php-fake-id-attribute")->length);
-
}
public function testFragment()