summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Butcher <[email protected]>2013-09-15 14:07:03 -0600
committerMatt Butcher <[email protected]>2013-09-15 14:07:03 -0600
commit3b9f5018aa0b72b4eaf8fd9eec98348a0b11f807 (patch)
treecad2730ee983adc2b1cbdd760643b1f8bdb17e84
parent3efe5c017d21c6606f5b301c379371469aa2524d (diff)
Fix for #13: Malformed HTML is getting mangled instead of fixed.
Thanks to @sascha-tasche for reporting it. And to @mattfarina for digging up what we should really do to fix it.
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php11
-rw-r--r--test/HTML5/Parser/DOMTreeBuilderTest.php14
2 files changed, 25 insertions, 0 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index fe22eac..be208e2 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -14,6 +14,11 @@ use HTML5\Elements;
* are implemented herein; however, not all of them are. Since we do not
* assume a graphical user agent, no presentation-specific logic is conducted
* during tree building.
+ *
+ * FIXME: The present tree builder does not exactly follow the state machine rules
+ * for insertion modes as outlined in the HTML5 spec. The processor needs to be
+ * rewritten to accommodate this. See, for example, the Go language HTML5
+ * parser.
*/
class DOMTreeBuilder implements EventHandler {
@@ -251,6 +256,12 @@ class DOMTreeBuilder implements EventHandler {
}
}
+ // This is sort of a last-ditch attempt to correct for cases where no head/body
+ // elements are provided.
+ if ($this->insertMode <= self::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
+ $this->insertMode = self::IM_IN_BODY;
+ }
+
// Return the element mask, which the tokenizer can then use to set
// various processing rules.
return Elements::element($name);
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 1cb4a8c..adfc2c9 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -300,4 +300,18 @@ class DOMTreeBuilderTest extends \HTML5\Tests\TestCase {
$textPath = $doc->getElementsByTagName('textPath')->item(0);
$this->assertEquals('textPath', $textPath->tagName);
}
+
+ /**
+ * Regression for issue #13
+ */
+ public function testRegressionHTMLNoBody() {
+ $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
+ $doc = $this->parse($html);
+ $span = $doc->getElementById('test');
+
+ $this->assertEmpty($doc->errors);
+
+ $this->assertEquals('span', $span->tagName);
+ $this->assertEquals('Test', $span->textContent);
+ }
}