diff options
author | Matt Butcher <[email protected]> | 2013-09-15 14:07:03 -0600 |
---|---|---|
committer | Matt Butcher <[email protected]> | 2013-09-15 14:07:03 -0600 |
commit | 3b9f5018aa0b72b4eaf8fd9eec98348a0b11f807 (patch) | |
tree | cad2730ee983adc2b1cbdd760643b1f8bdb17e84 | |
parent | 3efe5c017d21c6606f5b301c379371469aa2524d (diff) |
Fix for #13: Malformed HTML is getting mangled instead of fixed.
Thanks to @sascha-tasche for reporting it, and to @mattfarina
for digging up what we should really do to fix it.
-rw-r--r-- | src/HTML5/Parser/DOMTreeBuilder.php | 11 | ||||
-rw-r--r-- | test/HTML5/Parser/DOMTreeBuilderTest.php | 14 |
2 files changed, 25 insertions, 0 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index fe22eac..be208e2 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -14,6 +14,11 @@ use HTML5\Elements; * are implemented herein; however, not all of them are. Since we do not * assume a graphical user agent, no presentation-specific logic is conducted * during tree building. + * + * FIXME: The present tree builder does not exactly follow the state machine rules + * for insert modes as outlined in the HTML5 spec. The processor needs to be + * re-written to accommodate this. See, for example, the Go language HTML5 + * parser. */ class DOMTreeBuilder implements EventHandler { @@ -251,6 +256,12 @@ class DOMTreeBuilder implements EventHandler { } } + // This is sort of a last-ditch attempt to correct for cases where no head/body + // elements are provided. + if ($this->insertMode <= self::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') { + $this->insertMode = self::IM_IN_BODY; + } + // Return the element mask, which the tokenizer can then use to set // various processing rules. return Elements::element($name); diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index 1cb4a8c..adfc2c9 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -300,4 +300,18 @@ class DOMTreeBuilderTest extends \HTML5\Tests\TestCase { $textPath = $doc->getElementsByTagName('textPath')->item(0); $this->assertEquals('textPath', $textPath->tagName); } + + /** + * Regression for issue #13 + */ + public function testRegressionHTMLNoBody() { + $html = '<!DOCTYPE html><html><span id="test">Test</span></html>'; + $doc = $this->parse($html); + $span = $doc->getElementById('test'); + + $this->assertEmpty($doc->errors); + + $this->assertEquals('span', $span->tagName); + $this->assertEquals('Test', $span->textContent); + } } |