summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php | 14
-rw-r--r-- test/HTML5/Parser/DOMTreeBuilderTest.php | 8
2 files changed, 18 insertions, 4 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index f0caeb4..b0e2e11 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -323,11 +323,17 @@ class DOMTreeBuilder implements EventHandler {
public function text($data) {
// XXX: Hmmm.... should we really be this strict?
if ($this->insertMode < self::IM_IN_HEAD) {
- $data = trim($data);
- if (!empty($data)) {
+ // Per '8.2.5.4.3 The "before head" insertion mode' we are supposed to
+ // ignore " \t\n\r\f" characters and throw a parse error for other strings.
+ // In this case we are throwing a parse error for other strings while
+ // passing " \t\n\r\f" through to the DOM. Since this parser is not creating
+ // a DOM that will be used for rendering a display and the DOM may be
+ // turned back into html these characters are passed along to the DOM.
+ $dataTmp = trim($data, " \t\n\r\f");
+ if (!empty($dataTmp)) {
//fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
- $this->parseError("Unexpected text. Ignoring: " . $data);
- return;
+ $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
+ $data = str_replace($dataTmp, '', $data);
}
}
//fprintf(STDOUT, "Appending text %s.", $data);
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index c27b7f3..a6c6a9c 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -194,6 +194,14 @@ class DOMTreeBuilderTest extends \HTML5\Tests\TestCase {
$data = $wrapper->childNodes->item(0);
$this->assertEquals(XML_TEXT_NODE, $data->nodeType);
$this->assertEquals('test', $data->data);
+
+ // The DomTreeBuilder has special handling for text when in before head mode.
+ $html = "<!DOCTYPE html><html>
+ Foo<head></head><body></body></html>";
+ $doc = $this->parse($html);
+ $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $doc->errors[0]);
+ $whiteSpace = $doc->documentElement->firstChild;
+ $this->assertEquals("\n ", $whiteSpace->data);
}
public function testParseErrors() {