diff options
author | Matt Farina <[email protected]> | 2013-08-10 09:17:44 -0400 |
---|---|---|
committer | Matt Farina <[email protected]> | 2013-08-10 09:17:44 -0400 |
commit | 18979d6866d816b57f2199084c2dac6feeb9a456 (patch) | |
tree | 22440ec403c740a1e442b52f6117b97fd750e012 /src | |
parent | 65a11b73a06fcf3ce21db2298d9c460bcd286b66 (diff) |
#11: Updated the text handling in the tree builder to more accurately work in before head mode.
Diffstat (limited to 'src')
-rw-r--r-- | src/HTML5/Parser/DOMTreeBuilder.php | 12 |
1 file changed, 5 insertions, 7 deletions
```diff
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index b0e2e11..fe22eac 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -323,18 +323,16 @@ class DOMTreeBuilder implements EventHandler {
   public function text($data) {
     // XXX: Hmmm.... should we really be this strict?
     if ($this->insertMode < self::IM_IN_HEAD) {
-      // Per '8.2.5.4.3 The "before head" insertion mode' we are supposed to
-      // ignore " \t\n\r\f" characters and throw a parse error for other strings.
-      // In this case we are throwing a parse error for other strings while
-      // passing " \t\n\r\f" through to the DOM. Since this parser is not creating
-      // a DOM that will be used for rendering a display and the DOM may be
-      // turned back into html these characters are passed along to the DOM.
+      // Per '8.2.5.4.3 The "before head" insertion mode' the characters
+      // " \t\n\r\f" should be ignored but no mention of a parse error. This is
+      // practical as most documents contain these characters. Other text is not
+      // expected here so recording a parse error is necessary.
       $dataTmp = trim($data, " \t\n\r\f");
       if (!empty($dataTmp)) {
         //fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
         $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
-        $data = str_replace($dataTmp, '', $data);
       }
+      return;
     }
     //fprintf(STDOUT, "Appending text %s.", $data);
     $node = $this->doc->createTextNode($data);
```