summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Matt Farina <[email protected]> 2013-08-10 09:17:44 -0400
committer: Matt Farina <[email protected]> 2013-08-10 09:17:44 -0400
commit: 18979d6866d816b57f2199084c2dac6feeb9a456 (patch)
tree: 22440ec403c740a1e442b52f6117b97fd750e012 /src
parent: 65a11b73a06fcf3ce21db2298d9c460bcd286b66 (diff)
#11: Updated the text handling in the tree builder to work more accurately in "before head" mode.
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php 12
1 file changed, 5 insertions, 7 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index b0e2e11..fe22eac 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -323,18 +323,16 @@ class DOMTreeBuilder implements EventHandler {
public function text($data) {
// XXX: Hmmm.... should we really be this strict?
if ($this->insertMode < self::IM_IN_HEAD) {
- // Per '8.2.5.4.3 The "before head" insertion mode' we are supposed to
- // ignore " \t\n\r\f" characters and throw a parse error for other strings.
- // In this case we are throwing a parse error for other strings while
- // passing " \t\n\r\f" through to the DOM. Since this parser is not creating
- // a DOM that will be used for rendering a display and the DOM may be
- // turned back into html these characters are passed along to the DOM.
+ // Per '8.2.5.4.3 The "before head" insertion mode' the characters
+ // " \t\n\r\f" should be ignored but no mention of a parse error. This is
+ // practical as most documents contain these characters. Other text is not
+ // expected here so recording a parse error is necessary.
$dataTmp = trim($data, " \t\n\r\f");
if (!empty($dataTmp)) {
//fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
$this->parseError("Unexpected text. Ignoring: " . $dataTmp);
- $data = str_replace($dataTmp, '', $data);
}
+ return;
}
//fprintf(STDOUT, "Appending text %s.", $data);
$node = $this->doc->createTextNode($data);