diff options
author | Matt Farina <[email protected]> | 2013-08-07 11:57:33 -0400 |
---|---|---|
committer | Matt Farina <[email protected]> | 2013-08-07 11:57:33 -0400 |
commit | 65a11b73a06fcf3ce21db2298d9c460bcd286b66 (patch) | |
tree | 0e002119a15259f7ebcfbfa3ec7524e131f165ef /src/HTML5 | |
parent | 2efd0c6052bf1492d3953d442c46e5ab9e45888a (diff) |
#11: Updating the text handling for parsing when in "before head" mode. Now passing the ignored whitespace characters (" \t\n\r\f") through to the DOM and giving a parse error on other strings. Since this DOM is not used to render for display, and it may be turned back into HTML, it is useful to preserve these characters.
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/DOMTreeBuilder.php | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index f0caeb4..b0e2e11 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -323,11 +323,17 @@ class DOMTreeBuilder implements EventHandler { public function text($data) { // XXX: Hmmm.... should we really be this strict? if ($this->insertMode < self::IM_IN_HEAD) { - $data = trim($data); - if (!empty($data)) { + // Per '8.2.5.4.3 The "before head" insertion mode' we are supposed to + // ignore " \t\n\r\f" characters and throw a parse error for other strings. + // In this case we are throwing a parse error for other strings while + // passing " \t\n\r\f" through to the DOM. Since this parser is not creating + // a DOM that will be used for rendering a display and the DOM may be + // turned back into html these characters are passed along to the DOM. + $dataTmp = trim($data, " \t\n\r\f"); + if (!empty($dataTmp)) { //fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode); - $this->parseError("Unexpected text. Ignoring: " . $data); - return; + $this->parseError("Unexpected text. Ignoring: " . $dataTmp); + $data = str_replace($dataTmp, '', $data); } } //fprintf(STDOUT, "Appending text %s.", $data); |