summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser
diff options
context:
space:
mode:
author Christophe Coevoet <[email protected]> 2018-11-27 14:16:40 +0100
committer Christophe Coevoet <[email protected]> 2018-11-27 14:16:40 +0100
commit ced5b845b048a65f63c0b324a1357926fdd8403f (patch)
tree 2604002518f4ce2024219a2f10f3d208eea426e0 /src/HTML5/Parser
parent 182f34ddba2e31e625f255416d85bba38cc6b39b (diff)
Optimize the processing of text between nodes
Instead of processing text one character at a time in the main loop, text is now consumed in a batch up to the next special character: `<` and `&`, which have special handling in the main loop, and NUL, which needs to report a parse error.
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 16
1 file changed, 11 insertions, 5 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index a41fcb4..f4e9652 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -163,15 +163,21 @@ class Tokenizer
break;
default:
- if ('<' !== $tok && '&' !== $tok) {
- // NULL character
- if ("\00" === $tok) {
- $this->parseError('Received null character.');
- }
+ if ('<' === $tok || '&' === $tok) {
+ break;
+ }
+
+ // NULL character
+ if ("\00" === $tok) {
+ $this->parseError('Received null character.');
$this->text .= $tok;
$this->scanner->consume();
+
+ break;
}
+
+ $this->text .= $this->scanner->charsUntil("<&\0");
}
}