diff options
author | Christophe Coevoet <[email protected]> | 2018-11-27 14:16:40 +0100 |
---|---|---|
committer | Christophe Coevoet <[email protected]> | 2018-11-27 14:16:40 +0100 |
commit | ced5b845b048a65f63c0b324a1357926fdd8403f (patch) | |
tree | 2604002518f4ce2024219a2f10f3d208eea426e0 /src/HTML5 | |
parent | 182f34ddba2e31e625f255416d85bba38cc6b39b (diff) |
Optimize the processing of text between nodes
Instead of processing text tokens one by one in the main loop, text is
now processed in batches up to the next special token (< and &, which have
special handling in the main loop, and NUL characters, which need to report
a parse error).
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 16 |
1 files changed, 11 insertions, 5 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index a41fcb4..f4e9652 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -163,15 +163,21 @@ class Tokenizer break; default: - if ('<' !== $tok && '&' !== $tok) { - // NULL character - if ("\00" === $tok) { - $this->parseError('Received null character.'); - } + if ('<' === $tok || '&' === $tok) { + break; + } + + // NULL character + if ("\00" === $tok) { + $this->parseError('Received null character.'); $this->text .= $tok; $this->scanner->consume(); + + break; } + + $this->text .= $this->scanner->charsUntil("<&\0"); } } |