diff options
author | Christophe Coevoet <[email protected]> | 2018-11-24 13:07:15 +0100 |
---|---|---|
committer | Christophe Coevoet <[email protected]> | 2018-11-26 12:27:45 +0100 |
commit | 89e1a7a540719f7ed42629d590eb33a2c0da0e50 (patch) | |
tree | 3200d6a283f00cc4f0c302d1c1290a1622f333c0 | |
parent | 54d066b1664a5fb749511c764e8d208f6bc80c3b (diff) |
Optimize the token comparison

Tokens are always a single character. Using `strspn()` to check whether a
token belongs to a fixed list of characters is slower than comparing it
directly against each character.
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 8 |
1 file changed, 3 insertions, 5 deletions
```diff
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 6284733..62c39f1 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -47,8 +47,6 @@ class Tokenizer
     const CONFORMANT_HTML = 'html';
     protected $mode = self::CONFORMANT_HTML;
 
-    const WHITE = "\t\n\f ";
-
     /**
      * Create a new tokenizer.
      *
@@ -159,7 +157,7 @@ class Tokenizer
                 break;
 
             default:
-                if (!strspn($tok, '<&')) {
+                if ('<' !== $tok && '&' !== $tok) {
                     // NULL character
                     if ("\00" === $tok) {
                         $this->parseError('Received null character.');
@@ -193,7 +191,7 @@ class Tokenizer
             case Elements::TEXT_RCDATA:
                 return $this->rcdata($tok);
             default:
-                if (strspn($tok, '<&')) {
+                if ('<' === $tok || '&' === $tok) {
                     return false;
                 }
 
@@ -1093,7 +1091,7 @@ class Tokenizer
 
         // These indicate not an entity. We return just
         // the &.
-        if (1 === strspn($tok, static::WHITE . '&<')) {
+        if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
             // $this->scanner->next();
             return '&';
         }
```