summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Christophe Coevoet <[email protected]> 2018-11-24 13:07:15 +0100
committer: Christophe Coevoet <[email protected]> 2018-11-26 12:27:45 +0100
commit: 89e1a7a540719f7ed42629d590eb33a2c0da0e50 (patch)
tree: 3200d6a283f00cc4f0c302d1c1290a1622f333c0 /src
parent: 54d066b1664a5fb749511c764e8d208f6bc80c3b (diff)
Optimize the token comparison
Tokens are always a single char. Using strspn to find whether they belong to a fixed list is slower than comparing them directly.
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php | 8
1 file changed, 3 insertions, 5 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 6284733..62c39f1 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -47,8 +47,6 @@ class Tokenizer
const CONFORMANT_HTML = 'html';
protected $mode = self::CONFORMANT_HTML;
- const WHITE = "\t\n\f ";
-
/**
* Create a new tokenizer.
*
@@ -159,7 +157,7 @@ class Tokenizer
break;
default:
- if (!strspn($tok, '<&')) {
+ if ('<' !== $tok && '&' !== $tok) {
// NULL character
if ("\00" === $tok) {
$this->parseError('Received null character.');
@@ -193,7 +191,7 @@ class Tokenizer
case Elements::TEXT_RCDATA:
return $this->rcdata($tok);
default:
- if (strspn($tok, '<&')) {
+ if ('<' === $tok || '&' === $tok) {
return false;
}
@@ -1093,7 +1091,7 @@ class Tokenizer
// These indicate not an entity. We return just
// the &.
- if (1 === strspn($tok, static::WHITE . '&<')) {
+ if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
// $this->scanner->next();
return '&';
}