diff options
author | Christophe Coevoet <[email protected]> | 2018-11-24 11:41:33 +0100 |
---|---|---|
committer | Christophe Coevoet <[email protected]> | 2018-11-24 11:50:12 +0100 |
commit | a2432e510d16cac3c563b593791f3422f74092bc (patch) | |
tree | c2d46d534e31dc4ff59aa666be3ff8b0310a6ad3 /src/HTML5 | |
parent | 1fd419b9e62efcab9c79f3848c54dd3717a7e725 (diff) |
Optimize whitespace consumption
Callers that consume whitespace never use the matched substring: they
need either its length or nothing at all.
Returning the length directly avoids building the substring.
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Scanner.php | 12 | ||||
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 6 |
2 files changed, 14 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php index e81b3a9..cec9a13 100644 --- a/src/HTML5/Parser/Scanner.php +++ b/src/HTML5/Parser/Scanner.php @@ -223,10 +223,20 @@ class Scanner * Consume whitespace. * * Whitespace in HTML5 is: formfeed, tab, newline, space. + * + * @return int The length of the matched whitespaces */ public function whitespace() { - return $this->doCharsWhile("\n\t\f "); + if ($this->char >= $this->EOF) { + return false; + } + + $len = strspn($this->data, "\n\t\f ", $this->char); + + $this->char += $len; + + return $len; } /** diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index d420209..ba9de52 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -279,7 +279,7 @@ class Tokenizer } $len = strlen($sequence); $this->scanner->consume($len); - $len += strlen($this->scanner->whitespace()); + $len += $this->scanner->whitespace(); if ($this->scanner->current() !== '>') { $this->parseError("Unclosed RCDATA end tag"); } @@ -779,7 +779,7 @@ class Tokenizer $this->scanner->whitespace(); $pub = strtoupper($this->scanner->getAsciiAlpha()); - $white = strlen($this->scanner->whitespace()); + $white = $this->scanner->whitespace(); // Get ID, and flag it as pub or system. if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) { @@ -909,7 +909,7 @@ class Tokenizer $tok = $this->scanner->next(); $procName = $this->scanner->getAsciiAlpha(); - $white = strlen($this->scanner->whitespace()); + $white = $this->scanner->whitespace(); // If not a PI, send to bogusComment. if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == false) { |