diff options
author | Asmir Mustafic <[email protected]> | 2018-11-24 19:53:49 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2018-11-24 19:53:49 +0100 |
commit | c5dba6d2a1a55e20ad00832fb5b0b3db3704874d (patch) | |
tree | f83cf587d3b8fd29ff82b3c30b961ad34ddff6e9 /src/HTML5 | |
parent | e3d1145505a17e7af5a2f0dc3532d480ab8b4cff (diff) | |
parent | a2432e510d16cac3c563b593791f3422f74092bc (diff) |
Merge pull request #153 from stof/optimize_whitespace
Optimize consuming whitespaces
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Scanner.php | 12 | ||||
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 6 |
2 files changed, 14 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index e81b3a9..cec9a13 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -223,10 +223,20 @@ class Scanner
      * Consume whitespace.
      *
      * Whitespace in HTML5 is: formfeed, tab, newline, space.
+     *
+     * @return int The length of the matched whitespaces
      */
     public function whitespace()
     {
-        return $this->doCharsWhile("\n\t\f ");
+        if ($this->char >= $this->EOF) {
+            return false;
+        }
+
+        $len = strspn($this->data, "\n\t\f ", $this->char);
+
+        $this->char += $len;
+
+        return $len;
     }
 
     /**
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index d420209..ba9de52 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -279,7 +279,7 @@ class Tokenizer
         }
         $len = strlen($sequence);
         $this->scanner->consume($len);
-        $len += strlen($this->scanner->whitespace());
+        $len += $this->scanner->whitespace();
         if ($this->scanner->current() !== '>') {
             $this->parseError("Unclosed RCDATA end tag");
         }
@@ -779,7 +779,7 @@ class Tokenizer
         $this->scanner->whitespace();
 
         $pub = strtoupper($this->scanner->getAsciiAlpha());
-        $white = strlen($this->scanner->whitespace());
+        $white = $this->scanner->whitespace();
 
         // Get ID, and flag it as pub or system.
         if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
@@ -909,7 +909,7 @@ class Tokenizer
 
         $tok = $this->scanner->next();
         $procName = $this->scanner->getAsciiAlpha();
-        $white = strlen($this->scanner->whitespace());
+        $white = $this->scanner->whitespace();
 
         // If not a PI, send to bogusComment.
         if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == false) {