summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser
diff options
context:
space:
mode:
authorChristophe Coevoet <[email protected]>2018-11-24 11:41:33 +0100
committerChristophe Coevoet <[email protected]>2018-11-24 11:50:12 +0100
commita2432e510d16cac3c563b593791f3422f74092bc (patch)
treec2d46d534e31dc4ff59aa666be3ff8b0310a6ad3 /src/HTML5/Parser
parent1fd419b9e62efcab9c79f3848c54dd3717a7e725 (diff)
Optimize consuming whitespaces
Callers that consume whitespace never use the matched substring: they need either its length or nothing at all. Returning the length directly avoids building the substring.
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r--src/HTML5/Parser/Scanner.php12
-rw-r--r--src/HTML5/Parser/Tokenizer.php6
2 files changed, 14 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index e81b3a9..cec9a13 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -223,10 +223,20 @@ class Scanner
* Consume whitespace.
*
* Whitespace in HTML5 is: formfeed, tab, newline, space.
+ *
+ * @return int|false The length of the matched whitespace, or false when the current position is already at or past EOF
*/
public function whitespace()
{
- return $this->doCharsWhile("\n\t\f ");
+ if ($this->char >= $this->EOF) {
+ return false;
+ }
+
+ $len = strspn($this->data, "\n\t\f ", $this->char);
+
+ $this->char += $len;
+
+ return $len;
}
/**
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index d420209..ba9de52 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -279,7 +279,7 @@ class Tokenizer
}
$len = strlen($sequence);
$this->scanner->consume($len);
- $len += strlen($this->scanner->whitespace());
+ $len += $this->scanner->whitespace();
if ($this->scanner->current() !== '>') {
$this->parseError("Unclosed RCDATA end tag");
}
@@ -779,7 +779,7 @@ class Tokenizer
$this->scanner->whitespace();
$pub = strtoupper($this->scanner->getAsciiAlpha());
- $white = strlen($this->scanner->whitespace());
+ $white = $this->scanner->whitespace();
// Get ID, and flag it as pub or system.
if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
@@ -909,7 +909,7 @@ class Tokenizer
$tok = $this->scanner->next();
$procName = $this->scanner->getAsciiAlpha();
- $white = strlen($this->scanner->whitespace());
+ $white = $this->scanner->whitespace();
// If not a PI, send to bogusComment.
if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == false) {