summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Asmir Mustafic <[email protected]> 2018-11-24 19:53:49 +0100
committer GitHub <[email protected]> 2018-11-24 19:53:49 +0100
commit c5dba6d2a1a55e20ad00832fb5b0b3db3704874d (patch)
tree f83cf587d3b8fd29ff82b3c30b961ad34ddff6e9 /src
parent e3d1145505a17e7af5a2f0dc3532d480ab8b4cff (diff)
parent a2432e510d16cac3c563b593791f3422f74092bc (diff)
Merge pull request #153 from stof/optimize_whitespace
Optimize consuming whitespaces
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5/Parser/Scanner.php 12
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 6
2 files changed, 14 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index e81b3a9..cec9a13 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -223,10 +223,20 @@ class Scanner
* Consume whitespace.
*
* Whitespace in HTML5 is: formfeed, tab, newline, space.
+ *
+ * @return int The length of the matched whitespaces
*/
public function whitespace()
{
- return $this->doCharsWhile("\n\t\f ");
+ if ($this->char >= $this->EOF) {
+ return false;
+ }
+
+ $len = strspn($this->data, "\n\t\f ", $this->char);
+
+ $this->char += $len;
+
+ return $len;
}
/**
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index d420209..ba9de52 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -279,7 +279,7 @@ class Tokenizer
}
$len = strlen($sequence);
$this->scanner->consume($len);
- $len += strlen($this->scanner->whitespace());
+ $len += $this->scanner->whitespace();
if ($this->scanner->current() !== '>') {
$this->parseError("Unclosed RCDATA end tag");
}
@@ -779,7 +779,7 @@ class Tokenizer
$this->scanner->whitespace();
$pub = strtoupper($this->scanner->getAsciiAlpha());
- $white = strlen($this->scanner->whitespace());
+ $white = $this->scanner->whitespace();
// Get ID, and flag it as pub or system.
if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
@@ -909,7 +909,7 @@ class Tokenizer
$tok = $this->scanner->next();
$procName = $this->scanner->getAsciiAlpha();
- $white = strlen($this->scanner->whitespace());
+ $white = $this->scanner->whitespace();
// If not a PI, send to bogusComment.
if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == false) {