summary | refs | log | tree | commit | diff
path: root/src/HTML5/Parser
diff options
context:
space:
mode:
author Asmir Mustafic <[email protected]> 2017-08-31 13:22:01 +0200
committer Asmir Mustafic <[email protected]> 2017-08-31 13:22:01 +0200
commit ca28ecc48fbd2a51ee6b17ee38fd8168243696cf (patch)
tree 40bfeb0ed9b1d441bc5fc994fcf3c5c406bad584 /src/HTML5/Parser
parent e965886a79a560b4b00a4c471e2bdfafea23fdcb (diff)
reduce number of times "current" is invoked
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 65
1 files changed, 36 insertions, 29 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 45774b2..95dbf84 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -83,11 +83,8 @@ class Tokenizer
*/
public function parse()
{
- $p = 0;
do {
- $p = $this->scanner->position();
$this->consumeData();
-
// FIXME: Add infinite loop protection.
} while ($this->carryOn);
}
@@ -145,7 +142,8 @@ class Tokenizer
*/
protected function characterData()
{
- if ($this->scanner->current() === false) {
+ $tok = $this->scanner->current();
+ if ($tok === false) {
return false;
}
switch ($this->textMode) {
@@ -154,7 +152,6 @@ class Tokenizer
case Elements::TEXT_RCDATA:
return $this->rcdata();
default:
- $tok = $this->scanner->current();
if (strspn($tok, "<&")) {
return false;
}
@@ -408,24 +405,26 @@ class Tokenizer
if ($tok == '/') {
$this->scanner->next();
$this->scanner->whitespace();
- if ($this->scanner->current() == '>') {
+ $tok = $this->scanner->current();
+
+ if ($tok == '>') {
$selfClose = true;
return true;
}
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
$this->parseError("Unexpected EOF inside of tag.");
return true;
}
// Basically, we skip the / token and go on.
// See 8.2.4.43.
- $this->parseError("Unexpected '%s' inside of a tag.", $this->scanner->current());
+ $this->parseError("Unexpected '%s' inside of a tag.", $tok);
return false;
}
- if ($this->scanner->current() == '>') {
+ if ($tok == '>') {
return true;
}
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
$this->parseError("Unexpected EOF inside of tag.");
return true;
}
@@ -541,15 +540,21 @@ class Tokenizer
{
$stoplist = "\f" . $quote;
$val = '';
- $tok = $this->scanner->current();
- while (strspn($tok, $stoplist) == 0 && $tok !== false) {
- if ($tok == '&') {
- $val .= $this->decodeCharacterReference(true);
- $tok = $this->scanner->current();
+
+ while (true) {
+ $tokens = $this->scanner->charsUntil($stoplist.'&');
+ if ($tokens !== false) {
+ $val .= $tokens;
} else {
- $val .= $tok;
- $tok = $this->scanner->next();
+ break;
}
+
+ $tok = $this->scanner->current();
+ if ($tok == '&') {
+ $val .= $this->decodeCharacterReference(true, $tok);
+ continue;
+ }
+ break;
}
$this->scanner->next();
return $val;
@@ -591,18 +596,18 @@ class Tokenizer
*/
protected function bogusComment($leading = '')
{
-
- // TODO: This can be done more efficiently when the
- // scanner exposes a readUntil() method.
$comment = $leading;
+ $tokens = $this->scanner->charsUntil('>');
+ if ($tokens !== false) {
+ $comment .= $tokens;
+ }
$tok = $this->scanner->current();
- do {
+ if ($tok !== false) {
$comment .= $tok;
- $tok = $this->scanner->next();
- } while ($tok !== false && $tok != '>');
+ }
$this->flushBuffer();
- $this->events->comment($comment . $tok);
+ $this->events->comment($comment);
$this->scanner->next();
return true;
@@ -646,15 +651,17 @@ class Tokenizer
*/
protected function isCommentEnd()
{
+ $tok = $this->scanner->current();
+
// EOF
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
// Hit the end.
$this->parseError("Unexpected EOF in a comment.");
return true;
}
// If it doesn't start with -, not the end.
- if ($this->scanner->current() != '-') {
+ if ($tok != '-') {
return false;
}
@@ -737,7 +744,6 @@ class Tokenizer
$pub = strtoupper($this->scanner->getAsciiAlpha());
$white = strlen($this->scanner->whitespace());
- $tok = $this->scanner->current();
// Get ID, and flag it as pub or system.
if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
@@ -938,10 +944,11 @@ class Tokenizer
$len = strlen($sequence);
$buffer = '';
for ($i = 0; $i < $len; ++ $i) {
- $buffer .= $this->scanner->current();
+ $tok = $this->scanner->current();
+ $buffer .= $tok;
// EOF. Rewind and let the caller handle it.
- if ($this->scanner->current() === false) {
+ if ($tok === false) {
$this->scanner->unconsume($i);
return false;
}