diff options
author | Asmir Mustafic <[email protected]> | 2018-11-27 12:29:35 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2018-11-27 12:29:35 +0100 |
commit | 182f34ddba2e31e625f255416d85bba38cc6b39b (patch) | |
tree | 066cceaabbc47c07b9147ba42b9e11cc24306b67 /src/HTML5 | |
parent | 88b7c663e69673f16f4ce10db947135b97f40e00 (diff) | |
parent | 6cdf4283046325b9bc2671d0648b73a2be1d0946 (diff) |
Merge pull request #161 from stof/optimize_main_loop
Optimize main loop
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 74 |
1 files changed, 31 insertions, 43 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 44b6875..a41fcb4 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -131,22 +131,28 @@ class Tokenizer $tok = $this->scanner->next(); - $this->markupDeclaration($tok) - || $this->endTag() - || $this->processingInstruction() - || $this->tagName() - // This always returns false. - || $this->parseError('Illegal tag opening') - || $this->characterData(); + if ('!' === $tok) { + $this->markupDeclaration(); + } elseif ('/' === $tok) { + $this->endTag(); + } elseif ('?' === $tok) { + $this->processingInstruction(); + } elseif (ctype_alpha($tok)) { + $this->tagName(); + } else { + $this->parseError('Illegal tag opening'); + // TODO is this necessary ? + $this->characterData(); + } $tok = $this->scanner->current(); } - // Handle end of document - $this->eof($tok); - - // Parse character - if (false !== $tok) { + if (false === $tok) { + // Handle end of document + $this->eof(); + } else { + // Parse character switch ($this->textMode) { case Elements::TEXT_RAW: $this->rawText($tok); @@ -288,29 +294,19 @@ class Tokenizer /** * If the document is read, emit an EOF event. */ - protected function eof($tok) + protected function eof() { - if (false === $tok) { - // fprintf(STDOUT, "EOF"); - $this->flushBuffer(); - $this->events->eof(); - $this->carryOn = false; - - return true; - } - - return false; + // fprintf(STDOUT, "EOF"); + $this->flushBuffer(); + $this->events->eof(); + $this->carryOn = false; } /** * Look for markup. */ - protected function markupDeclaration($tok) + protected function markupDeclaration() { - if ('!' != $tok) { - return false; - } - $tok = $this->scanner->next(); // Comment: @@ -377,11 +373,6 @@ class Tokenizer */ protected function tagName() { - $tok = $this->scanner->current(); - if (!ctype_alpha($tok)) { - return false; - } - // We know this is at least one char. $name = $this->scanner->charsWhile(':_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'); $name = self::CONFORMANT_XML === $this->mode ? $name : strtolower($name); @@ -743,12 +734,11 @@ class Tokenizer */ protected function doctype() { - if (strcasecmp($this->scanner->current(), 'D')) { - return false; - } // Check that string is DOCTYPE. - $chars = $this->scanner->charsWhile('DOCTYPEdoctype'); - if (strcasecmp($chars, 'DOCTYPE')) { + if ($this->scanner->sequenceMatches('DOCTYPE', false)) { + $this->scanner->consume(7); + } else { + $chars = $this->scanner->charsWhile('DOCTYPEdoctype'); $this->parseError('Expected DOCTYPE, got %s', $chars); return $this->bogusComment('<!' . $chars); @@ -760,8 +750,9 @@ class Tokenizer // EOF: die. if (false === $tok) { $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true); + $this->eof(); - return $this->eof($tok); + return true; } // NULL char: convert. @@ -812,7 +803,7 @@ class Tokenizer if (false === $id) { $this->events->doctype($doctypeName, $type, $pub, false); - return false; + return true; } // Premature EOF. @@ -887,9 +878,6 @@ class Tokenizer */ protected function cdataSection() { - if ('[' != $this->scanner->current()) { - return false; - } $cdata = ''; $this->scanner->consume(); |