diff options
author | Titouan Galopin <[email protected]> | 2018-11-05 01:35:23 +0100 |
---|---|---|
committer | Titouan Galopin <[email protected]> | 2018-11-05 01:35:23 +0100 |
commit | f7a954df2f0647c93b1d3d22c317aa5297ea4b05 (patch) | |
tree | 67cfe50d285bab2db880f0a2d92dbd757a0f347e /src | |
parent | b3ef91f0a13914d25469af64d01cb7df5964c978 (diff) |
Inline tag open in Tokenizer to further improve performance
Diffstat (limited to 'src')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 61 |
1 file changed, 28 insertions, 33 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index e1ca660..d08cba4 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -121,16 +121,30 @@ class Tokenizer */ protected function consumeData() { - // Character Ref - /* - * $this->characterReference() || $this->tagOpen() || $this->eof() || $this->characterData(); - */ + // Character reference $this->characterReference(); - $this->tagOpen(); - $this->eof(); - // Inline the parsing of characters as it's the critical performance path + // Parse tag + if ($this->scanner->current() === '<') { + // Any buffered text data can go out now. + $this->flushBuffer(); + + $tok = $this->scanner->next(); + + $this->markupDeclaration($tok) + || $this->endTag() + || $this->processingInstruction() + || $this->tagName() + // This always returns false. + || $this->parseError("Illegal tag opening") + || $this->characterData(); + } + + // Handle end of document $tok = $this->scanner->current(); + $this->eof($tok); + + // Parse character if ($tok !== false) { switch ($this->textMode) { case Elements::TEXT_RAW: @@ -272,15 +286,17 @@ class Tokenizer /** * If the document is read, emit an EOF event. */ - protected function eof() + protected function eof($tok) { - if ($this->scanner->current() === false) { + if ($tok === false) { // fprintf(STDOUT, "EOF"); $this->flushBuffer(); $this->events->eof(); $this->carryOn = false; + return true; } + return false; } @@ -303,32 +319,11 @@ class Tokenizer } /** - * Emit a tagStart event on encountering a tag. - * - * 8.2.4.8 - */ - protected function tagOpen() - { - if ($this->scanner->current() != '<') { - return false; - } - - // Any buffered text data can go out now. - $this->flushBuffer(); - - $this->scanner->next(); - - return $this->markupDeclaration() || $this->endTag() || $this->processingInstruction() || $this->tagName() || - // This always returns false. 
- $this->parseError("Illegal tag opening") || $this->characterData(); - } - - /** * Look for markup. */ - protected function markupDeclaration() + protected function markupDeclaration($tok) { - if ($this->scanner->current() != '!') { + if ($tok != '!') { return false; } @@ -756,7 +751,7 @@ class Tokenizer // EOF: die. if ($tok === false) { $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true); - return $this->eof(); + return $this->eof($tok); } // NULL char: convert. |