From 7b339b5d8c364d62b0b982604f63085c91720702 Mon Sep 17 00:00:00 2001 From: Christophe Coevoet Date: Sat, 24 Nov 2018 15:43:40 +0100 Subject: Optimize the handling of the EOF detection in the main loop The eof() method is a no-op when the token is not false. As the main loop already needs to identify that case anyway, skipping the method call reduces the cost of parsing text tokens. --- src/HTML5/Parser/Tokenizer.php | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index bce9da9..74d86a3 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -144,11 +144,11 @@ class Tokenizer $tok = $this->scanner->current(); } - // Handle end of document - $this->eof($tok); - - // Parse character - if (false !== $tok) { + if (false === $tok) { + // Handle end of document + $this->eof(); + } else { + // Parse character switch ($this->textMode) { case Elements::TEXT_RAW: $this->rawText($tok); @@ -290,18 +290,12 @@ class Tokenizer /** * If the document is read, emit an EOF event. */ - protected function eof($tok) + protected function eof() { - if (false === $tok) { - // fprintf(STDOUT, "EOF"); - $this->flushBuffer(); - $this->events->eof(); - $this->carryOn = false; - - return true; - } - - return false; + // fprintf(STDOUT, "EOF"); + $this->flushBuffer(); + $this->events->eof(); + $this->carryOn = false; } /** @@ -744,8 +738,9 @@ class Tokenizer // EOF: die. if (false === $tok) { $this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true); + $this->eof(); - return $this->eof($tok); + return true; } // NULL char: convert. -- cgit v1.2.3