From a56b43814bbe140e6aa94311fc0308f0fa9b220f Mon Sep 17 00:00:00 2001 From: Christophe Coevoet Date: Sat, 24 Nov 2018 10:41:49 +0100 Subject: Optimize the handling of references when consuming data --- src/HTML5/Parser/Tokenizer.php | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 7b0b3f3..4aeeb50 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -121,11 +121,16 @@ class Tokenizer */ protected function consumeData() { - // Character reference - $this->characterReference(); - $tok = $this->scanner->current(); + if ($tok === '&') { + // Character reference + $ref = $this->decodeCharacterReference(); + $this->buffer($ref); + + $tok = $this->scanner->current(); + } + // Parse tag if ($tok === '<') { // Any buffered text data can go out now. @@ -303,25 +308,6 @@ class Tokenizer return false; } - /** - * Handle character references (aka entities). - * - * This version is specific to PCDATA, as it buffers data into the - * text buffer. For a generic version, see decodeCharacterReference(). - * - * HTML5 8.2.4.2 - */ - protected function characterReference() - { - if ($this->scanner->current() !== '&') { - return false; - } - - $ref = $this->decodeCharacterReference(); - $this->buffer($ref); - return true; - } - /** * Look for markup. */ -- cgit v1.2.3