diff options
author | Christophe Coevoet <[email protected]> | 2018-11-24 10:41:49 +0100 |
---|---|---|
committer | Christophe Coevoet <[email protected]> | 2018-11-24 10:41:49 +0100 |
commit | a56b43814bbe140e6aa94311fc0308f0fa9b220f (patch) | |
tree | d296d6121f76004a872e3d89bc5e795fa48b08fd /src/HTML5/Parser | |
parent | 9d32f2d6f451e736634aeaffd21ede3990616f01 (diff) |
Optimize the handling of references when consuming data
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 30 |
1 files changed, 8 insertions, 22 deletions
```diff
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 7b0b3f3..4aeeb50 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -121,11 +121,16 @@ class Tokenizer
      */
     protected function consumeData()
     {
-        // Character reference
-        $this->characterReference();
-
         $tok = $this->scanner->current();
 
+        if ($tok === '&') {
+            // Character reference
+            $ref = $this->decodeCharacterReference();
+            $this->buffer($ref);
+
+            $tok = $this->scanner->current();
+        }
+
         // Parse tag
         if ($tok === '<') {
             // Any buffered text data can go out now.
@@ -304,25 +309,6 @@ class Tokenizer
     }
 
     /**
-     * Handle character references (aka entities).
-     *
-     * This version is specific to PCDATA, as it buffers data into the
-     * text buffer. For a generic version, see decodeCharacterReference().
-     *
-     * HTML5 8.2.4.2
-     */
-    protected function characterReference()
-    {
-        if ($this->scanner->current() !== '&') {
-            return false;
-        }
-
-        $ref = $this->decodeCharacterReference();
-        $this->buffer($ref);
-        return true;
-    }
-
-    /**
      * Look for markup.
      */
     protected function markupDeclaration($tok)
```