diff options
author | Asmir Mustafic <[email protected]> | 2018-11-24 11:21:05 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2018-11-24 11:21:05 +0100 |
commit | 855fadd5a9ca31c09a4bc8b6a3656366f1755ea8 (patch) | |
tree | d296d6121f76004a872e3d89bc5e795fa48b08fd /src/HTML5/Parser | |
parent | 9d32f2d6f451e736634aeaffd21ede3990616f01 (diff) | |
parent | a56b43814bbe140e6aa94311fc0308f0fa9b220f (diff) |
Merge pull request #151 from stof/optimize_character_reference
Optimize the handling of references when consuming data
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 30 |
1 file changed, 8 insertions, 22 deletions
```diff
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 7b0b3f3..4aeeb50 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -121,11 +121,16 @@ class Tokenizer
      */
     protected function consumeData()
     {
-        // Character reference
-        $this->characterReference();
-
         $tok = $this->scanner->current();
 
+        if ($tok === '&') {
+            // Character reference
+            $ref = $this->decodeCharacterReference();
+            $this->buffer($ref);
+
+            $tok = $this->scanner->current();
+        }
+
         // Parse tag
         if ($tok === '<') {
             // Any buffered text data can go out now.
@@ -304,25 +309,6 @@ class Tokenizer
     }
 
     /**
-     * Handle character references (aka entities).
-     *
-     * This version is specific to PCDATA, as it buffers data into the
-     * text buffer. For a generic version, see decodeCharacterReference().
-     *
-     * HTML5 8.2.4.2
-     */
-    protected function characterReference()
-    {
-        if ($this->scanner->current() !== '&') {
-            return false;
-        }
-
-        $ref = $this->decodeCharacterReference();
-        $this->buffer($ref);
-        return true;
-    }
-
-    /**
      * Look for markup.
      */
     protected function markupDeclaration($tok)
```