summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Christophe Coevoet <[email protected]> 2018-11-24 10:41:49 +0100
committer Christophe Coevoet <[email protected]> 2018-11-24 10:41:49 +0100
commit a56b43814bbe140e6aa94311fc0308f0fa9b220f (patch)
tree d296d6121f76004a872e3d89bc5e795fa48b08fd /src
parent 9d32f2d6f451e736634aeaffd21ede3990616f01 (diff)
Optimize the handling of references when consuming data
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 30
1 files changed, 8 insertions, 22 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 7b0b3f3..4aeeb50 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -121,11 +121,16 @@ class Tokenizer
*/
protected function consumeData()
{
- // Character reference
- $this->characterReference();
-
$tok = $this->scanner->current();
+ if ($tok === '&') {
+ // Character reference
+ $ref = $this->decodeCharacterReference();
+ $this->buffer($ref);
+
+ $tok = $this->scanner->current();
+ }
+
// Parse tag
if ($tok === '<') {
// Any buffered text data can go out now.
@@ -304,25 +309,6 @@ class Tokenizer
}
/**
- * Handle character references (aka entities).
- *
- * This version is specific to PCDATA, as it buffers data into the
- * text buffer. For a generic version, see decodeCharacterReference().
- *
- * HTML5 8.2.4.2
- */
- protected function characterReference()
- {
- if ($this->scanner->current() !== '&') {
- return false;
- }
-
- $ref = $this->decodeCharacterReference();
- $this->buffer($ref);
- return true;
- }
-
- /**
* Look for markup.
*/
protected function markupDeclaration($tok)