diff options
Diffstat (limited to 'src/HTML5/Parser/Tokenizer.php')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 13 |
1 files changed, 7 insertions, 6 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index df77b5d..a79781f 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -498,6 +498,7 @@ class Tokenizer { while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) { if ($tok == '&') { $val .= $this->decodeCharacterReference(TRUE); + $tok = $this->scanner->current(); } else { if(strspn($tok, "\"'<=`") > 0) { @@ -789,7 +790,7 @@ class Tokenizer { * * XML processing instructions are supposed to be ignored in HTML5, * treated as "bogus comments". However, since we're not a user - * agent, we allow them. We consume until ?> and then issue a + * agent, we allow them. We consume until ?> and then issue a * EventListener::processingInstruction() event. */ protected function processingInstruction() { @@ -834,7 +835,7 @@ class Tokenizer { // ================================================================ /** - * Read from the input stream until we get to the desired sequene + * Read from the input stream until we get to the desired sequene * or hit the end of the input stream. */ protected function readUntilSequence($sequence) { @@ -864,11 +865,11 @@ class Tokenizer { * This will read the stream for the $sequence. If it's * found, this will return TRUE. If not, return FALSE. * Since this unconsumes any chars it reads, the caller - * will still need to read the next sequence, even if + * will still need to read the next sequence, even if * this returns TRUE. * * Example: $this->sequenceMatches('</script>') will - * see if the input stream is at the start of a + * see if the input stream is at the start of a * '</script>' string. */ protected function sequenceMatches($sequence) { @@ -917,7 +918,7 @@ class Tokenizer { /** * Emit a parse error. * - * A parse error always returns FALSE because it never consumes any + * A parse error always returns FALSE because it never consumes any * characters. */ protected function parseError($msg) { @@ -1023,7 +1024,7 @@ class Tokenizer { return $entity; } - // If in an attribute, then failing to match ; means unconsume the + // If in an attribute, then failing to match ; means unconsume the // entire string. Otherwise, failure to match is an error. if ($inAttribute) { $this->scanner->unconsume($this->scanner->position() - $start); |