diff options
author | Asmir Mustafic <[email protected]> | 2017-08-28 14:44:01 +0200 |
---|---|---|
committer | Asmir Mustafic <[email protected]> | 2017-08-28 14:44:01 +0200 |
commit | 2a38f56f3772f943be436c7b411c2ae5fac6cee6 (patch) | |
tree | 2b1cec9a30b2301afeda759a16341972b5a492bd /src/HTML5 | |
parent | e965886a79a560b4b00a4c471e2bdfafea23fdcb (diff) |
Fixes https://github.com/Masterminds/html5-php/issues/124
Reference: https://www.w3.org/TR/html52/syntax.html#character-reference-state
If the character reference was consumed as part of an attribute (return state is either attribute value (double-quoted) state, attribute value (single-quoted) state or attribute value (unquoted) state), and the last character matched is not a U+003B SEMICOLON character (;), and the next input character is either a U+003D EQUALS SIGN character (=) or an alphanumeric ASCII character, then, for historical reasons, switch to the character reference end state.
If the last character matched is not a U+003B SEMICOLON character (;), this is a parse error.
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 10 |
1 files changed, 7 insertions, 3 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 45774b2..6f0eb47 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -1067,8 +1067,10 @@ class Tokenizer } $entity = CharacterReference::lookupDecimal($numeric); } - } // String entity. - else { + } elseif ($tok === '=' && $inAttribute) { + return '&'; + } else { // String entity. + // Attempt to consume a string up to a ';'. // [a-zA-Z0-9]+; $cname = $this->scanner->getAsciiAlphaNum(); @@ -1078,7 +1080,9 @@ class Tokenizer // and continue on as the & is not part of an entity. The & will // be converted to &amp; elsewhere. if ($entity == null) { - $this->parseError("No match in entity table for '%s'", $cname); + if (!$inAttribute || strlen($cname) === 0) { + $this->parseError("No match in entity table for '%s'", $cname); + } $this->scanner->unconsume($this->scanner->position() - $start); return '&'; } |