diff options
author | Asmir Mustafic <[email protected]> | 2018-11-26 18:40:53 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2018-11-26 18:40:53 +0100 |
commit | 88b7c663e69673f16f4ce10db947135b97f40e00 (patch) | |
tree | ec36bd20a39f4659438e793b5c25d3588690b884 /src/HTML5 | |
parent | 3ed3bdc45997dfaa92787a4639f3fe7a44de0aae (diff) | |
parent | 88431be37966f89f2a96210ee855565212f34969 (diff) |
Merge pull request #155 from stof/optimize_attributes
Optimize the parsing of unquoted attributes
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 38 |
1 file changed, 28 insertions, 10 deletions
```diff
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index be09b21..44b6875 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -592,19 +592,37 @@ class Tokenizer
 
     protected function unquotedAttributeValue()
     {
-        $stoplist = "\t\n\f >";
         $val = '';
         $tok = $this->scanner->current();
-        while (0 == strspn($tok, $stoplist) && false !== $tok) {
-            if ('&' == $tok) {
-                $val .= $this->decodeCharacterReference(true);
-                $tok = $this->scanner->current();
-            } else {
-                if (strspn($tok, "\"'<=`") > 0) {
+        while (false !== $tok) {
+            switch ($tok) {
+                case "\n":
+                case "\f":
+                case ' ':
+                case "\t":
+                case '>':
+                    break 2;
+
+                case '&':
+                    $val .= $this->decodeCharacterReference(true);
+                    $tok = $this->scanner->current();
+
+                    break;
+
+                case "'":
+                case '"':
+                case '<':
+                case '=':
+                case '`':
                     $this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
-                }
-                $val .= $tok;
-                $tok = $this->scanner->next();
+                    $val .= $tok;
+                    $tok = $this->scanner->next();
+                    break;
+
+                default:
+                    $val .= $this->scanner->charsUntil("\t\n\f >&\"'<=`");
+
+                    $tok = $this->scanner->current();
             }
         }
 
```