summary | refs | log | tree | commit | diff
path: root/src/HTML5/Parser
diff options
context:
space:
mode:
author Asmir Mustafic <[email protected]> 2018-11-26 18:40:53 +0100
committer GitHub <[email protected]> 2018-11-26 18:40:53 +0100
commit 88b7c663e69673f16f4ce10db947135b97f40e00 (patch)
tree ec36bd20a39f4659438e793b5c25d3588690b884 /src/HTML5/Parser
parent 3ed3bdc45997dfaa92787a4639f3fe7a44de0aae (diff)
parent 88431be37966f89f2a96210ee855565212f34969 (diff)
Merge pull request #155 from stof/optimize_attributes
Optimize the parsing of unquoted attributes
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 38
1 files changed, 28 insertions, 10 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index be09b21..44b6875 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -592,19 +592,37 @@ class Tokenizer
protected function unquotedAttributeValue()
{
- $stoplist = "\t\n\f >";
$val = '';
$tok = $this->scanner->current();
- while (0 == strspn($tok, $stoplist) && false !== $tok) {
- if ('&' == $tok) {
- $val .= $this->decodeCharacterReference(true);
- $tok = $this->scanner->current();
- } else {
- if (strspn($tok, "\"'<=`") > 0) {
+ while (false !== $tok) {
+ switch ($tok) {
+ case "\n":
+ case "\f":
+ case ' ':
+ case "\t":
+ case '>':
+ break 2;
+
+ case '&':
+ $val .= $this->decodeCharacterReference(true);
+ $tok = $this->scanner->current();
+
+ break;
+
+ case "'":
+ case '"':
+ case '<':
+ case '=':
+ case '`':
$this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
- }
- $val .= $tok;
- $tok = $this->scanner->next();
+ $val .= $tok;
+ $tok = $this->scanner->next();
+ break;
+
+ default:
+ $val .= $this->scanner->charsUntil("\t\n\f >&\"'<=`");
+
+ $tok = $this->scanner->current();
}
}