summary | refs | log | tree | commit | diff
path: root/src/HTML5/Parser
diff options
context:
space:
mode:
author Christophe Coevoet <[email protected]> 2018-11-24 13:53:53 +0100
committer Christophe Coevoet <[email protected]> 2018-11-26 12:28:27 +0100
commit 88431be37966f89f2a96210ee855565212f34969 (patch)
tree b7f7b443b0ac279a126637d73392352a72bb2b2d /src/HTML5/Parser
parent 54d066b1664a5fb749511c764e8d208f6bc80c3b (diff)
Optimize the parsing of unquoted attributes
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 38
1 files changed, 28 insertions, 10 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 6284733..49f5fc0 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -594,19 +594,37 @@ class Tokenizer
protected function unquotedAttributeValue()
{
- $stoplist = "\t\n\f >";
$val = '';
$tok = $this->scanner->current();
- while (0 == strspn($tok, $stoplist) && false !== $tok) {
- if ('&' == $tok) {
- $val .= $this->decodeCharacterReference(true);
- $tok = $this->scanner->current();
- } else {
- if (strspn($tok, "\"'<=`") > 0) {
+ while (false !== $tok) {
+ switch ($tok) {
+ case "\n":
+ case "\f":
+ case ' ':
+ case "\t":
+ case '>':
+ break 2;
+
+ case '&':
+ $val .= $this->decodeCharacterReference(true);
+ $tok = $this->scanner->current();
+
+ break;
+
+ case "'":
+ case '"':
+ case '<':
+ case '=':
+ case '`':
$this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
- }
- $val .= $tok;
- $tok = $this->scanner->next();
+ $val .= $tok;
+ $tok = $this->scanner->next();
+ break;
+
+ default:
+ $val .= $this->scanner->charsUntil("\t\n\f >&\"'<=`");
+
+ $tok = $this->scanner->current();
}
}