diff options
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 46 |
1 files changed, 31 insertions, 15 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index c2abb4f..a41fcb4 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -47,8 +47,6 @@ class Tokenizer
     const CONFORMANT_HTML = 'html';
     protected $mode = self::CONFORMANT_HTML;
 
-    const WHITE = "\t\n\f ";
-
     /**
      * Create a new tokenizer.
      *
@@ -165,7 +163,7 @@ class Tokenizer
                     break;
 
                 default:
-                    if (!strspn($tok, '<&')) {
+                    if ('<' !== $tok && '&' !== $tok) {
                         // NULL character
                         if ("\00" === $tok) {
                             $this->parseError('Received null character.');
@@ -199,7 +197,7 @@ class Tokenizer
             case Elements::TEXT_RCDATA:
                 return $this->rcdata($tok);
             default:
-                if (strspn($tok, '<&')) {
+                if ('<' === $tok || '&' === $tok) {
                     return false;
                 }
 
@@ -585,19 +583,37 @@ class Tokenizer
 
     protected function unquotedAttributeValue()
     {
-        $stoplist = "\t\n\f >";
         $val = '';
         $tok = $this->scanner->current();
-        while (0 == strspn($tok, $stoplist) && false !== $tok) {
-            if ('&' == $tok) {
-                $val .= $this->decodeCharacterReference(true);
-                $tok = $this->scanner->current();
-            } else {
-                if (strspn($tok, "\"'<=`") > 0) {
+        while (false !== $tok) {
+            switch ($tok) {
+                case "\n":
+                case "\f":
+                case ' ':
+                case "\t":
+                case '>':
+                    break 2;
+
+                case '&':
+                    $val .= $this->decodeCharacterReference(true);
+                    $tok = $this->scanner->current();
+
+                    break;
+
+                case "'":
+                case '"':
+                case '<':
+                case '=':
+                case '`':
                     $this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
-                }
-                $val .= $tok;
-                $tok = $this->scanner->next();
+                    $val .= $tok;
+                    $tok = $this->scanner->next();
+                    break;
+
+                default:
+                    $val .= $this->scanner->charsUntil("\t\n\f >&\"'<=`");
+
+                    $tok = $this->scanner->current();
             }
         }
 
@@ -1080,7 +1096,7 @@ class Tokenizer
 
         // These indicate not an entity. We return just
         // the &.
-        if (1 === strspn($tok, static::WHITE . '&<')) {
+        if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
             // $this->scanner->next();
             return '&';
         }