summary | refs | log | tree | commit | diff
path: root/src/HTML5
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php | 46
1 files changed, 31 insertions, 15 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index c2abb4f..a41fcb4 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -47,8 +47,6 @@ class Tokenizer
const CONFORMANT_HTML = 'html';
protected $mode = self::CONFORMANT_HTML;
- const WHITE = "\t\n\f ";
-
/**
* Create a new tokenizer.
*
@@ -165,7 +163,7 @@ class Tokenizer
break;
default:
- if (!strspn($tok, '<&')) {
+ if ('<' !== $tok && '&' !== $tok) {
// NULL character
if ("\00" === $tok) {
$this->parseError('Received null character.');
@@ -199,7 +197,7 @@ class Tokenizer
case Elements::TEXT_RCDATA:
return $this->rcdata($tok);
default:
- if (strspn($tok, '<&')) {
+ if ('<' === $tok || '&' === $tok) {
return false;
}
@@ -585,19 +583,37 @@ class Tokenizer
protected function unquotedAttributeValue()
{
- $stoplist = "\t\n\f >";
$val = '';
$tok = $this->scanner->current();
- while (0 == strspn($tok, $stoplist) && false !== $tok) {
- if ('&' == $tok) {
- $val .= $this->decodeCharacterReference(true);
- $tok = $this->scanner->current();
- } else {
- if (strspn($tok, "\"'<=`") > 0) {
+ while (false !== $tok) {
+ switch ($tok) {
+ case "\n":
+ case "\f":
+ case ' ':
+ case "\t":
+ case '>':
+ break 2;
+
+ case '&':
+ $val .= $this->decodeCharacterReference(true);
+ $tok = $this->scanner->current();
+
+ break;
+
+ case "'":
+ case '"':
+ case '<':
+ case '=':
+ case '`':
$this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
- }
- $val .= $tok;
- $tok = $this->scanner->next();
+ $val .= $tok;
+ $tok = $this->scanner->next();
+ break;
+
+ default:
+ $val .= $this->scanner->charsUntil("\t\n\f >&\"'<=`");
+
+ $tok = $this->scanner->current();
}
}
@@ -1080,7 +1096,7 @@ class Tokenizer
// These indicate not an entity. We return just
// the &.
- if (1 === strspn($tok, static::WHITE . '&<')) {
+ if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
// $this->scanner->next();
return '&';
}