diff options
author | Technosophos <[email protected]> | 2013-04-18 11:11:56 -0500 |
---|---|---|
committer | Technosophos <[email protected]> | 2013-04-18 11:11:56 -0500 |
commit | 16916cc975700e35c0aaced9f1c604f8acd78c96 (patch) | |
tree | 247de06bee81ef10137267111891fcde9c739e94 /src/HTML5 | |
parent | a2960d3c4d088440b75d317a14af4d8f7b2bf3a3 (diff) |
Well-formed attribute values are working.
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 32 |
1 file changed, 22 insertions, 10 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 58b77a8..8250a8f 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -336,7 +336,7 @@ class Tokenizer { do { $this->scanner->whitespace(); - $this->attributes($attributes); + $this->attribute($attributes); } while (!$this->isTagEnd($selfClose)); @@ -386,18 +386,13 @@ class Tokenizer { /** * Parse attributes from inside of a tag. */ - protected function attributes(&$attributes) { + protected function attribute(&$attributes) { $tok = $this->scanner->current(); if ($tok == '/' || $tok == '>' || $tok === FALSE) { return FALSE; } - list($k, $v) = $this->attribute(); - $attributes[$k] = $v; - } - - protected function attribute() { - $name = $this->scanner->charsUntil("/>=\n\f\t "); + $name = strtolower($this->scanner->charsUntil("/>=\n\f\t ")); if (strlen($name) == 0) { $this->parseError("Expected an attribute name, got %s.", $this->scanner->current()); @@ -412,7 +407,9 @@ class Tokenizer { $this->scanner->whitespace(); $val = $this->attributeValue(); - return array($name, $val); + //return array($name, $val); + $attributes[$name] = $val; + return TRUE; } /** @@ -437,6 +434,7 @@ class Tokenizer { return NULL; case '"': case "'": + $this->scanner->next(); return $this->quotedAttributeValue($tok); case '>': // case '/': // 8.2.4.37 seems to allow foo=/ as a valid attr. 
@@ -468,16 +466,30 @@ class Tokenizer { while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) { if ($tok == '&') { $val .= $this->decodeCharacterReference(TRUE); + $tok = $this->scanner->current(); } else { $val .= $tok; $tok = $this->scanner->next(); } } + $this->scanner->next(); return $val; } protected function unquotedAttributeValue() { - return $this->quotedAttributeValue(" >"); + $stoplist = "\t\n\f >"; + $val = ''; + $tok = $this->scanner->current(); + while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) { + if ($tok == '&') { + $val .= $this->decodeCharacterReference(TRUE); + } + else { + $val .= $tok; + $tok = $this->scanner->next(); + } + } + return $val; } |