summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser/Tokenizer.php
diff options
context:
space:
mode:
authorTechnosophos <[email protected]>2013-04-18 11:11:56 -0500
committerTechnosophos <[email protected]>2013-04-18 11:11:56 -0500
commit16916cc975700e35c0aaced9f1c604f8acd78c96 (patch)
tree247de06bee81ef10137267111891fcde9c739e94 /src/HTML5/Parser/Tokenizer.php
parenta2960d3c4d088440b75d317a14af4d8f7b2bf3a3 (diff)
Well-formed attribute values are working.
Diffstat (limited to 'src/HTML5/Parser/Tokenizer.php')
-rw-r--r--src/HTML5/Parser/Tokenizer.php32
1 files changed, 22 insertions, 10 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 58b77a8..8250a8f 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -336,7 +336,7 @@ class Tokenizer {
do {
$this->scanner->whitespace();
- $this->attributes($attributes);
+ $this->attribute($attributes);
}
while (!$this->isTagEnd($selfClose));
@@ -386,18 +386,13 @@ class Tokenizer {
/**
* Parse attributes from inside of a tag.
*/
- protected function attributes(&$attributes) {
+ protected function attribute(&$attributes) {
$tok = $this->scanner->current();
if ($tok == '/' || $tok == '>' || $tok === FALSE) {
return FALSE;
}
- list($k, $v) = $this->attribute();
- $attributes[$k] = $v;
- }
-
- protected function attribute() {
- $name = $this->scanner->charsUntil("/>=\n\f\t ");
+ $name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));
if (strlen($name) == 0) {
$this->parseError("Expected an attribute name, got %s.", $this->scanner->current());
@@ -412,7 +407,9 @@ class Tokenizer {
$this->scanner->whitespace();
$val = $this->attributeValue();
- return array($name, $val);
+ //return array($name, $val);
+ $attributes[$name] = $val;
+ return TRUE;
}
/**
@@ -437,6 +434,7 @@ class Tokenizer {
return NULL;
case '"':
case "'":
+ $this->scanner->next();
return $this->quotedAttributeValue($tok);
case '>':
// case '/': // 8.2.4.37 seems to allow foo=/ as a valid attr.
@@ -468,16 +466,30 @@ class Tokenizer {
while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
if ($tok == '&') {
$val .= $this->decodeCharacterReference(TRUE);
+ $tok = $this->scanner->current();
}
else {
$val .= $tok;
$tok = $this->scanner->next();
}
}
+ $this->scanner->next();
return $val;
}
+ /**
+ * Read an unquoted attribute value from the scanner.
+ *
+ * Characters are accumulated until the current token is a tab, line feed,
+ * form feed, space or '>', or until the scanner returns FALSE. The
+ * terminating character is left as the scanner's current token.
+ *
+ * @return string
+ *   The attribute value, with each '&' replaced by whatever
+ *   decodeCharacterReference(TRUE) returns.
+ */
protected function unquotedAttributeValue() {
- return $this->quotedAttributeValue(" >");
+ $stoplist = "\t\n\f >";
+ $val = '';
+ $tok = $this->scanner->current();
+ while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
+ if ($tok == '&') {
+ $val .= $this->decodeCharacterReference(TRUE);
+ // Re-read the current token, as quotedAttributeValue() does after
+ // decoding. Without this $tok stays '&' and this branch repeats on
+ // every iteration.
+ // NOTE(review): assumes decodeCharacterReference() moves the scanner
+ // past the reference -- confirm against its implementation.
+ $tok = $this->scanner->current();
+ }
+ else {
+ $val .= $tok;
+ $tok = $this->scanner->next();
+ }
+ }
+ return $val;
}