summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Technosophos <[email protected]> 2013-04-18 20:48:04 -0500
committer Technosophos <[email protected]> 2013-04-18 20:48:04 -0500
commit 3b0441037f6ee680a0099b91e87f4fd1544e59e8 (patch)
tree 32fb9d6701655b6304d0d7c2f66bc010b97917f1 /src
parent 16916cc975700e35c0aaced9f1c604f8acd78c96 (diff)
Tokenizer now handles sophisticated tags.
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 13
1 files changed, 9 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 8250a8f..d98f619 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -159,7 +159,7 @@ class Tokenizer {
// These indicate not an entity. We return just
// the &.
if (strspn($tok, self::WHITE . "&<") == 1) {
- $this->scanner->next();
+ //$this->scanner->next();
return '&';
}
@@ -401,10 +401,12 @@ class Tokenizer {
$name = $this->scanner->current();
$this->scanner->next();
}
- if (preg_match('/\'\"/', $name)) {
- $this->parseError("Unexpected characters in attribute name");
+ if (preg_match('/[\'\"]/', $name)) {
+ //if (strspn($name, '\'\"')) {
+ $this->parseError("Unexpected characters in attribute name: %s", $name);
}
- $this->scanner->whitespace();
+ // Whitespace not allowed between name and =.
+ //$this->scanner->whitespace();
$val = $this->attributeValue();
//return array($name, $val);
@@ -485,6 +487,9 @@ class Tokenizer {
$val .= $this->decodeCharacterReference(TRUE);
}
else {
+ if(strspn($tok, "\"'<=`") > 0) {
+ $this->parseError("Unexpected chars in unquoted attribute value %s", $tok);
+ }
$val .= $tok;
$tok = $this->scanner->next();
}