diff options
author | Technosophos <[email protected]> | 2013-04-18 20:48:04 -0500 |
---|---|---|
committer | Technosophos <[email protected]> | 2013-04-18 20:48:04 -0500 |
commit | 3b0441037f6ee680a0099b91e87f4fd1544e59e8 (patch) | |
tree | 32fb9d6701655b6304d0d7c2f66bc010b97917f1 /src | |
parent | 16916cc975700e35c0aaced9f1c604f8acd78c96 (diff) |
Tokenizer now handles sophisticated tags.
Diffstat (limited to 'src')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 13 |
1 files changed, 9 insertions, 4 deletions
```diff
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 8250a8f..d98f619 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -159,7 +159,7 @@ class Tokenizer {
     // These indicate not an entity. We return just
     // the &.
     if (strspn($tok, self::WHITE . "&<") == 1) {
-      $this->scanner->next();
+      //$this->scanner->next();
       return '&';
     }
 
@@ -401,10 +401,12 @@ class Tokenizer {
       $name = $this->scanner->current();
       $this->scanner->next();
     }
-    if (preg_match('/\'\"/', $name)) {
-      $this->parseError("Unexpected characters in attribute name");
+    if (preg_match('/[\'\"]/', $name)) {
+    //if (strspn($name, '\'\"')) {
+      $this->parseError("Unexpected characters in attribute name: %s", $name);
     }
-    $this->scanner->whitespace();
+    // Whitespace not allowed between name and =.
+    //$this->scanner->whitespace();
 
     $val = $this->attributeValue();
     //return array($name, $val);
@@ -485,6 +487,9 @@ class Tokenizer {
         $val .= $this->decodeCharacterReference(TRUE);
       }
       else {
+        if(strspn($tok, "\"'<=`") > 0) {
+          $this->parseError("Unexpected chars in unquoted attribute value %s", $tok);
+        }
         $val .= $tok;
         $tok = $this->scanner->next();
       }
```