summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 20
-rw-r--r-- test/HTML5/Parser/EventStack.php 6
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 2
3 files changed, 24 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 500588c..1d786c5 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -82,6 +82,12 @@ class Tokenizer {
$this->text .= $str;
}
+ /**
+ * Report a parse error to the event handler, tagged with the scanner's
+ * current line and column offset.
+ *
+ * @param string $msg Description of the parse error.
+ */
+ protected function parseError($msg) {
+ $this->events->parseError($msg, $this->scanner->currentLine(), $this->scanner->columnOffset());
+ }
+
/**
* Consume a character and make a move.
* HTML5 8.2.4.1
@@ -147,7 +153,9 @@ class Tokenizer {
$tok = $this->scanner->next(); // Consume x
$hex = $this->scanner->getHex();
if (empty($hex)) {
- throw new ParseError("Expected &#xHEX;, got &#x" . $tok);
+ // Malformed hex reference: report a parse error and stop instead of throwing.
+ $this->parseError("Expected &#xHEX;, got &#x" . $tok);
+ return;
}
$entity = CharacterReference::lookupHex($hex);
}
@@ -156,7 +164,9 @@ class Tokenizer {
else {
$numeric = $this->scanner->getNumeric();
if (empty($numeric)) {
- throw ParseError("Expected &#DIGITS;, got $#" . $tok);
+ // Malformed decimal reference: report a parse error and stop instead of throwing.
+ $this->parseError("Expected &#DIGITS;, got &#" . $tok);
+ return;
}
$entity = CharacterReference::lookupDecimal($numeric);
}
@@ -166,6 +176,9 @@ class Tokenizer {
// [a-zA-Z0-9]+;
$cname = $this->scanner->getAsciiAlpha();
$entity = CharacterReference::lookupName($cname);
+ if ($entity == NULL) {
+ // Report the name that was looked up; $entity is NULL here and would print as an empty string.
+ $this->parseError("No match in entity table for " . $cname);
+ }
}
// The scanner has advanced the cursor for us.
@@ -185,7 +198,8 @@ class Tokenizer {
return;
}
- throw new ParseError("Expected &ENTITY;, got &ENTITY (no trailing ;) " . $tok);
+ // Reference has no trailing ';': report a parse error instead of throwing.
+ $this->parseError("Expected &ENTITY;, got &ENTITY (no trailing ;) " . $tok);
}
diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php
index 9754334..68f0aa9 100644
--- a/test/HTML5/Parser/EventStack.php
+++ b/test/HTML5/Parser/EventStack.php
@@ -61,5 +61,11 @@ class EventStack implements EventHandler {
$this->store('eof');
}
+ /**
+ * Turn a reported parse error into an exception carrying its position.
+ *
+ * @param string $msg Description of the parse error.
+ * @param int $line Line number, formatted with %d.
+ * @param int $col Column number, formatted with %d.
+ * @throws EventStackParseError Always.
+ */
+ public function parseError($msg, $line, $col) {
+ $text = sprintf("%s (line %d, col %d)", $msg, $line, $col);
+ throw new EventStackParseError($text);
+ }
+
}
+/**
+ * Exception thrown by EventStack::parseError() when a parse error is reported.
+ */
+class EventStackParseError extends \Exception {}
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 9b1727f..b6ce01f 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -76,7 +76,7 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
}
/**
- * @expectedException \HTML5\Parser\ParseError
+ * @expectedException \HTML5\Parser\EventStackParseError
*/
public function testBrokenCharacterReference() {
// Test with broken charref