summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5/Parser/CharacterReference.php | 5
-rw-r--r-- src/HTML5/Parser/Tokenizer.php | 3
-rw-r--r-- test/HTML5/Parser/CharacterReferenceTest.php | 3
-rw-r--r-- test/HTML5/Parser/EventStack.php | 2
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php | 37
5 files changed, 42 insertions, 8 deletions
diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php
index cc71f8f..ea6a527 100644
--- a/src/HTML5/Parser/CharacterReference.php
+++ b/src/HTML5/Parser/CharacterReference.php
@@ -19,9 +19,8 @@ class CharacterReference {
* The character sequence. In UTF-8 this may be more than one byte.
*/
public static function lookupName($name) {
- $char = Entities::$byName[$name];
-
- return $char;
+ // Do we really want to return NULL here? or FFFD
+ return isset(Entities::$byName[$name]) ? Entities::$byName[$name] : NULL;
}
/**
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 2e74c5f..500588c 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -144,9 +144,10 @@ class Tokenizer {
// X[0-9a-fA-F]+;
// x[0-9a-fA-F]+;
if ($tok == 'x' || $tok == 'X') {
+ $tok = $this->scanner->next(); // Consume x
$hex = $this->scanner->getHex();
if (empty($hex)) {
- throw ParseError("Expected &#xHEX;, got &#x" . $tok);
+ throw new ParseError("Expected &#xHEX;, got &#x" . $tok);
}
$entity = CharacterReference::lookupHex($hex);
}
diff --git a/test/HTML5/Parser/CharacterReferenceTest.php b/test/HTML5/Parser/CharacterReferenceTest.php
index bb9b393..b530f1c 100644
--- a/test/HTML5/Parser/CharacterReferenceTest.php
+++ b/test/HTML5/Parser/CharacterReferenceTest.php
@@ -14,6 +14,8 @@ class CharacterReferenceTest extends \HTML5\Tests\TestCase {
$this->assertEquals('>', CharacterReference::lookupName('gt'));
$this->assertEquals('"', CharacterReference::lookupName('quot'));
$this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement'));
+
+ $this->assertNull(CharacterReference::lookupName('StinkyCheese'));
}
public function testLookupHex() {
@@ -35,4 +37,5 @@ class CharacterReferenceTest extends \HTML5\Tests\TestCase {
$this->assertEquals('Σ', CharacterReference::lookupDecimal('0931'));
}
+
}
diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php
index a75402e..9754334 100644
--- a/test/HTML5/Parser/EventStack.php
+++ b/test/HTML5/Parser/EventStack.php
@@ -53,7 +53,7 @@ class EventStack implements EventHandler {
}
public function text($cdata) {
- fprintf(STDOUT, "Received TEXT event with: " . $cdata);
+ //fprintf(STDOUT, "Received TEXT event with: " . $cdata);
$this->store('text', array($cdata));
}
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 0e93bb5..9b1727f 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -14,10 +14,18 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
);
}
+ public function parse($string) {
+ list($tok, $events) = $this->createTokenizer($string);
+ $tok->parse();
+
+ return $events;
+ }
+
public function testParse() {
list($tok, $events) = $this->createTokenizer('');
$tok->parse();
+ $e1 = $events->get(0);
$this->assertEquals(1, $events->Depth());
$this->assertEquals('eof', $e1['name']);
@@ -39,17 +47,40 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
public function testCharacterReference() {
$str = '&amp;';
- list($tok, $events) = $this->createTokenizer($str);
+ $events = $this->parse($str);
- $tok->parse();
$this->assertEquals(2, $events->depth());
$e1 = $events->get(0);
$this->assertEquals('&', $e1['data'][0]);
// Test with hex charref
+ $str = '&#x003c;';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('<', $e1['data'][0]);
+
// Test with decimal charref
- // Test with broken charref
+ $str = '&#38;';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('&', $e1['data'][0]);
+
// Test with stand-alone ampersand
+ $str = '& ';
+ $events = $this->parse($str);
+ $e1 = $events->get(0);
+ $this->assertEquals('&', $e1['data'][0][0]);
+
+
+ }
+
+ /**
+ * @expectedException \HTML5\Parser\ParseError
+ */
+ public function testBrokenCharacterReference() {
+ // Test with broken charref
+ $str = '&foo';
+ $events = $this->parse($str);
}
}