diff options
-rw-r--r-- | src/HTML5/Parser/CharacterReference.php | 5 | ||||
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 3 | ||||
-rw-r--r-- | test/HTML5/Parser/CharacterReferenceTest.php | 3 | ||||
-rw-r--r-- | test/HTML5/Parser/EventStack.php | 2 | ||||
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 37 |
5 files changed, 42 insertions, 8 deletions
diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php index cc71f8f..ea6a527 100644 --- a/src/HTML5/Parser/CharacterReference.php +++ b/src/HTML5/Parser/CharacterReference.php @@ -19,9 +19,8 @@ class CharacterReference { * The character sequence. In UTF-8 this may be more than one byte. */ public static function lookupName($name) { - $char = Entities::$byName[$name]; - - return $char; + // Do we really want to return NULL here? or FFFD + return isset(Entities::$byName[$name]) ? Entities::$byName[$name] : NULL; } /** diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 2e74c5f..500588c 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -144,9 +144,10 @@ class Tokenizer { // X[0-9a-fA-F]+; // x[0-9a-fA-F]+; if ($tok == 'x' || $tok == 'X') { + $tok = $this->scanner->next(); // Consume x $hex = $this->scanner->getHex(); if (empty($hex)) { - throw ParseError("Expected &#xHEX;, got &#x" . $tok); + throw new ParseError("Expected &#xHEX;, got &#x" . $tok); } $entity = CharacterReference::lookupHex($hex); } diff --git a/test/HTML5/Parser/CharacterReferenceTest.php b/test/HTML5/Parser/CharacterReferenceTest.php index bb9b393..b530f1c 100644 --- a/test/HTML5/Parser/CharacterReferenceTest.php +++ b/test/HTML5/Parser/CharacterReferenceTest.php @@ -14,6 +14,8 @@ class CharacterReferenceTest extends \HTML5\Tests\TestCase { $this->assertEquals('>', CharacterReference::lookupName('gt')); $this->assertEquals('"', CharacterReference::lookupName('quot')); $this->assertEquals('∌', CharacterReference::lookupName('NotReverseElement')); + + $this->assertNull(CharacterReference::lookupName('StinkyCheese')); } public function testLookupHex() { @@ -35,4 +37,5 @@ class CharacterReferenceTest extends \HTML5\Tests\TestCase { $this->assertEquals('Σ', CharacterReference::lookupDecimal('0931')); } + } diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php index a75402e..9754334 100644 --- a/test/HTML5/Parser/EventStack.php +++ b/test/HTML5/Parser/EventStack.php @@ -53,7 +53,7 @@ class EventStack implements EventHandler { } public function text($cdata) { - fprintf(STDOUT, "Received TEXT event with: " . $cdata); + //fprintf(STDOUT, "Received TEXT event with: " . $cdata); $this->store('text', array($cdata)); } diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index 0e93bb5..9b1727f 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -14,10 +14,18 @@ class TokenizerTest extends \HTML5\Tests\TestCase { ); } + public function parse($string) { + list($tok, $events) = $this->createTokenizer($string); + $tok->parse(); + + return $events; + } + public function testParse() { list($tok, $events) = $this->createTokenizer(''); $tok->parse(); + $e1 = $events->get(0); $this->assertEquals(1, $events->Depth()); $this->assertEquals('eof', $e1['name']); @@ -39,17 +47,40 @@ class TokenizerTest extends \HTML5\Tests\TestCase { public function testCharacterReference() { $str = '&'; - list($tok, $events) = $this->createTokenizer($str); + $events = $this->parse($str); - $tok->parse(); $this->assertEquals(2, $events->depth()); $e1 = $events->get(0); $this->assertEquals('&', $e1['data'][0]); // Test with hex charref + $str = '<'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('<', $e1['data'][0]); + // Test with decimal charref - // Test with broken charref + $str = '&'; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('&', $e1['data'][0]); + // Test with stand-alone ampersand + $str = '& '; + $events = $this->parse($str); + $e1 = $events->get(0); + $this->assertEquals('&', $e1['data'][0][0]); + + + } + + /** + * @expectedException \HTML5\Parser\ParseError + */ + public function testBrokenCharacterReference() { + // Test with broken charref + $str = '&foo'; + $events = $this->parse($str); } } |