diff options
author | Mišo Belica <[email protected]> | 2014-04-14 16:46:42 +0200 |
---|---|---|
committer | Mišo Belica <[email protected]> | 2014-04-16 08:42:11 +0200 |
commit | 4401688e65b1d979705fe4e82dfdb283813f0696 (patch) | |
tree | daf0eb5b5c10f7a3121f22a4eb89e21274555535 | |
parent | 1f2861b528ddf2e4d7306979d3620910f45bac74 (diff) |
Don't throw an exception for invalid tag names
-rw-r--r-- | src/HTML5/Parser/DOMTreeBuilder.php | 8 |
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 4 |
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 87 |
3 files changed, 97 insertions, 2 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index b79c298..964d662 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -223,8 +223,14 @@ class DOMTreeBuilder implements EventHandler { $lname = Elements::normalizeSvgElement($lname); } + try { + $ele = $this->doc->createElement($lname); + } + catch(\DOMException $e) { + $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); + $ele = $this->doc->createElement('invalid'); + } - $ele = $this->doc->createElement($lname); foreach ($attributes as $aName => $aVal) { if ($this->insertMode == static::IM_IN_SVG) { diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index f21d30b..a78cf23 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -322,7 +322,9 @@ class Tokenizer { } // We know this is at least one char. - $name = strtolower($this->scanner->charsUntil("/> \n\f\t")); + $name = strtolower($this->scanner->charsWhile( + ":0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + )); $attributes = array(); $selfClose = FALSE; diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index 2a111bc..4ca8ed3 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -320,6 +320,93 @@ class TokenizerTest extends \HTML5\Tests\TestCase { } } + public function testTagsWithAttributeAndMissingName() { + $cases = array( + '<id="top_featured">' => 'id', + '<color="white">' => 'color', + "<class='neaktivni_stranka'>" => 'class', + '<bgcolor="white">' => 'bgcolor', + '<class="nom">' => 'class', + ); + + foreach($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', $expected, $events->get(3)); + $this->assertEventEquals('eof', 
NULL, $events->get(4)); + } + } + + public function testTagNotClosedAfterTagName() { + $cases = array( + "<noscript<img>" => array('noscript', 'img'), + '<center<a>' => array('center', 'a'), + '<br<br>' => array('br', 'br'), + ); + + foreach($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected[0], $events->get(1)); + $this->assertEventEquals('startTag', $expected[1], $events->get(2)); + $this->assertEventEquals('eof', NULL, $events->get(3)); + } + + $events = $this->parse('<span<>02</span>'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'span', $events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('text', '>02', $events->get(3)); + $this->assertEventEquals('endTag', 'span', $events->get(4)); + $this->assertEventEquals('eof', NULL, $events->get(5)); + + $events = $this->parse('<p</p>'); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'p', $events->get(1)); + $this->assertEventEquals('endTag', 'p', $events->get(2)); + $this->assertEventEquals('eof', NULL, $events->get(3)); + + $events = $this->parse('<strong><WordPress</strong>'); + $this->assertEventEquals('startTag', 'strong', $events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventEquals('startTag', 'wordpress', $events->get(2)); + $this->assertEventEquals('endTag', 'strong', $events->get(3)); + $this->assertEventEquals('eof', NULL, $events->get(4)); + + $events = $this->parse('<src=<a>'); + $this->assertEventError($events->get(0)); + $this->assertEventError($events->get(1)); + $this->assertEventError($events->get(2)); + $this->assertEventEquals('startTag', 'src', $events->get(3)); + $this->assertEventEquals('startTag', 'a', $events->get(4)); + $this->assertEventEquals('eof', NULL, $events->get(5)); + + $events = $this->parse('<br...<a>'); + 
$this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', 'br', $events->get(1)); + $this->assertEventEquals('eof', NULL, $events->get(2)); + } + + public function testIllegalTagNames() { + $cases = array( + '<li">' => 'li', + '<p">' => 'p', + '<b >' => 'b', + '<static*all>' => 'static', + '<h*0720/>' => 'h', + '<st*ATTRIBUTE />' => 'st', + '<a-href="http://url.com/">' => 'a', + ); + + foreach($cases as $html => $expected) { + $events = $this->parse($html); + $this->assertEventError($events->get(0)); + $this->assertEventEquals('startTag', $expected, $events->get(1)); + } + } + /** * @depends testCharacterReference */ |