summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mišo Belica <[email protected]> 2014-04-14 16:46:42 +0200
committer: Mišo Belica <[email protected]> 2014-04-16 08:42:11 +0200
commit: 4401688e65b1d979705fe4e82dfdb283813f0696 (patch)
tree: daf0eb5b5c10f7a3121f22a4eb89e21274555535
parent: 1f2861b528ddf2e4d7306979d3620910f45bac74 (diff)
Don't throw an exception for invalid tag names
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php 8
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 4
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 87
3 files changed, 97 insertions, 2 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index b79c298..964d662 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -223,8 +223,14 @@ class DOMTreeBuilder implements EventHandler {
$lname = Elements::normalizeSvgElement($lname);
}
+ try {
+ $ele = $this->doc->createElement($lname);
+ }
+ catch(\DOMException $e) {
+ $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
+ $ele = $this->doc->createElement('invalid');
+ }
- $ele = $this->doc->createElement($lname);
foreach ($attributes as $aName => $aVal) {
if ($this->insertMode == static::IM_IN_SVG) {
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index f21d30b..a78cf23 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -322,7 +322,9 @@ class Tokenizer {
}
// We know this is at least one char.
- $name = strtolower($this->scanner->charsUntil("/> \n\f\t"));
+ $name = strtolower($this->scanner->charsWhile(
+ ":0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+ ));
$attributes = array();
$selfClose = FALSE;
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 2a111bc..4ca8ed3 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -320,6 +320,93 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
}
}
+ public function testTagsWithAttributeAndMissingName() { // Tags whose name slot holds an attribute-like token (a name immediately followed by '=') are still expected to produce a start tag.
+ $cases = array( // input markup => tag name expected on the resulting startTag event
+ '<id="top_featured">' => 'id',
+ '<color="white">' => 'color',
+ "<class='neaktivni_stranka'>" => 'class',
+ '<bgcolor="white">' => 'bgcolor',
+ '<class="nom">' => 'class',
+ );
+
+ foreach($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0)); // events 0-2 are all expected to be error events
+ $this->assertEventError($events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('startTag', $expected, $events->get(3)); // the start tag is expected to be named by the text before '='
+ $this->assertEventEquals('eof', NULL, $events->get(4)); // eof is expected directly after the start tag
+ }
+ }
+
+ public function testTagNotClosedAfterTagName() { // Tag names cut short by a new '<' (or other unexpected characters) before '>' are expected to yield an error plus a start tag for the name read so far.
+ $cases = array( // input markup => array(first expected tag name, second expected tag name)
+ "<noscript<img>" => array('noscript', 'img'),
+ '<center<a>' => array('center', 'a'),
+ '<br<br>' => array('br', 'br'),
+ );
+
+ foreach($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0)); // expected sequence: one error, both start tags, then eof
+ $this->assertEventEquals('startTag', $expected[0], $events->get(1));
+ $this->assertEventEquals('startTag', $expected[1], $events->get(2));
+ $this->assertEventEquals('eof', NULL, $events->get(3));
+ }
+
+ $events = $this->parse('<span<>02</span>'); // '<>' right after the unclosed '<span': a second error is expected and '>02' is expected as text
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'span', $events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('text', '>02', $events->get(3));
+ $this->assertEventEquals('endTag', 'span', $events->get(4));
+ $this->assertEventEquals('eof', NULL, $events->get(5));
+
+ $events = $this->parse('<p</p>'); // end tag starting directly after an unclosed start tag name
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'p', $events->get(1));
+ $this->assertEventEquals('endTag', 'p', $events->get(2));
+ $this->assertEventEquals('eof', NULL, $events->get(3));
+
+ $events = $this->parse('<strong><WordPress</strong>'); // mixed-case unclosed name: expected lower-cased as 'wordpress'
+ $this->assertEventEquals('startTag', 'strong', $events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventEquals('startTag', 'wordpress', $events->get(2));
+ $this->assertEventEquals('endTag', 'strong', $events->get(3));
+ $this->assertEventEquals('eof', NULL, $events->get(4));
+
+ $events = $this->parse('<src=<a>'); // name followed by '=' and then a new tag: three errors are expected before the two start tags
+ $this->assertEventError($events->get(0));
+ $this->assertEventError($events->get(1));
+ $this->assertEventError($events->get(2));
+ $this->assertEventEquals('startTag', 'src', $events->get(3));
+ $this->assertEventEquals('startTag', 'a', $events->get(4));
+ $this->assertEventEquals('eof', NULL, $events->get(5));
+
+ $events = $this->parse('<br...<a>'); // only error, startTag 'br' and eof are expected; no separate event is expected for the trailing '...<a>'
+ $this->assertEventError($events->get(0));
+ $this->assertEventEquals('startTag', 'br', $events->get(1));
+ $this->assertEventEquals('eof', NULL, $events->get(2));
+ }
+
+ public function testIllegalTagNames() { // Tag names containing an unexpected character are expected to be reported with only the part of the name before that character.
+ $cases = array( // input markup => expected (truncated) tag name
+ '<li">' => 'li',
+ '<p">' => 'p',
+ '<b&nbsp; >' => 'b',
+ '<static*all>' => 'static',
+ '<h*0720/>' => 'h',
+ '<st*ATTRIBUTE />' => 'st',
+ '<a-href="http://url.com/">' => 'a',
+ );
+
+ foreach($cases as $html => $expected) {
+ $events = $this->parse($html);
+ $this->assertEventError($events->get(0)); // first event is expected to be an error
+ $this->assertEventEquals('startTag', $expected, $events->get(1)); // second event is the start tag with the truncated name; later events are not checked
+ }
+ }
+
/**
* @depends testCharacterReference
*/