summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Asmir Mustafic <[email protected]> 2014-12-17 21:31:54 +0100
committer Asmir Mustafic <[email protected]> 2014-12-17 21:31:54 +0100
commit 398ebb68c9395a67858d230d0610aa3676bebdc7 (patch)
tree b0757c652a8991895c33c035d2012e841579f67d
parent de3a3af4aa683d0dae5bd09b62fe74127974c3ea (diff)
parent 4df8acdb61a2eb27512cd9d2b2e682399c9bc2e5 (diff)
Merge pull request #64 from goetas/i63
Case insensitive tags
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 10
-rw-r--r-- test/HTML5/Parser/DOMTreeBuilderTest.php 22
2 files changed, 28 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index f7b450f..958ade8 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -203,7 +203,9 @@ class Tokenizer
$sequence = '</' . $this->untilTag . '>';
$txt = '';
$tok = $this->scanner->current();
- while ($tok !== false && ! ($tok == '<' && ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))))) {
+
+ $caseSensitive = !Elements::isHtml5Element($this->untilTag);
+ while ($tok !== false && ! ($tok == '<' && ($this->sequenceMatches($sequence, $caseSensitive)))) {
if ($tok == '&') {
$txt .= $this->decodeCharacterReference();
$tok = $this->scanner->current();
@@ -891,7 +893,7 @@ class Tokenizer
$buffer .= $this->scanner->charsUntil($first);
// Stop as soon as we hit the stopping condition.
- if ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))) {
+ if ($this->sequenceMatches($sequence, false)) {
return $buffer;
}
$buffer .= $this->scanner->current();
@@ -916,7 +918,7 @@ class Tokenizer
* see if the input stream is at the start of a
* '</script>' string.
*/
- protected function sequenceMatches($sequence)
+ protected function sequenceMatches($sequence, $caseSensitive = true)
{
$len = strlen($sequence);
$buffer = '';
@@ -932,7 +934,7 @@ class Tokenizer
}
$this->scanner->unconsume($len);
- return $buffer == $sequence;
+ return $caseSensitive ? $buffer == $sequence : strcasecmp($buffer, $sequence) === 0;
}
/**
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 9a7d5d8..5bba7cc 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -56,6 +56,28 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertInstanceOf('\DOMDocument', $doc);
$this->assertEquals('html', $doc->documentElement->tagName);
}
+
+ public function testStrangeCapitalization()
+ {
+ $html = "<!doctype html>
+ <html>
+ <head>
+ <Title>Hello, world!</TitlE>
+ </head>
+ <body>TheBody<script>foo</script></body>
+ </html>";
+ $doc = $this->parse($html);
+
+ $this->assertInstanceOf('\DOMDocument', $doc);
+ $this->assertEquals('html', $doc->documentElement->tagName);
+
+ $xpath = new \DOMXPath( $doc );
+ $xpath->registerNamespace( "x", "http://www.w3.org/1999/xhtml" );
+
+ $this->assertEquals("Hello, world!", $xpath->query( "//x:title" )->item( 0 )->nodeValue);
+ $this->assertEquals("foo", $xpath->query( "//x:script" )->item( 0 )->nodeValue);
+ }
+
public function testDocumentFakeAttrAbsence()
{
$html = "<!DOCTYPE html><html xmlns=\"http://www.w3.org/1999/xhtml\"><body>foo</body></html>";