summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 8
-rw-r--r-- test/HTML5/Parser/DOMTreeBuilderTest.php 23
2 files changed, 27 insertions, 4 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 92510de..b587a1f 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -203,7 +203,7 @@ class Tokenizer
$sequence = '</' . $this->untilTag . '>';
$txt = '';
$tok = $this->scanner->current();
- while ($tok !== false && ! ($tok == '<' && ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))))) {
+ while ($tok !== false && ! ($tok == '<' && ($this->sequenceMatches($sequence, false)))) {
if ($tok == '&') {
$txt .= $this->decodeCharacterReference();
$tok = $this->scanner->current();
@@ -891,7 +891,7 @@ class Tokenizer
$buffer .= $this->scanner->charsUntil($first);
// Stop as soon as we hit the stopping condition.
- if ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))) {
+ if ($this->sequenceMatches($sequence, false)) {
return $buffer;
}
$buffer .= $this->scanner->current();
@@ -916,7 +916,7 @@ class Tokenizer
* see if the input stream is at the start of a
* '</script>' string.
*/
- protected function sequenceMatches($sequence)
+ protected function sequenceMatches($sequence, $caseSensitive = true)
{
$len = strlen($sequence);
$buffer = '';
@@ -932,7 +932,7 @@ class Tokenizer
}
$this->scanner->unconsume($len);
- return $buffer == $sequence;
+ return $caseSensitive ? $buffer == $sequence : strcasecmp($buffer, $sequence) === 0;
}
/**
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index 5227c40..f9323ed 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -56,6 +56,29 @@ class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
$this->assertInstanceOf('\DOMDocument', $doc);
$this->assertEquals('html', $doc->documentElement->tagName);
}
+
+    /** Mixed-case tags such as <Title>...</Title> must parse into lowercase elements with their text intact. */
+    public function testStrangeCapitalization()
+    {
+        $html = "<!doctype html>
+ <html>
+ <head>
+ <Title>Hello, world!</Title>
+ </head>
+ <body>TheBody</body>
+ </html>";
+        $doc = $this->parse($html);
+        $this->assertInstanceOf('\DOMDocument', $doc);
+        $this->assertSame('html', $doc->documentElement->tagName);
+        $xpath = new \DOMXPath($doc);
+        $xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml');
+        foreach (array('title' => 'Hello, world!', 'body' => 'TheBody') as $tag => $text) {
+            $node = $xpath->query('//x:' . $tag)->item(0);
+            // Fail with a readable message instead of a null-property error when nothing matched.
+            $this->assertNotNull($node, "No <$tag> element found");
+            $this->assertSame($text, $node->nodeValue);
+        }
+    }
+
public function testDocumentFakeAttrAbsence()
{
$html = "<!DOCTYPE html><html xmlns=\"http://www.w3.org/1999/xhtml\"><body>foo</body></html>";