From af21e63663d3f1213eebbbd3ec2bfb13735183a4 Mon Sep 17 00:00:00 2001 From: Matt Butcher Date: Fri, 12 Apr 2013 16:09:39 -0500 Subject: Removed iconv-specific unit tests. --- test/HTML5/Parser/StringInputStreamTest.php | 35 ++++------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) (limited to 'test') diff --git a/test/HTML5/Parser/StringInputStreamTest.php b/test/HTML5/Parser/StringInputStreamTest.php index f47de4f..86b52b7 100644 --- a/test/HTML5/Parser/StringInputStreamTest.php +++ b/test/HTML5/Parser/StringInputStreamTest.php @@ -314,10 +314,9 @@ class InputStreamTest extends \HTML5\Tests\TestCase { $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBE", 1, 'U+10FFFE (permanent noncharacter)'); $this->invalidParseErrorTestHandler("\xF4\x8F\xBF\xBF", 1, 'U+10FFFF (permanent noncharacter)'); - // MPB: All of these seem to fail with iconv, which seems to strip them all. Adding an alternate test - // to make sure this is a stable phenomenon. cf. http://codesnipers.com/?q=splitting-surrogate-pairs - // and also http://www.unicode.org/faq/utf_bom.html - // Single UTF-16 surrogates + // MPB: These pass on some versions of iconv, and fail on others. Since we aren't in the + // business of writing tests against iconv, I've just commented these out. Should revisit + // at a later point. /* $this->invalidParseErrorTestHandler("\xED\xA0\x80", 1, 'U+D800 (UTF-16 surrogate character)'); $this->invalidParseErrorTestHandler("\xED\xAD\xBF", 1, 'U+DB7F (UTF-16 surrogate character)'); @@ -337,32 +336,6 @@ class InputStreamTest extends \HTML5\Tests\TestCase { $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xB0\x80", 2, 'U+DBFF U+DC00 (paired UTF-16 surrogates)'); $this->invalidParseErrorTestHandler("\xED\xAF\xBF\xED\xBF\xBF", 2, 'U+DBFF U+DFFF (paired UTF-16 surrogates)'); */ - - // MPB: This seems to be non-conformant to Unicode, but... there's nothing we can really do about that. - // So we just test to make sure this is consistent. - $stripped_surrogates = array( - "\xED\xA0\x80", - "\xED\xAD\xBF", - "\xED\xAE\x80", - "\xED\xAF\xBF", - "\xED\xB0\x80", - "\xED\xBE\x80", - "\xED\xBF\xBF", - - // Paired UTF-16 surrogates - "\xED\xA0\x80\xED\xB0\x80", - "\xED\xA0\x80\xED\xBF\xBF", - "\xED\xAD\xBF\xED\xB0\x80", - "\xED\xAD\xBF\xED\xBF\xBF", - "\xED\xAE\x80\xED\xB0\x80", - "\xED\xAE\x80\xED\xBF\xBF", - "\xED\xAF\xBF\xED\xB0\x80", - "\xED\xAF\xBF\xED\xBF\xBF", - ); - foreach($stripped_surrogates as $sur) { - $is = new StringInputStream($sur); - $this->assertEquals('', $is->remainingChars(), sprintf("Surrogate removed: %d", $sur)); - - } } + } -- cgit v1.2.3