From 1678e73e34b167ba0312d1b65797363ea8f62fe2 Mon Sep 17 00:00:00 2001
From: Technosophos
Date: Thu, 11 Apr 2013 13:18:56 -0500
Subject: Moved UTF-8 character check out to UTF8Utils.

---
 src/HTML5/Parser/StringInputStream.php | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/HTML5/Parser/StringInputStream.php')

diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index edd0399..5f41c21 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -65,7 +65,14 @@ class StringInputStream implements InputStream {
   public function __construct($data, $encoding = 'UTF-8') {
     $data = $this->convertToUTF8($data, $encoding);
-    $this->checkForIllegalCodepoints($data);
+
+    // There is good reason to question whether it makes sense to
+    // do this here, since most of these checks are done during
+    // parsing, and since this check doesn't actually *do* anything.
+    $e = UTF8Utils::checkForIllegalCodepoints($data);
+    if (!empty($e)) {
+      throw new ParseError("UTF-8 encoding issues: " . implode(', ', $e));
+    }
     $data = $this->replaceLinefeeds($data);
-- 
cgit v1.2.3