diff options
author | Technosophos <[email protected]> | 2013-04-11 13:18:56 -0500 |
---|---|---|
committer | Technosophos <[email protected]> | 2013-04-11 13:18:56 -0500 |
commit | 1678e73e34b167ba0312d1b65797363ea8f62fe2 (patch) | |
tree | c9e9397dce845605e550ca67d10f9e1666257730 /src/HTML5/Parser/StringInputStream.php | |
parent | 64eaa3365d74b58de23bb7787844ab3ead4b0b20 (diff) |
Moved UTF-8 character check out to UTF8Utils.
Diffstat (limited to 'src/HTML5/Parser/StringInputStream.php')
-rw-r--r-- | src/HTML5/Parser/StringInputStream.php | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index edd0399..5f41c21 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -65,7 +65,14 @@ class StringInputStream implements InputStream {
   public function __construct($data, $encoding = 'UTF-8') {
 
     $data = $this->convertToUTF8($data, $encoding);
-    $this->checkForIllegalCodepoints($data);
+
+    // There is good reason to question whether it makes sense to
+    // do this here, since most of these checks are done during
+    // parsing, and since this check doesn't actually *do* anything.
+    $e = UTF8Utils::checkForIllegalCodepoints($data);
+    if (!empty($e)) {
+      throw new ParseError("UTF-8 encoding issues: " . implode(', ', $e));
+    }
 
     $data = $this->replaceLinefeeds($data);
```