summary | refs | log | tree | commit | diff
path: root/src/HTML5/Parser/StringInputStream.php
diff options
context:
space:
mode:
author Technosophos <[email protected]> 2013-04-11 13:18:56 -0500
committer Technosophos <[email protected]> 2013-04-11 13:18:56 -0500
commit 1678e73e34b167ba0312d1b65797363ea8f62fe2 (patch)
tree c9e9397dce845605e550ca67d10f9e1666257730 /src/HTML5/Parser/StringInputStream.php
parent 64eaa3365d74b58de23bb7787844ab3ead4b0b20 (diff)
Moved UTF-8 character check out to UTF8Utils.
Diffstat (limited to 'src/HTML5/Parser/StringInputStream.php')
-rw-r--r-- src/HTML5/Parser/StringInputStream.php 9
1 files changed, 8 insertions, 1 deletions
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index edd0399..5f41c21 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -65,7 +65,14 @@ class StringInputStream implements InputStream {
public function __construct($data, $encoding = 'UTF-8') {
$data = $this->convertToUTF8($data, $encoding);
- $this->checkForIllegalCodepoints($data);
+
+ // There is good reason to question whether it makes sense to
+ // do this here, since most of these checks are done during
+ // parsing, and since this check doesn't actually *do* anything.
+ $e = UTF8Utils::checkForIllegalCodepoints($data);
+ if (!empty($e)) {
+ throw new ParseError("UTF-8 encoding issues: " . implode(', ', $e));
+ }
$data = $this->replaceLinefeeds($data);