summary | refs | log | tree | commit | diff
path: root/src/HTML5/Parser/StringInputStream.php
diff options
context:
space:
mode:
author: Titouan Galopin <[email protected]> 2018-10-22 23:49:05 +0200
committer: Titouan Galopin <[email protected]> 2018-11-02 21:44:20 +0100
commit: 80b8e9177f587365535e9dd6bff45addad9c2bb1 (patch)
tree: 7083457d7e3a48a8b2b74f31b59313eea1ca7f45 /src/HTML5/Parser/StringInputStream.php
parent: fb50d43c6c640683d78651535ba46c06e452c628 (diff)
Add more extensions on composer.json, improve phpdocs and remove dead code
Diffstat (limited to 'src/HTML5/Parser/StringInputStream.php')
-rw-r--r-- src/HTML5/Parser/StringInputStream.php 27
1 file changed, 16 insertions, 11 deletions
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index 4cac3c2..0973941 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -65,22 +65,21 @@ class StringInputStream implements InputStream
/**
* Create a new InputStream wrapper.
*
- * @param $data Data
- * to parse
+ * @param string $data Data to parse
+ * @param string $encoding The encoding to use for the data.
+ * @param string $debug A fprintf format to use to echo the data on stdout.
*/
public function __construct($data, $encoding = 'UTF-8', $debug = '')
{
$data = UTF8Utils::convertToUTF8($data, $encoding);
- if ($debug)
+ if ($debug) {
fprintf(STDOUT, $debug, $data, strlen($data));
+ }
- // There is good reason to question whether it makes sense to
- // do this here, since most of these checks are done during
- // parsing, and since this check doesn't actually *do* anything.
+ // There is good reason to question whether it makes sense to
+ // do this here, since most of these checks are done during
+ // parsing, and since this check doesn't actually *do* anything.
$this->errors = UTF8Utils::checkForIllegalCodepoints($data);
- // if (!empty($e)) {
- // throw new ParseError("UTF-8 encoding issues: " . implode(', ', $e));
- // }
$data = $this->replaceLinefeeds($data);
@@ -95,7 +94,11 @@ class StringInputStream implements InputStream
protected function replaceLinefeeds($data)
{
/*
- * U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED (LF) characters are treated specially. Any CR characters that are followed by LF characters must be removed, and any CR characters not followed by LF characters must be converted to LF characters. Thus, newlines in HTML DOMs are represented by LF characters, and there are never any CR characters in the input to the tokenization stage.
+ * U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED (LF) characters are treated specially.
+ * Any CR characters that are followed by LF characters must be removed, and any CR characters not
+ * followed by LF characters must be converted to LF characters. Thus, newlines in HTML DOMs are
+ * represented by LF characters, and there are never any CR characters in the input to the tokenization
+ * stage.
*/
$crlfTable = array(
"\0" => "\xEF\xBF\xBD",
@@ -126,7 +129,7 @@ class StringInputStream implements InputStream
*/
public function getCurrentLine()
{
- return currentLine();
+ return $this->currentLine();
}
/**
@@ -281,6 +284,8 @@ class StringInputStream implements InputStream
* substring.
* @param int $max
* The max number of chars to read.
+ *
+ * @return string
*/
public function charsWhile($bytes, $max = null)
{