diff options
Diffstat (limited to 'src/HTML5/Parser/Tokenizer.php')
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 45 |
1 files changed, 39 insertions, 6 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 1f6868b..9645f83 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -431,6 +431,12 @@ class Tokenizer /** * Parse attributes from inside of a tag. + * + * @param string[] $attributes + * + * @return bool + * + * @throws ParseError */ protected function attribute(&$attributes) { @@ -489,6 +495,8 @@ class Tokenizer /** * Consume an attribute value. * 8.2.4.37 and after. + * + * @return string|null */ protected function attributeValue() { @@ -590,6 +598,8 @@ class Tokenizer * Prepend any leading characters. This essentially * negates the need to backtrack, but it's sort of * a hack. + * + * @return bool */ protected function bogusComment($leading = '') { @@ -614,6 +624,8 @@ class Tokenizer * Read a comment. * * Expects the first tok to be inside of the comment. + * + * @return bool */ protected function comment() { @@ -645,6 +657,8 @@ class Tokenizer /** * Check if the scanner has reached the end of a comment. + * + * @return bool */ protected function isCommentEnd() { @@ -679,6 +693,8 @@ class Tokenizer * not Quirksmode is enabled on the event handler. * * @todo This method is a little long. Should probably refactor. + * + * @return bool */ protected function doctype() { @@ -701,13 +717,9 @@ class Tokenizer return $this->eof(); } - $doctypeName = ''; - // NULL char: convert. if ($tok === "\0") { $this->parseError("Unexpected null character in DOCTYPE."); - $doctypeName .= UTF8::FFFD; - $tok = $this->scanner->next(); } $stop = " \n\f>"; @@ -792,6 +804,7 @@ class Tokenizer * @param string $stopchars * Characters (in addition to a close-quote) that should stop the string. * E.g. sometimes '>' is higher precedence than '"' or "'". + * * @return mixed String if one is found (quotations omitted) */ protected function quotedString($stopchars) @@ -813,6 +826,8 @@ class Tokenizer /** * Handle a CDATA section. + * + * @return bool */ protected function cdataSection() { @@ -856,6 +871,8 @@ class Tokenizer * treated as "bogus comments". However, since we're not a user * agent, we allow them. We consume until ?> and then issue a * EventListener::processingInstruction() event. + * + * @return bool */ protected function processingInstruction() { @@ -900,6 +917,10 @@ class Tokenizer /** * Read from the input stream until we get to the desired sequene * or hit the end of the input stream. + * + * @param string $sequence + * + * @return string */ protected function readUntilSequence($sequence) { @@ -935,6 +956,11 @@ class Tokenizer * Example: $this->sequenceMatches('</script>') will * see if the input stream is at the start of a * '</script>' string. + * + * @param string $sequence + * @param bool $caseSensitive + * + * @return bool */ protected function sequenceMatches($sequence, $caseSensitive = true) { @@ -976,6 +1002,8 @@ class Tokenizer * Add text to the temporary buffer. * * @see flushBuffer() + * + * @param string $str */ protected function buffer($str) { @@ -987,6 +1015,10 @@ class Tokenizer * * A parse error always returns false because it never consumes any * characters. + * + * @param string $msg + * + * @return string */ protected function parseError($msg) { @@ -1009,9 +1041,11 @@ class Tokenizer * Returns false if the entity could not be found. If $inAttribute is set * to true, a bare & will be returned as-is. * - * @param boolean $inAttribute + * @param bool $inAttribute * Set to true if the text is inside of an attribute value. * false otherwise. + * + * @return bool|string */ protected function decodeCharacterReference($inAttribute = false) { @@ -1023,7 +1057,6 @@ class Tokenizer // Next char after &. $tok = $this->scanner->next(); - $entity = ''; $start = $this->scanner->position(); if ($tok == false) { |