summary refs log tree commit diff
path: root/src/HTML5/Parser/Tokenizer.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5/Parser/Tokenizer.php')
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 45
1 files changed, 39 insertions, 6 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 1f6868b..9645f83 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -431,6 +431,12 @@ class Tokenizer
/**
* Parse attributes from inside of a tag.
+ *
+ * @param string[] $attributes
+ *
+ * @return bool
+ *
+ * @throws ParseError
*/
protected function attribute(&$attributes)
{
@@ -489,6 +495,8 @@ class Tokenizer
/**
* Consume an attribute value.
* 8.2.4.37 and after.
+ *
+ * @return string|null
*/
protected function attributeValue()
{
@@ -590,6 +598,8 @@ class Tokenizer
* Prepend any leading characters. This essentially
* negates the need to backtrack, but it's sort of
* a hack.
+ *
+ * @return bool
*/
protected function bogusComment($leading = '')
{
@@ -614,6 +624,8 @@ class Tokenizer
* Read a comment.
*
* Expects the first tok to be inside of the comment.
+ *
+ * @return bool
*/
protected function comment()
{
@@ -645,6 +657,8 @@ class Tokenizer
/**
* Check if the scanner has reached the end of a comment.
+ *
+ * @return bool
*/
protected function isCommentEnd()
{
@@ -679,6 +693,8 @@ class Tokenizer
* not Quirksmode is enabled on the event handler.
*
* @todo This method is a little long. Should probably refactor.
+ *
+ * @return bool
*/
protected function doctype()
{
@@ -701,13 +717,9 @@ class Tokenizer
return $this->eof();
}
- $doctypeName = '';
-
// NULL char: convert.
if ($tok === "\0") {
$this->parseError("Unexpected null character in DOCTYPE.");
- $doctypeName .= UTF8::FFFD;
- $tok = $this->scanner->next();
}
$stop = " \n\f>";
@@ -792,6 +804,7 @@ class Tokenizer
* @param string $stopchars
* Characters (in addition to a close-quote) that should stop the string.
* E.g. sometimes '>' is higher precedence than '"' or "'".
+ *
* @return mixed String if one is found (quotations omitted)
*/
protected function quotedString($stopchars)
@@ -813,6 +826,8 @@ class Tokenizer
/**
* Handle a CDATA section.
+ *
+ * @return bool
*/
protected function cdataSection()
{
@@ -856,6 +871,8 @@ class Tokenizer
* treated as "bogus comments". However, since we're not a user
* agent, we allow them. We consume until ?> and then issue an
* EventListener::processingInstruction() event.
+ *
+ * @return bool
*/
protected function processingInstruction()
{
@@ -900,6 +917,10 @@ class Tokenizer
/**
* Read from the input stream until we get to the desired sequence
* or hit the end of the input stream.
+ *
+ * @param string $sequence
+ *
+ * @return string
*/
protected function readUntilSequence($sequence)
{
@@ -935,6 +956,11 @@ class Tokenizer
* Example: $this->sequenceMatches('</script>') will
* see if the input stream is at the start of a
* '</script>' string.
+ *
+ * @param string $sequence
+ * @param bool $caseSensitive
+ *
+ * @return bool
*/
protected function sequenceMatches($sequence, $caseSensitive = true)
{
@@ -976,6 +1002,8 @@ class Tokenizer
* Add text to the temporary buffer.
*
* @see flushBuffer()
+ *
+ * @param string $str
*/
protected function buffer($str)
{
@@ -987,6 +1015,10 @@ class Tokenizer
*
* A parse error always returns false because it never consumes any
* characters.
+ *
+ * @param string $msg
+ *
+ * @return bool
*/
protected function parseError($msg)
{
@@ -1009,9 +1041,11 @@ class Tokenizer
* Returns false if the entity could not be found. If $inAttribute is set
* to true, a bare & will be returned as-is.
*
- * @param boolean $inAttribute
+ * @param bool $inAttribute
* Set to true if the text is inside of an attribute value.
* false otherwise.
+ *
+ * @return bool|string
*/
protected function decodeCharacterReference($inAttribute = false)
{
@@ -1023,7 +1057,6 @@ class Tokenizer
// Next char after &.
$tok = $this->scanner->next();
- $entity = '';
$start = $this->scanner->position();
if ($tok == false) {