diff options
author | Matt Farina <[email protected]> | 2013-04-10 13:07:49 -0400 |
---|---|---|
committer | Matt Farina <[email protected]> | 2013-04-10 13:10:34 -0400 |
commit | f9590a91ad948db1449a3dc61d90e9c9a71c6ee3 (patch) | |
tree | 87006cac96ef39ee5336ea24ec38ea7785c1c9ac /src/HTML5 | |
parent | fe956dc6bdbf3e71336bbff250ddff7f370e1f93 (diff) |
Moved the scanner to the new Parser InputStream and updated the unit tests to use StringInputStream
Diffstat (limited to 'src/HTML5')
-rw-r--r-- | src/HTML5/Parser/InputStream.php | 21 | ||||
-rw-r--r-- | src/HTML5/Parser/Scanner.php | 17 | ||||
-rw-r--r-- | src/HTML5/Parser/StringInputStream.php | 62 |
3 files changed, 60 insertions, 40 deletions
```diff
diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php
index 4c5c075..bb2ca1c 100644
--- a/src/HTML5/Parser/InputStream.php
+++ b/src/HTML5/Parser/InputStream.php
@@ -4,12 +4,12 @@ namespace HTML5\Parser;
 /**
  * Interface for stream readers.
  */
-interface InputStream {
+interface InputStream extends \Iterator {

   /**
    * Returns the current line that is being consumed.
    *
-   * TODO: Move this to the tokenizer.
+   * TODO: Move this to the scanner.
    */
   public function currentLine();

@@ -18,7 +18,7 @@ interface InputStream {
    *
    * Newlines are column 0. The first char after a newline is column 1.
    *
-   * @TODO Move this to the tokenizer.
+   * @TODO Move this to the scanner.
    *
    * @return int
    *   The column number.
@@ -26,11 +26,6 @@ interface InputStream {
   public function columnOffset();

   /**
-   * Retrieve the currently consumed character.
-   */
-  public function char();
-
-  /**
    * Get all characters until EOF.
    *
    * This consumes characters until the EOF.
@@ -74,16 +69,14 @@ interface InputStream {

   /**
    * Unconsume one character.
+   *
+   * @param int $howMany
+   *   The number of characters to move the pointer back.
    */
-  public function unconsume();
+  public function unconsume($howMany = 1);

   /**
    * Retrieve the next character without advancing the pointer.
    */
   public function peek();
-
-  /**
-   * Get the position of the reader.
-   */
-  public function position();
 }
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index 10698d7..4716fc4 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -11,8 +11,6 @@ class Scanner {
   const CHARS_ALNUM = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';
   const CHARS_ALPHA = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ';

-
-  protected $char;
   protected $is;

   /**
@@ -32,7 +30,7 @@ class Scanner {
    *   The current intiger byte position.
    */
   public function position() {
-    return $this->is->position();
+    return $this->is->key();
   }

   /**
@@ -54,8 +52,11 @@ class Scanner {
    *   The next character.
    */
   public function next() {
-    $this->char = $this->is->char();
-    return $this->char;
+    $this->is->next();
+    if ($this->is->valid()) {
+      return $this->is->current();
+    }
+    return FALSE;
   }

   /**
@@ -67,7 +68,7 @@ class Scanner {
    *   The current character.
    */
   public function current() {
-    return $this->char;
+    return $this->is->current();
   }

   /**
@@ -77,9 +78,7 @@ class Scanner {
    *   The number of characters to move the pointer back.
    */
   public function unconsume($howMany = 1) {
-    for ($i = 0; $i < $howMany; ++$i) {
-      $this->is->unconsume();
-    }
+    $this->is->unconsume($howMany);
   }

   /**
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index 4ceae44..e26bb38 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -269,16 +269,40 @@ class StringInputStream implements InputStream {
   }

   /**
-   * Retrieve the currently consumed character.
-   * @note This performs bounds checking
+   * Get the current character.
+   *
+   * @return string
+   *   The current character.
+   */
+  public function current() {
+    return $this->data[$this->char];
+  }
+
+  /**
+   * Advance the pointer. This is part of the Iterator interface.
+   */
+  public function next() {
+    $this->char++;
+  }
+
+  /**
+   * Rewind to the start of the string.
+   */
+  public function rewind() {
+    $this->char = 0;
+  }
+
+  /**
+   * Is the current pointer location valid.
+   *
+   * @return bool
+   *   Is the current pointer location valid.
    */
-  public function char() {
-    // MPB: This appears to advance the pointer, which is not the same
-    // as "retrieving the currently consumed character". Calling char()
-    // twice will return two different results.
-    if ($this->char++ < $this->EOF) {
-      return $this->data[$this->char - 1];
+  public function valid() {
+    if ($this->char < $this->EOF) {
+      return TRUE;
     }
+    return FALSE;
   }


@@ -362,22 +386,26 @@ class StringInputStream implements InputStream {
   }

   /**
-   * Unconsume one character.
+   * Unconsume characters.
+   *
+   * @param int $howMany
+   *   The number of characters to unconsume.
    */
-  public function unconsume() {
-    if ($this->char > 0 && $this->char <= $this->EOF) {
-      $this->char--;
+  public function unconsume($howMany = 1) {
+    if (($this->char - $howMany) >= 0) {
+      $this->char = $this->char - $howMany;
     }
   }

-  public function unget() {
-    $this->unconsume();
-  }
   public function peek() {
-    return $this->data[$this->char + 1];
+    if (($this->char + 1) <= $this->EOF) {
+      return $this->data[$this->char + 1];
+    }
+
+    return FALSE;
   }

-  public function position() {
+  public function key() {
     return $this->char;
   }
 }
```