From f9590a91ad948db1449a3dc61d90e9c9a71c6ee3 Mon Sep 17 00:00:00 2001 From: Matt Farina Date: Wed, 10 Apr 2013 13:07:49 -0400 Subject: Moved the scanner to the new Parser InputStream and updated the unit tests to use StringInputStream --- src/HTML5/Parser/InputStream.php | 21 ++++-------- src/HTML5/Parser/Scanner.php | 17 +++++----- src/HTML5/Parser/StringInputStream.php | 62 ++++++++++++++++++++++++---------- 3 files changed, 60 insertions(+), 40 deletions(-) (limited to 'src/HTML5') diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php index 4c5c075..bb2ca1c 100644 --- a/src/HTML5/Parser/InputStream.php +++ b/src/HTML5/Parser/InputStream.php @@ -4,12 +4,12 @@ namespace HTML5\Parser; /** * Interface for stream readers. */ -interface InputStream { +interface InputStream extends \Iterator { /** * Returns the current line that is being consumed. * - * TODO: Move this to the tokenizer. + * TODO: Move this to the scanner. */ public function currentLine(); @@ -18,18 +18,13 @@ interface InputStream { * * Newlines are column 0. The first char after a newline is column 1. * - * @TODO Move this to the tokenizer. + * @TODO Move this to the scanner. * * @return int * The column number. */ public function columnOffset(); - /** - * Retrieve the currently consumed character. - */ - public function char(); - /** * Get all characters until EOF. * @@ -74,16 +69,14 @@ interface InputStream { /** * Unconsume one character. + * + * @param int $howMany + * The number of characters to move the pointer back. */ - public function unconsume(); + public function unconsume($howMany = 1); /** * Retrieve the next character without advancing the pointer. */ public function peek(); - - /** - * Get the position of the reader. - */ - public function position(); } diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php index 10698d7..4716fc4 100644 --- a/src/HTML5/Parser/Scanner.php +++ b/src/HTML5/Parser/Scanner.php @@ -11,8 +11,6 @@ class Scanner { const CHARS_ALNUM = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890'; const CHARS_ALPHA = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ'; - - protected $char; protected $is; /** @@ -32,7 +30,7 @@ class Scanner { * The current intiger byte position. */ public function position() { - return $this->is->position(); + return $this->is->key(); } /** @@ -54,8 +52,11 @@ class Scanner { * The next character. */ public function next() { - $this->char = $this->is->char(); - return $this->char; + $this->is->next(); + if ($this->is->valid()) { + return $this->is->current(); + } + return FALSE; } /** @@ -67,7 +68,7 @@ class Scanner { * The current character. */ public function current() { - return $this->char; + return $this->is->current(); } /** @@ -77,9 +78,7 @@ class Scanner { * The number of characters to move the pointer back. */ public function unconsume($howMany = 1) { - for ($i = 0; $i < $howMany; ++$i) { - $this->is->unconsume(); - } + $this->is->unconsume($howMany); } /** diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php index 4ceae44..e26bb38 100644 --- a/src/HTML5/Parser/StringInputStream.php +++ b/src/HTML5/Parser/StringInputStream.php @@ -269,16 +269,40 @@ class StringInputStream implements InputStream { } /** - * Retrieve the currently consumed character. - * @note This performs bounds checking + * Get the current character. + * + * @return string + * The current character. + */ + public function current() { + return $this->data[$this->char]; + } + + /** + * Advance the pointer. This is part of the Iterator interface. + */ + public function next() { + $this->char++; + } + + /** + * Rewind to the start of the string. + */ + public function rewind() { + $this->char = 0; + } + + /** + * Is the current pointer location valid. + * + * @return bool + * Is the current pointer location valid. */ - public function char() { - // MPB: This appears to advance the pointer, which is not the same - // as "retrieving the currently consumed character". Calling char() - // twice will return two different results. - if ($this->char++ < $this->EOF) { - return $this->data[$this->char - 1]; + public function valid() { + if ($this->char < $this->EOF) { + return TRUE; } + return FALSE; } @@ -362,22 +386,26 @@ class StringInputStream implements InputStream { } /** - * Unconsume one character. + * Unconsume characters. + * + * @param int $howMany + * The number of characters to unconsume. */ - public function unconsume() { - if ($this->char > 0 && $this->char <= $this->EOF) { - $this->char--; + public function unconsume($howMany = 1) { + if (($this->char - $howMany) >= 0) { + $this->char = $this->char - $howMany; } } - public function unget() { - $this->unconsume(); - } public function peek() { - return $this->data[$this->char + 1]; + if (($this->char + 1) <= $this->EOF) { + return $this->data[$this->char + 1]; + } + + return FALSE; } - public function position() { + public function key() { return $this->char; } } -- cgit v1.2.3