summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matt Farina <[email protected]> 2013-04-10 13:07:49 -0400
committer: Matt Farina <[email protected]> 2013-04-10 13:10:34 -0400
commit: f9590a91ad948db1449a3dc61d90e9c9a71c6ee3 (patch)
tree: 87006cac96ef39ee5336ea24ec38ea7785c1c9ac
parent: fe956dc6bdbf3e71336bbff250ddff7f370e1f93 (diff)
Moved the scanner to the new Parser InputStream and updated the unit tests to use StringInputStream
-rw-r--r-- src/HTML5/Parser/InputStream.php 21
-rw-r--r-- src/HTML5/Parser/Scanner.php 17
-rw-r--r-- src/HTML5/Parser/StringInputStream.php 62
-rw-r--r-- test/HTML5/ScannerTest.php 3
4 files changed, 61 insertions, 42 deletions
diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php
index 4c5c075..bb2ca1c 100644
--- a/src/HTML5/Parser/InputStream.php
+++ b/src/HTML5/Parser/InputStream.php
@@ -4,12 +4,12 @@ namespace HTML5\Parser;
/**
* Interface for stream readers.
*/
-interface InputStream {
+interface InputStream extends \Iterator {
/**
* Returns the current line that is being consumed.
*
- * TODO: Move this to the tokenizer.
+ * TODO: Move this to the scanner.
*/
public function currentLine();
@@ -18,7 +18,7 @@ interface InputStream {
*
* Newlines are column 0. The first char after a newline is column 1.
*
- * @TODO Move this to the tokenizer.
+ * @TODO Move this to the scanner.
*
* @return int
* The column number.
@@ -26,11 +26,6 @@ interface InputStream {
public function columnOffset();
/**
- * Retrieve the currently consumed character.
- */
- public function char();
-
- /**
* Get all characters until EOF.
*
* This consumes characters until the EOF.
@@ -74,16 +69,14 @@ interface InputStream {
/**
* Unconsume one character.
+ *
+ * @param int $howMany
+ * The number of characters to move the pointer back.
*/
- public function unconsume();
+ public function unconsume($howMany = 1);
/**
* Retrieve the next character without advancing the pointer.
*/
public function peek();
-
- /**
- * Get the position of the reader.
- */
- public function position();
}
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index 10698d7..4716fc4 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -11,8 +11,6 @@ class Scanner {
const CHARS_ALNUM = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';
const CHARS_ALPHA = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ';
-
- protected $char;
protected $is;
/**
@@ -32,7 +30,7 @@ class Scanner {
* The current intiger byte position.
*/
public function position() {
- return $this->is->position();
+ return $this->is->key();
}
/**
@@ -54,8 +52,11 @@ class Scanner {
* The next character.
*/
public function next() {
- $this->char = $this->is->char();
- return $this->char;
+ $this->is->next();
+ if ($this->is->valid()) {
+ return $this->is->current();
+ }
+ return FALSE;
}
/**
@@ -67,7 +68,7 @@ class Scanner {
* The current character.
*/
public function current() {
- return $this->char;
+ return $this->is->current();
}
/**
@@ -77,9 +78,7 @@ class Scanner {
* The number of characters to move the pointer back.
*/
public function unconsume($howMany = 1) {
- for ($i = 0; $i < $howMany; ++$i) {
- $this->is->unconsume();
- }
+ $this->is->unconsume($howMany);
}
/**
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index 4ceae44..e26bb38 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -269,16 +269,40 @@ class StringInputStream implements InputStream {
}
/**
- * Retrieve the currently consumed character.
- * @note This performs bounds checking
+ * Get the current character.
+ *
+ * @return string
+ * The current character.
+ */
+ public function current() {
+ return $this->data[$this->char];
+ }
+
+ /**
+ * Advance the pointer. This is part of the Iterator interface.
+ */
+ public function next() {
+ $this->char++;
+ }
+
+ /**
+ * Rewind to the start of the string.
+ */
+ public function rewind() {
+ $this->char = 0;
+ }
+
+ /**
+ * Is the current pointer location valid.
+ *
+ * @return bool
+ * Is the current pointer location valid.
*/
- public function char() {
- // MPB: This appears to advance the pointer, which is not the same
- // as "retrieving the currently consumed character". Calling char()
- // twice will return two different results.
- if ($this->char++ < $this->EOF) {
- return $this->data[$this->char - 1];
+ public function valid() {
+ if ($this->char < $this->EOF) {
+ return TRUE;
}
+
return FALSE;
}
@@ -362,22 +386,26 @@ class StringInputStream implements InputStream {
}
/**
- * Unconsume one character.
+ * Unconsume characters.
+ *
+ * @param int $howMany
+ * The number of characters to unconsume.
*/
- public function unconsume() {
- if ($this->char > 0 && $this->char <= $this->EOF) {
- $this->char--;
+ public function unconsume($howMany = 1) {
+ if (($this->char - $howMany) >= 0) {
+ $this->char = $this->char - $howMany;
}
}
- public function unget() {
- $this->unconsume();
- }
public function peek() {
- return $this->data[$this->char + 1];
+ if (($this->char + 1) <= $this->EOF) {
+ return $this->data[$this->char + 1];
+ }
+
+ return FALSE;
}
- public function position() {
+ public function key() {
return $this->char;
}
}
diff --git a/test/HTML5/ScannerTest.php b/test/HTML5/ScannerTest.php
index 8c341fa..997123c 100644
--- a/test/HTML5/ScannerTest.php
+++ b/test/HTML5/ScannerTest.php
@@ -25,7 +25,6 @@ class ScannerTest extends TestCase {
public function testNext() {
$s = new Scanner(new StringInputStream("abc"));
- $this->assertEquals('a', $s->next());
$this->assertEquals('b', $s->next());
$this->assertEquals('c', $s->next());
}
@@ -42,7 +41,7 @@ class ScannerTest extends TestCase {
public function testPeek() {
$s = new Scanner(new StringInputStream("abc"));
- // The scanner is currently pointed before a.
+
$this->assertEquals('b', $s->peek());
$s->next();