summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Butcher <[email protected]>2013-04-10 10:32:41 -0500
committerMatt Butcher <[email protected]>2013-04-10 10:32:41 -0500
commitb50dd3df9f5c4eb3dccf50b9f6434bacbbf8820a (patch)
tree1b8b7e599dc4b0506239d85e50c053f0ca5f5d57
parent0492edc5c43c9e355ea49479fc31945db951dad7 (diff)
parent699802c519e9779cd94dece288c840acf4c0ce51 (diff)
Merge branch 'master' of github.com:technosophos/HTML5-PHP
-rw-r--r--src/HTML5/Parser/Scanner.php54
-rw-r--r--test/HTML5/ScannerTest.php128
2 files changed, 177 insertions, 5 deletions
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index 33621fd..2500dab 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -36,7 +36,10 @@ class Scanner {
}
/**
- * Take a peek at the next character in the data.
+ * Take a peek at the character after the next character in the data.
+ *
+ * For example, you start scanning the string abc. The pointer is before a.
+ * When you start, peek() will return b while next() will return a.
*
* @return string
* The next character.
@@ -82,16 +85,57 @@ class Scanner {
}
}
+ /**
+ * Get the next group of characters that is a hex value.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @todo There is a potential for many false positives with this method. Make it more accurate.
+ *
+ * @return string
+ * The next group that is a hex value.
+ */
public function getHex() {
- $this->charsWhile(self::CHARS_HEX);
+ return $this->is->charsWhile(self::CHARS_HEX);
}
+
+ /**
+ * Get the next group of characters that are ASCII Alpha characters.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @return string
+ * The next group of ASCII alpha characters.
+ */
public function getAsciiAlpha() {
- $this->charsWhile(self::CHARS_ALPHA);
+ return $this->is->charsWhile(self::CHARS_ALPHA);
}
+
+ /**
+ * Get the next group of characters that are ASCII Alpha characters and numbers.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @return string
+ * The next group of ASCII alpha characters and numbers.
+ */
public function getAsciiAlphaNum() {
- $this->charsWhile(self::CHARS_ALNUM);
+ return $this->is->charsWhile(self::CHARS_ALNUM);
}
+
+ /**
+ * Get the next group of numbers.
+ *
+ * Note, along with getting the characters the pointer in the data will be
+ * moved as well.
+ *
+ * @return string
+ * The next group of numbers.
+ */
public function getNumeric() {
- $this->charsWhile('0123456789');
+ return $this->is->charsWhile('0123456789');
}
}
diff --git a/test/HTML5/ScannerTest.php b/test/HTML5/ScannerTest.php
new file mode 100644
index 0000000..251dcfa
--- /dev/null
+++ b/test/HTML5/ScannerTest.php
@@ -0,0 +1,128 @@
+<?php
+/**
+ * @file
+ * Test the Scanner. This requires that the InputStream tests all pass.
+ */
+namespace HTML5\Tests;
+
+use \HTML5\InputStream;
+use \HTML5\Parser\Scanner;
+
+require_once 'TestCase.php';
+
+class ScannerTest extends TestCase {
+
+ /**
+ * A canary test to make sure the basics are set up and working.
+ */
+ public function testConstruct() {
+ $is = new InputStream("abc");
+ $s = new Scanner($is);
+
+ $this->assertInstanceOf('\HTML5\Parser\Scanner', $s);
+ }
+
+ public function testNext() {
+ $s = new Scanner(new InputStream("abc"));
+
+ $this->assertEquals('a', $s->next());
+ $this->assertEquals('b', $s->next());
+ $this->assertEquals('c', $s->next());
+ }
+
+ public function testPosition() {
+ $s = new Scanner(new InputStream("abc"));
+
+ $this->assertEquals(0, $s->position());
+
+ $s->next();
+ $this->assertEquals(1, $s->position());
+ }
+
+ public function testPeek() {
+ $s = new Scanner(new InputStream("abc"));
+
+ // The scanner is currently pointed before a.
+ $this->assertEquals('b', $s->peek());
+
+ $s->next();
+ $this->assertEquals('c', $s->peek());
+ }
+
+ public function testCurrent() {
+ $s = new Scanner(new InputStream("abc"));
+
+ // Before scanning of the string begins, the current character is empty.
+ $this->assertEquals('', $s->current());
+
+ $c = $s->next();
+ $this->assertEquals($c, $s->current());
+
+ // Test movement through the string.
+ $c = $s->next();
+ $this->assertEquals($c, $s->current());
+ }
+
+ public function testUnconsume() {
+ $s = new Scanner(new InputStream("abcdefghijklmnopqrst"));
+
+ // Get initial position.
+ $s->next();
+ $start = $s->position();
+
+ // Move forward a bunch of positions.
+ $amount = 7;
+ for($i = 0; $i < $amount; $i++) {
+ $s->next();
+ }
+
+ // Roll back the amount we moved forward.
+ $s->unconsume($amount);
+
+ $this->assertEquals($start, $s->position());
+ }
+
+ // public function testGetHex() {
+ // $s = new Scanner(new InputStream("abcdef%mnop*"));
+
+ // $s->next();
+
+ // $this->assertEquals('bcdef', $s->getHex());
+
+ // echo $s->next(); echo $s->next(); echo $s->position(); echo $s->getHex();
+
+ // //$this->assertEquals('mnop', $s->getHex());
+ // }
+
+ public function testGetAsciiAlpha() {
+ $s = new Scanner(new InputStream("abcdef1%mnop*"));
+
+ $this->assertEquals('abcdef', $s->getAsciiAlpha());
+
+ // Move past the 1% to scan the next group of text.
+ $s->next();
+ $s->next();
+ $this->assertEquals('mnop', $s->getAsciiAlpha());
+ }
+
+ public function testGetAsciiAlphaNum() {
+ $s = new Scanner(new InputStream("abcdef1ghpo#mn94op"));
+
+ $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum());
+
+ // Move past the # to scan the next group of text.
+ $s->next();
+ $this->assertEquals('mn94op', $s->getAsciiAlphaNum());
+ }
+
+ public function testGetNumeric() {
+ $s = new Scanner(new InputStream("1784a 45 9867 #"));
+
+ $this->assertEquals('1784', $s->getNumeric());
+
+ // Move past the 'a ' to scan the next group of text.
+ $s->next();
+ $s->next();
+ $this->assertEquals('45', $s->getNumeric());
+ }
+} \ No newline at end of file