diff options
author | Matt Butcher <[email protected]> | 2013-04-10 10:32:41 -0500 |
---|---|---|
committer | Matt Butcher <[email protected]> | 2013-04-10 10:32:41 -0500 |
commit | b50dd3df9f5c4eb3dccf50b9f6434bacbbf8820a (patch) | |
tree | 1b8b7e599dc4b0506239d85e50c053f0ca5f5d57 | |
parent | 0492edc5c43c9e355ea49479fc31945db951dad7 (diff) | |
parent | 699802c519e9779cd94dece288c840acf4c0ce51 (diff) |
Merge branch 'master' of github.com:technosophos/HTML5-PHP
-rw-r--r-- | src/HTML5/Parser/Scanner.php | 54 | ||||
-rw-r--r-- | test/HTML5/ScannerTest.php | 128 |
2 files changed, 177 insertions, 5 deletions
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php index 33621fd..2500dab 100644 --- a/src/HTML5/Parser/Scanner.php +++ b/src/HTML5/Parser/Scanner.php @@ -36,7 +36,10 @@ class Scanner { } /** - * Take a peek at the next character in the data. + * Take a peek at the character after the next character in the data. + * + * For example, you start scanning the string abc. The pointer is before a. + * When you start peek() will return b while next() will return a. * * @return string * The next character. @@ -82,16 +85,57 @@ class Scanner { } } + /** + * Get the next group of that is a hex value. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @todo There is a potential for many false positives with this method. Make it more accurate. + * + * @return string + * The next group that is a hex value. + */ public function getHex() { - $this->charsWhile(self::CHARS_HEX); + return $this->is->charsWhile(self::CHARS_HEX); } + + /** + * Get the next group of characters that are ASCII Alpha characters. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @return string + * The next group of ASCII alpha characters. + */ public function getAsciiAlpha() { - $this->charsWhile(self::CHARS_ALPHA); + return $this->is->charsWhile(self::CHARS_ALPHA); } + + /** + * Get the next group of characters that are ASCII Alpha characters and numbers. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @return string + * The next group of ASCII alpha characters and numbers. + */ public function getAsciiAlphaNum() { - $this->charsWhile(self::CHARS_ALNUM); + return $this->is->charsWhile(self::CHARS_ALNUM); } + + /** + * Get the next group of numbers. + * + * Note, along with getting the characters the pointer in the data will be + * moved as well. + * + * @return string + * The next group of numbers. + */ public function getNumeric() { - $this->charsWhile('0123456789'); + return $this->is->charsWhile('0123456789'); } } diff --git a/test/HTML5/ScannerTest.php b/test/HTML5/ScannerTest.php new file mode 100644 index 0000000..251dcfa --- /dev/null +++ b/test/HTML5/ScannerTest.php @@ -0,0 +1,128 @@ +<?php +/** + * @file + * Test the Scanner. This requires the InputStream tests are all good. + */ +namespace HTML5\Tests; + +use \HTML5\InputStream; +use \HTML5\Parser\Scanner; + +require_once 'TestCase.php'; + +class ScannerTest extends TestCase { + + /** + * A canary test to make sure the basics are setup and working. + */ + public function testConstruct() { + $is = new InputStream("abc"); + $s = new Scanner($is); + + $this->assertInstanceOf('\HTML5\Parser\Scanner', $s); + } + + public function testNext() { + $s = new Scanner(new InputStream("abc")); + + $this->assertEquals('a', $s->next()); + $this->assertEquals('b', $s->next()); + $this->assertEquals('c', $s->next()); + } + + public function testPosition() { + $s = new Scanner(new InputStream("abc")); + + $this->assertEquals(0, $s->position()); + + $s->next(); + $this->assertEquals(1, $s->position()); + } + + public function testPeek() { + $s = new Scanner(new InputStream("abc")); + + // The scanner is currently pointed before a. + $this->assertEquals('b', $s->peek()); + + $s->next(); + $this->assertEquals('c', $s->peek()); + } + + public function testCurrent() { + $s = new Scanner(new InputStream("abc")); + + // Before scanning the string begins the current is empty. + $this->assertEquals('', $s->current()); + + $c = $s->next(); + $this->assertEquals($c, $s->current()); + + // Test movement through the string. + $c = $s->next(); + $this->assertEquals($c, $s->current()); + } + + public function testUnconsume() { + $s = new Scanner(new InputStream("abcdefghijklmnopqrst")); + + // Get initial position. + $s->next(); + $start = $s->position(); + + // Move forward a bunch of positions. + $amount = 7; + for($i = 0; $i < $amount; $i++) { + $s->next(); + } + + // Roll back the amount we moved forward. + $s->unconsume($amount); + + $this->assertEquals($start, $s->position()); + } + + // public function testGetHex() { + // $s = new Scanner(new InputStream("abcdef%mnop*")); + + // $s->next(); + + // $this->assertEquals('bcdef', $s->getHex()); + + // echo $s->next(); echo $s->next(); echo $s->position(); echo $s->getHex(); + + // //$this->assertEquals('mnop', $s->getHex()); + // } + + public function testGetAsciiAlpha() { + $s = new Scanner(new InputStream("abcdef1%mnop*")); + + $this->assertEquals('abcdef', $s->getAsciiAlpha()); + + // Move past the 1% to scan the next group of text. + $s->next(); + $s->next(); + $this->assertEquals('mnop', $s->getAsciiAlpha()); + } + + public function testGetAsciiAlphaNum() { + $s = new Scanner(new InputStream("abcdef1ghpo#mn94op")); + + $this->assertEquals('abcdef1ghpo', $s->getAsciiAlphaNum()); + + // Move past the # to scan the next group of text. + $s->next(); + $this->assertEquals('mn94op', $s->getAsciiAlphaNum()); + } + + public function testGetNumeric() { + $s = new Scanner(new InputStream("1784a 45 9867 #")); + + $this->assertEquals('1784', $s->getNumeric()); + + // Move past the 'a ' to scan the next group of text. + $s->next(); + $s->next(); + $this->assertEquals('45', $s->getNumeric()); + } +}
\ No newline at end of file |