summary | refs | log | tree | commit | diff
path: root/src/HTML5/Parser/Scanner.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5/Parser/Scanner.php')
-rw-r--r-- src/HTML5/Parser/Scanner.php | 411
1 files changed, 213 insertions, 198 deletions
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index 18ed821..a262004 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -6,202 +6,217 @@ namespace Masterminds\HTML5\Parser;
*
* This scans over an input stream.
*/
-class Scanner {
- const CHARS_HEX = 'abcdefABCDEF01234567890';
- const CHARS_ALNUM = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890';
- const CHARS_ALPHA = 'abcdefAghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXYZ';
-
- protected $is;
-
- // Flipping this to TRUE will give minisculely more debugging info.
- public $debug = FALSE;
-
- /**
- * Create a new Scanner.
- *
- * @param \Masterminds\HTML5\Parser\InputStream $input
- * An InputStream to be scanned.
- */
- public function __construct($input) {
- $this->is = $input;
- }
-
- /**
- * Get the current position.
- *
- * @return int
- * The current intiger byte position.
- */
- public function position() {
- return $this->is->key();
- }
-
- /**
- * Take a peek at the next character in the data.
- *
- * @return string
- * The next character.
- */
- public function peek() {
- return $this->is->peek();
- }
-
- /**
- * Get the next character.
- *
- * Note: This advances the pointer.
- *
- * @return string
- * The next character.
- */
- public function next() {
- $this->is->next();
- if ($this->is->valid()) {
- if ($this->debug) fprintf(STDOUT, "> %s\n", $this->is->current());
- return $this->is->current();
- }
- return FALSE;
- }
-
- /**
- * Get the current character.
- *
- * Note, this does not advance the pointer.
- *
- * @return string
- * The current character.
- */
- public function current() {
- if ($this->is->valid()) {
- return $this->is->current();
- }
- return FALSE;
- }
-
- /**
- * Silently consume N chars.
- */
- public function consume($count = 1) {
- for ($i = 0; $i < $count; ++$i) {
- $this->next();
- }
- }
-
- /**
- * Unconsume some of the data. This moves the data pointer backwards.
- *
- * @param int $howMany
- * The number of characters to move the pointer back.
- */
- public function unconsume($howMany = 1) {
- $this->is->unconsume($howMany);
- }
-
- /**
- * Get the next group of that contains hex characters.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group that is hex characters.
- */
- public function getHex() {
- return $this->is->charsWhile(static::CHARS_HEX);
- }
-
- /**
- * Get the next group of characters that are ASCII Alpha characters.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group of ASCII alpha characters.
- */
- public function getAsciiAlpha() {
- return $this->is->charsWhile(static::CHARS_ALPHA);
- }
-
- /**
- * Get the next group of characters that are ASCII Alpha characters and numbers.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group of ASCII alpha characters and numbers.
- */
- public function getAsciiAlphaNum() {
- return $this->is->charsWhile(static::CHARS_ALNUM);
- }
-
- /**
- * Get the next group of numbers.
- *
- * Note, along with getting the characters the pointer in the data will be
- * moved as well.
- *
- * @return string
- * The next group of numbers.
- */
- public function getNumeric() {
- return $this->is->charsWhile('0123456789');
- }
-
- /**
- * Consume whitespace.
- *
- * Whitespace in HTML5 is: formfeed, tab, newline, space.
- */
- public function whitespace() {
- return $this->is->charsWhile("\n\t\f ");
- }
-
- /**
- * Returns the current line that is being consumed.
- *
- * @return int
- * The current line number.
- */
- public function currentLine() {
- return $this->is->currentLine();
- }
-
- /**
- * Read chars until something in the mask is encountered.
- */
- public function charsUntil($mask) {
- return $this->is->charsUntil($mask);
- }
- /**
- * Read chars as long as the mask matches.
- */
- public function charsWhile($mask) {
- return $this->is->charsWhile($mask);
- }
-
- /**
- * Returns the current column of the current line that the tokenizer is at.
- *
- * Newlines are column 0. The first char after a newline is column 1.
- *
- * @return int
- * The column number.
- */
- public function columnOffset() {
- return $this->is->columnOffset();
- }
-
- /**
- * Get all characters until EOF.
- *
- * This consumes characters until the EOF.
- *
- * @return int
- * The number of characters remaining.
- */
- public function remainingChars() {
- return $this->is->remainingChars();
- }
+class Scanner
+{
+    // Character masks passed to the input stream's charsWhile().
+    const CHARS_HEX = 'abcdefABCDEF0123456789';
+    const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
+    const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
+
+    // The input stream handed to the constructor; every method delegates to it.
+    protected $is;
+
+    // When TRUE, next() prints each character it reads to STDOUT.
+    public $debug = FALSE;
+
+    /**
+     * Create a new Scanner.
+     *
+     * @param \Masterminds\HTML5\Parser\InputStream $input
+     *   An InputStream to be scanned.
+     */
+    public function __construct($input)
+    {
+        $this->is = $input;
+    }
+
+    /**
+     * Get the current position.
+     *
+     * @return int The current integer byte position.
+     */
+    public function position()
+    {
+        return $this->is->key();
+    }
+
+    /**
+     * Take a peek at the next character in the data.
+     *
+     * @return string The next character.
+     */
+    public function peek()
+    {
+        return $this->is->peek();
+    }
+
+    /**
+     * Get the next character.
+     *
+     * Note: This advances the pointer.
+     *
+     * @return string|false The next character, or FALSE if the stream is no longer valid.
+     */
+    public function next()
+    {
+        $this->is->next();
+        if ($this->is->valid()) {
+            if ($this->debug) {
+                fprintf(STDOUT, "> %s\n", $this->is->current());
+            }
+            return $this->is->current();
+        }
+
+        return FALSE;
+    }
+
+    /**
+     * Get the current character.
+     *
+     * Note, this does not advance the pointer.
+     *
+     * @return string|false The current character, or FALSE if the stream is not valid.
+     */
+    public function current()
+    {
+        if ($this->is->valid()) {
+            return $this->is->current();
+        }
+
+        return FALSE;
+    }
+
+    /**
+     * Silently consume N chars by calling next() N times; nothing is returned.
+     */
+    public function consume($count = 1)
+    {
+        for ($i = 0; $i < $count; ++$i) {
+            $this->next();
+        }
+    }
+
+    /**
+     * Unconsume some of the data.
+     * This moves the data pointer backwards.
+     *
+     * @param int $howMany
+     *   The number of characters to move the pointer back.
+     */
+    public function unconsume($howMany = 1)
+    {
+        $this->is->unconsume($howMany);
+    }
+
+    /**
+     * Get the next run of hexadecimal characters (see CHARS_HEX).
+     *
+     * Note, along with getting the characters the pointer in the data will be
+     * moved as well.
+     *
+     * @return string The next group that is hex characters.
+     */
+    public function getHex()
+    {
+        return $this->is->charsWhile(static::CHARS_HEX);
+    }
+
+    /**
+     * Get the next group of characters that are ASCII Alpha characters.
+     *
+     * Note, along with getting the characters the pointer in the data will be
+     * moved as well.
+     *
+     * @return string The next group of ASCII alpha characters.
+     */
+    public function getAsciiAlpha()
+    {
+        return $this->is->charsWhile(static::CHARS_ALPHA);
+    }
+
+    /**
+     * Get the next group of characters that are ASCII Alpha characters and numbers.
+     *
+     * Note, along with getting the characters the pointer in the data will be
+     * moved as well.
+     *
+     * @return string The next group of ASCII alpha characters and numbers.
+     */
+    public function getAsciiAlphaNum()
+    {
+        return $this->is->charsWhile(static::CHARS_ALNUM);
+    }
+
+    /**
+     * Get the next group of numbers.
+     *
+     * Note, along with getting the characters the pointer in the data will be
+     * moved as well.
+     *
+     * @return string The next group of numbers.
+     */
+    public function getNumeric()
+    {
+        return $this->is->charsWhile('0123456789');
+    }
+
+    /**
+     * Consume whitespace and return whatever the stream's charsWhile() returns.
+     *
+     * Whitespace in HTML5 is: formfeed, tab, newline, space.
+     */
+    public function whitespace()
+    {
+        return $this->is->charsWhile("\n\t\f ");
+    }
+
+    /**
+     * Returns the current line that is being consumed.
+     *
+     * @return int The current line number.
+     */
+    public function currentLine()
+    {
+        return $this->is->currentLine();
+    }
+
+    /**
+     * Read chars until something in the mask is encountered.
+     */
+    public function charsUntil($mask)
+    {
+        return $this->is->charsUntil($mask);
+    }
+
+    /**
+     * Read chars as long as the mask matches.
+     */
+    public function charsWhile($mask)
+    {
+        return $this->is->charsWhile($mask);
+    }
+
+    /**
+     * Returns the current column of the current line that the tokenizer is at.
+     *
+     * Newlines are column 0. The first char after a newline is column 1.
+     *
+     * @return int The column number.
+     */
+    public function columnOffset()
+    {
+        return $this->is->columnOffset();
+    }
+
+    /**
+     * Get all characters until EOF.
+     *
+     * This consumes characters until the EOF.
+     *
+     * @return int The number of characters remaining.
+     */
+    public function remainingChars()
+    {
+        return $this->is->remainingChars();
+    }
}