summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser/Scanner.php
blob: 0260391fe280c73a68d762e84197713d3a10e1b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
<?php
namespace HTML5\Parser;

/**
 * The scanner.
 *
 * A thin cursor over an input stream: it remembers the most recently read
 * character and forwards positioning, look-ahead and "consume a run of
 * characters" requests to the wrapped stream object.
 */
class Scanner {
  /** Characters accepted as ASCII hexadecimal digits. */
  const CHARS_HEX = 'abcdefABCDEF0123456789';
  /** Characters accepted as ASCII letters or digits. */
  const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
  /** Characters accepted as ASCII letters. */
  const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';

  /**
   * The character returned by the most recent call to next().
   *
   * Unset (NULL) until next() has been called at least once.
   */
  protected $char;

  /**
   * The input stream being scanned.
   */
  protected $is;

  /**
   * Create a new Scanner.
   *
   * @param \HTML5\InputStream $input
   *   An InputStream to be scanned.
   */
  public function __construct($input) {
    $this->is = $input;
  }

  /**
   * Get the current position.
   *
   * @return int
   *   The current integer byte position, as reported by the input stream.
   */
  public function position() {
    return $this->is->position();
  }

  /**
   * Take a peek at the character after the next character in the data.
   *
   * For example, you start scanning the string abc. The pointer is before a.
   * When you start, peek() will return b while next() will return a.
   *
   * This simply forwards to the input stream's peek(); it does not update
   * the character remembered by current().
   *
   * @return string
   *   The peeked character.
   */
  public function peek() {
    return $this->is->peek();
  }

  /**
   * Get the next character.
   *
   * Note: This advances the pointer. The character is also remembered so
   * that current() can return it again later.
   *
   * @return string
   *   The next character.
   */
  public function next() {
    $this->char = $this->is->char();
    return $this->char;
  }

  /**
   * Get the current character.
   *
   * Note, this does not advance the pointer; it returns whatever the last
   * call to next() produced.
   *
   * @return string
   *   The current character (NULL if next() has not been called yet).
   */
  public function current() {
    return $this->char;
  }

  /**
   * Unconsume some of the data. This moves the data pointer backwards.
   *
   * The input stream's unconsume() is invoked once per character. The value
   * reported by current() is not changed by this call.
   *
   * @param int $howMany
   *   The number of characters to move the pointer back.
   */
  public function unconsume($howMany = 1) {
    for ($i = 0; $i < $howMany; ++$i) {
      $this->is->unconsume();
    }
  }

  /**
   * Consume a run of ASCII hexadecimal digits.
   *
   * Delegates to the input stream's charsWhile().
   * NOTE(review): assumes the stream exposes charsWhile($chars) and returns
   * the matched run -- confirm against the InputStream implementation.
   *
   * @return mixed
   *   Whatever the stream's charsWhile() returns for the hex character set.
   */
  public function getHex() {
    return $this->is->charsWhile(self::CHARS_HEX);
  }

  /**
   * Consume a run of ASCII letters (a-z, A-Z).
   *
   * Delegates to the input stream's charsWhile().
   * NOTE(review): assumes the stream exposes charsWhile($chars) -- confirm.
   *
   * @return mixed
   *   Whatever the stream's charsWhile() returns for the alpha character set.
   */
  public function getAsciiAlpha() {
    return $this->is->charsWhile(self::CHARS_ALPHA);
  }

  /**
   * Consume a run of ASCII letters and digits.
   *
   * Delegates to the input stream's charsWhile().
   * NOTE(review): assumes the stream exposes charsWhile($chars) -- confirm.
   *
   * @return mixed
   *   Whatever the stream's charsWhile() returns for the alphanumeric set.
   */
  public function getAsciiAlphaNum() {
    return $this->is->charsWhile(self::CHARS_ALNUM);
  }

  /**
   * Consume a run of ASCII decimal digits.
   *
   * Delegates to the input stream's charsWhile().
   * NOTE(review): assumes the stream exposes charsWhile($chars) -- confirm.
   *
   * @return mixed
   *   Whatever the stream's charsWhile() returns for the digits 0-9.
   */
  public function getNumeric() {
    return $this->is->charsWhile('0123456789');
  }
}