summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser/EventHandler.php
blob: 4a99aa6733a1c33b0250977c2c8e4fbb58be5bea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
<?php
namespace Masterminds\HTML5\Parser;

/**
 * Standard events for HTML5.
 *
 * This is roughly analogous to a SAX2 or expat-style interface.
 * However, it is tuned specifically for HTML5, according to section 8
 * of the HTML5 specification.
 *
 * An event handler receives parser events. For a concrete
 * implementation, see DOMTreeBuilder.
 *
 * Quirks support in the parser is limited to close-in syntax (malformed
 * tags or attributes). Higher order syntax and semantic issues with a
 * document (e.g. mismatched tags, illegal nesting, etc.) are the
 * responsibility of the event handler implementation.
 *
 * See HTML5 spec section 8.2.4
 */
interface EventHandler {
  /**
   * Identifier types accepted as the $idType argument of doctype().
   *
   * DOCTYPE_PUBLIC means the accompanying ID is a public ID and
   * DOCTYPE_SYSTEM means it is a system ID. DOCTYPE_NONE is the default and
   * presumably signals that no identifier was given.
   */
  const DOCTYPE_NONE = 0;
  const DOCTYPE_PUBLIC = 1;
  const DOCTYPE_SYSTEM = 2;

  /**
   * A doctype declaration.
   *
   * @param string $name
   *   The name of the root element.
   * @param int $idType
   *   One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM.
   * @param string $id
   *   The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
   *   then this is a system ID.
   * @param boolean $quirks
   *   Indicates whether the builder should enter quirks mode.
   */
  public function doctype($name, $idType = self::DOCTYPE_NONE, $id = NULL, $quirks = FALSE);

  /**
   * A start tag.
   *
   * IMPORTANT: The parser watches the return value of this event. If this returns
   * an integer, the parser will switch TEXTMODE patterns according to the int.
   *
   * This is how the Tree Builder can tell the Tokenizer when a certain tag should
   * cause the parser to go into RAW text mode.
   *
   * The HTML5 standard requires that the builder is the one that initiates this
   * step, and this is the only way short of a circular reference that we can
   * do that.
   *
   * Example: if a startTag event for a `script` name is fired, and the startTag()
   * implementation returns Tokenizer::TEXTMODE_RAW, then the tokenizer will
   * switch into RAW text mode and consume data until it reaches a closing
   * `script` tag.
   *
   * The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
   * closing tag is encountered. **This behavior may change.**
   *
   * @param string $name
   *   The tag name.
   * @param array $attributes
   *   An array with all of the tag's attributes.
   * @param boolean $selfClosing
   *   An indicator of whether or not this tag is self-closing (<foo/>)
   * @return int
   *   One of the Tokenizer::TEXTMODE_* constants. As noted above, the parser
   *   only switches text mode when the returned value is an integer.
   */
  public function startTag($name, $attributes = array(), $selfClosing = FALSE);

  /**
   * An end-tag.
   *
   * @param string $name
   *   The tag name.
   */
  public function endTag($name);

  /**
   * A comment section (unparsed character data).
   *
   * @param string $cdata
   *   The unparsed character data of the comment.
   */
  public function comment($cdata);

  /**
   * A unit of parsed character data.
   *
   * Entities in this text are *already decoded*.
   *
   * @param string $cdata
   *   The parsed character data, with entities already decoded.
   */
  public function text($cdata);

  /**
   * Indicates that the document has been entirely processed.
   */
  public function eof();

  /**
   * Emitted when the parser encounters an error condition.
   *
   * @param string $msg
   *   A message describing the error.
   * @param int $line
   *   The line number reported for the error (presumably a position in the
   *   document being parsed; confirm against the tokenizer).
   * @param int $col
   *   The column number reported for the error (same caveat as $line).
   */
  public function parseError($msg, $line, $col);

  /**
   * A CDATA section.
   *
   * @param string $data
   *   The unparsed character data.
   */
  public function cdata($data);

  /**
   * A processing instruction.
   *
   * This is a holdover from the XML spec.
   *
   * While user agents don't get PIs, server-side does.
   *
   * @param string $name
   *   The name of the processor (e.g. 'php').
   * @param string $data
   *   The unparsed data.
   */
  public function processingInstruction($name, $data = NULL);
}