scanner = $scanner;
$this->events = $eventHandler;
}
/**
* 8.2.4.1
*
* Consume one token in the data state.
*
* Advances the scanner by one token, records it as the current token, and
* hands it to characterReference(), which only acts when that token is '&'.
* The remaining cases (tag open, NULL, EOF, plain character) are listed
* below but are not implemented yet.
*/
public function consumeData() {
// Scan a token and remember it: characterReference() inspects $this->tok,
// so the scanner's return value must be stored rather than discarded.
$this->tok = $this->scanner->next();
// Character Ref
$this->characterReference();
// TagOpen
// Null
// EOF
// Character
}
/**
* 8.2.4.2
*
* Handle a character reference at the current token.
*
* Does nothing unless the current token is '&'. Otherwise it advances the
* scanner past the '&' and appends whatever consumeCharacterReference()
* returns to the accumulated text buffer.
*
* @param boolean $inAttr
*   Passed through to consumeCharacterReference(); TRUE when the reference
*   occurs inside an attribute value.
*/
protected function characterReference($inAttr = FALSE) {
if ($this->tok == '&') {
$this->tok = $this->scanner->next();
// Append to the text property itself. A doubled '$' here would turn this
// into a variable-variable lookup and never touch $this->text.
$this->text .= $this->consumeCharacterReference($inAttr);
}
}
/**
* Consume the remainder of a character reference after its leading '&'.
*
* Expects $this->tok to hold the token that follows the '&'.
*
* NOTE(review): the value returned on success is not decoded into a
* character (hexdec() result, raw digit string, or raw entity name);
* confirm whether callers expect decoding to happen here.
*
* @param boolean $inAttribute
*   TRUE when the reference occurs inside an attribute value. In that case
*   a reference with no trailing ';' is unconsumed and treated as a literal
*   '&' instead of raising an error.
*
* @return mixed
*   '&' when nothing was consumed as a reference; otherwise the result of
*   hexdec() for a hexadecimal reference, the digits read by the scanner for
*   a decimal reference, or the name read by the scanner for a named one.
*
* @throws ParseError
*   When '#' or '#x' is not followed by digits, or when the reference is not
*   terminated by ';' outside of an attribute.
*/
protected function consumeCharacterReference($inAttribute = FALSE) {
$entity = '';
// Remember where we started so an attribute-context failure can rewind.
$start = $this->scanner->position();
// Whitespace: Ignore
switch ($this->tok) {
case NULL:
case "\t":
case "\n":
case "\f":
case ' ':
case '&':
case '<':
// Don't consume; just return. Spec says return nothing, but I
// think we have to append '&' to the string.
return '&';
case '#':
// Consume and read a number
$this->tok = $this->scanner->next();
// X[0-9a-fA-F]+;
// x[0-9a-fA-F]+;
if ($this->tok == 'x' || $this->tok == 'X') {
// NOTE(review): assumes getHex() steps over the 'x' marker itself;
// confirm against the scanner implementation.
$hex = $this->scanner->getHex();
$this->tok = $this->scanner->current();
// Compare explicitly instead of using empty(): the single digit "0"
// is a real match and must not be reported as missing.
if ($hex === NULL || $hex === FALSE || $hex === '') {
throw new ParseError("Expected HEX;, got " . $this->tok);
}
$entity = hexdec($hex);
}
// [0-9]+;
else {
$entity = $this->scanner->getNumeric();
$this->tok = $this->scanner->current();
// Test the value that was actually read (again without empty(), so
// that "0" counts as digits).
if ($entity === NULL || $entity === FALSE || $entity === '') {
throw new ParseError("Expected DIGITS;, got &#" . $this->tok);
}
}
break;
default:
// Attempt to consume a string up to a ';'.
// [a-zA-Z0-9]+;
$entity = $this->scanner->getAsciiAlpha();
$this->tok = $this->scanner->current();
}
// We have an entity. We're done here.
if ($this->tok == ';') {
return $entity;
}
// If in an attribute, then failing to match ; means unconsume the
// entire string. Otherwise, failure to match is an error.
if ($inAttribute) {
$this->scanner->unconsume($this->scanner->position() - $start);
return '&';
}
throw new ParseError("Expected &ENTITY;, got &ENTITY (no trailing ;)");
}
/**
* RCDATA handling. Not yet implemented.
*
* The body holds only notes on the input cases that still need handling;
* calling this method currently does nothing.
*/
protected function rcdata() {
// Ampersand
// <
// Null
// EOF
// Character
}
/**
* RAWTEXT handling. Not yet implemented.
*
* The body holds only notes on the intended treatment of each input;
* calling this method currently does nothing.
*/
protected function rawtext() {
// < is a literal
// NULL is an error
// EOF
// Character data
}
/**
* Script data handling. Not yet implemented.
*
* The body holds only notes on the intended treatment of each input;
* calling this method currently does nothing.
*/
protected function scriptData() {
// < is a literal
// NULL is an error
// EOF
// Character data
}
/**
* 8.2.4.7
*
* PLAINTEXT handling. Not yet implemented.
*
* The body holds only notes on the intended outcome for each input;
* calling this method currently does nothing.
*/
protected function plaintext() {
// NULL -> parse error
// EOF -> eof
// -> Character data
}
/**
* 8.2.4.8
*
* Tag open handling. Not yet implemented.
*
* The body holds only notes on the intended transition for each input;
* calling this method currently does nothing.
*/
protected function tagOpen() {
// ! -> markup declaration
// / -> end tagopen
// a-zA-Z -> tagname
// ? -> parse error
// -> Anything else is a parse error
}
/**
* 8.2.4.9
*
* End tag open handling. Not yet implemented.
*
* The body holds only notes on the intended transition for each input;
* calling this method currently does nothing.
*/
protected function endTagOpen() {
// a-zA-Z -> tagname
// > -> parse error
// EOF -> parse error
// -> parse error
}
/**
* 8.2.4.10
*
* Tag name handling. Not yet implemented.
*
* The body holds only notes on the intended transition for each input;
* calling this method currently does nothing.
*/
protected function tagName() {
// tab, lf, ff, space -> before attr name
// / -> self-closing tag
// > -> current tag is done, data-state
// NULL parse error
// EOF -> parse error
// -> append to tagname
}
/**
* 8.2.4.11
*
* Handling of '<' seen while in RCDATA. Not yet implemented.
*
* The body holds only notes on the intended transitions; calling this
* method currently does nothing.
*/
protected function rcdataLessThan() {
// / -> empty the tmp buffer and go to end-tag
// ->rcdata
}
/**
* 8.2.4.12
*
* Handling of the start of an end tag inside RCDATA. Not yet implemented.
*
* The body holds only notes on the intended transitions; calling this
* method currently does nothing.
*/
protected function rcdataEndTag() {
// A-Za-z: append to tagname
// -> rcdata state
}
/**
* 8.2.4.13
*
* Handling of an end tag name inside RCDATA. Not yet implemented.
*
* The body holds only notes on the intended transition for each input;
* calling this method currently does nothing.
*/
protected function rcdataEndTagName() {
// tab, lf, ff, space -> before attribute or treat as anything
// / -> self-closing tag
// > -> end tag, back to data
// A-Za-z -> append to tagname
// -> rcdata state
}
/**
* 8.2.4.14
*
* Handling of '<' seen while in RAWTEXT. Not yet implemented.
*
* The body holds only notes on the intended transitions; calling this
* method currently does nothing.
*/
protected function rawtextLessThan() {
// / -> rawtext endtag state
// -> rawtext
}
/**
* 8.2.4.15
*
* Handling of the start of an end tag inside RAWTEXT. Not yet implemented.
*
* The body holds only notes on the intended transitions; calling this
* method currently does nothing.
*
* NOTE(review): the first note sends letters back to rawtext, whereas the
* HTML5 spec appears to move to the RAWTEXT end tag name state for ASCII
* letters. Confirm before implementing.
*/
protected function rawtextEndTagOpen() {
// A-Za-z -> rawtext
// ->rawtext
}
/**
* Handling of an end tag name inside RAWTEXT. Not yet implemented.
*
* Only the first intended transition has been noted so far; calling this
* method currently does nothing.
*/
protected function rawtextEndTagName() {
// tab, lf, ff, space -> before attr name
// (remaining cases not yet listed)
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* less-than sign" tokenizer state. TODO confirm and implement.
*/
protected function scriptLessThan(){
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* end tag open" tokenizer state. TODO confirm and implement.
*/
protected function scriptEndTagOpen() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* end tag name" tokenizer state. TODO confirm and implement.
*/
protected function scriptEndTagName() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escape start" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscapeStart() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escape start dash" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscapeStartDash() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escaped" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscaped() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escaped dash" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscapedDash() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escaped dash dash" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscapedDashDash() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escaped less-than sign" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscapedLessThan() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escaped end tag open" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscapedEndTagOpen() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* escaped end tag name" tokenizer state. TODO confirm and implement.
*/
protected function scriptEscapedEndTagName() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* double escape start" tokenizer state. TODO confirm and implement.
*/
protected function scriptDoubleEscapeStart() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* double escaped" tokenizer state. TODO confirm and implement.
*/
protected function scriptDoubleEscaped() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* double escaped dash" tokenizer state. TODO confirm and implement.
*/
protected function scriptDoubleEscapedDash() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* double escaped dash dash" tokenizer state. TODO confirm and implement.
*/
protected function scriptDoubleEscapedDashDash() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* double escaped less-than sign" tokenizer state. TODO confirm and
* implement.
*/
protected function scriptDoubleEscapedLessThan() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "script data
* double escape end" tokenizer state. TODO confirm and implement.
*/
protected function scriptDoubleEscapeEnd() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "before
* attribute name" tokenizer state. TODO confirm and implement.
*/
protected function beforeAttributeName() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "attribute
* name" tokenizer state. TODO confirm and implement.
*/
protected function attributeName() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "after
* attribute name" tokenizer state. TODO confirm and implement.
*/
protected function afterAttributeName() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "before
* attribute value" tokenizer state. TODO confirm and implement.
*/
protected function beforeAttributeValue() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "attribute
* value (double-quoted)" tokenizer state. TODO confirm and implement.
*/
protected function attributeValueDoubleQuote() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "attribute
* value (single-quoted)" tokenizer state. TODO confirm and implement.
*/
protected function attributeValueSingleQuote() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "attribute
* value (unquoted)" tokenizer state. TODO confirm and implement.
*/
protected function attributeValueUnquoted() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "character
* reference in attribute value" tokenizer state. TODO confirm and
* implement.
*/
protected function characterReferenceInAttributeValue() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "after
* attribute value (quoted)" tokenizer state. TODO confirm and implement.
*/
protected function afterAttributeValueQuoted() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "self-closing
* start tag" tokenizer state. TODO confirm and implement.
*
* NOTE(review): the method name is spelled "Closeing"; renaming it would
* affect any caller, so it is left as is.
*/
protected function selfCloseingStartTag() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "bogus comment"
* tokenizer state. TODO confirm and implement.
*/
protected function bogusComment() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "markup
* declaration open" tokenizer state. TODO confirm and implement.
*/
protected function markupDeclarationOpen() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "comment start"
* tokenizer state. TODO confirm and implement.
*/
protected function commentStart() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "comment start
* dash" tokenizer state. TODO confirm and implement.
*/
protected function commentStartDash() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "comment"
* tokenizer state. TODO confirm and implement.
*/
protected function comment() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "comment end
* dash" tokenizer state. TODO confirm and implement.
*/
protected function commentEndDash() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "comment end"
* tokenizer state. TODO confirm and implement.
*/
protected function commentEnd() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "comment end
* bang" tokenizer state. TODO confirm and implement.
*/
protected function commentEndBangState() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "DOCTYPE"
* tokenizer state. TODO confirm and implement.
*/
protected function doctype() {
}
/**
* Empty stub. Possibly reserved for the HTML5 "before DOCTYPE name"
* tokenizer state; the method name does not make this certain. TODO
* confirm and implement.
*/
protected function beforeDoctype() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "DOCTYPE name"
* tokenizer state. TODO confirm and implement.
*/
protected function doctypeName() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "after DOCTYPE
* name" tokenizer state. TODO confirm and implement.
*/
protected function afterDoctypeName() {
}
/**
* Empty stub. Possibly reserved for the HTML5 "after DOCTYPE public
* keyword" tokenizer state; the method name does not make this certain.
* TODO confirm and implement.
*/
protected function doctypePublicKeyword() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "before DOCTYPE
* public identifier" tokenizer state. TODO confirm and implement.
*/
protected function beforeDoctypePublicId() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "DOCTYPE public
* identifier (double-quoted)" tokenizer state. TODO confirm and implement.
*/
protected function doctypePublicIdDoubleQuoted() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "DOCTYPE public
* identifier (single-quoted)" tokenizer state. TODO confirm and implement.
*/
protected function doctypePublicIdSingleQuoted() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "after DOCTYPE
* public identifier" tokenizer state. TODO confirm and implement.
*/
protected function afterDoctypePublicId() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "between
* DOCTYPE public and system identifiers" tokenizer state. TODO confirm
* and implement.
*/
protected function betweenDoctypePublicAndSystem() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "after DOCTYPE
* system keyword" tokenizer state. TODO confirm and implement.
*/
protected function afterDoctypeSystemKeyword() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "before DOCTYPE
* system identifier" tokenizer state. TODO confirm and implement.
*/
protected function beforeDoctypeSystemIdentifier() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "DOCTYPE system
* identifier (double-quoted)" tokenizer state. TODO confirm and implement.
*/
protected function doctypeSystemIdDoubleQuoted() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "DOCTYPE system
* identifier (single-quoted)" tokenizer state. TODO confirm and implement.
*/
protected function doctypeSystemIdSingleQuoted() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "after DOCTYPE
* system identifier" tokenizer state. TODO confirm and implement.
*/
protected function afterDoctypeSystemId() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "bogus DOCTYPE"
* tokenizer state. TODO confirm and implement.
*/
protected function bogusDoctype() {
}
/**
* Empty stub. Judging by its name, reserved for the HTML5 "CDATA section"
* tokenizer state. TODO confirm and implement.
*/
protected function cdataSection() {
}
}