diff options
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 20 | ||||
-rw-r--r-- | test/HTML5/Parser/EventStack.php | 6 |
2 files changed, 16 insertions, 10 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index f3e45e1..c835120 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -1,6 +1,8 @@ <?php namespace HTML5\Parser; +use HTML5\Elements; + /** * The HTML5 tokenizer. * @@ -45,10 +47,11 @@ class Tokenizer { * NORMAL: Scan non-elements. * RAW: Scan until a specific closing tag. * RCDATA: Scan until a specifc close state. - */ + *//* const TEXTMODE_NORMAL = 0; const TEXTMODE_RAW = 1; const TEXTMODE_RCDATA = 2; + */ /** * Create a new tokenizer. @@ -105,13 +108,13 @@ class Tokenizer { * startTag(), but it can also be set manually using this function. * * @param integer $textmode - * One of Tokenizer::TEXTMODE_* + * One of Elements::TEXT_* * @param string $untilTag * The tag that should stop RAW or RCDATA mode. Normal mode does not * use this indicator. */ public function setTextMode($textmode, $untilTag = NULL) { - $this->textMode = $textmode; + $this->textMode = $textmode & (Elements::TEXT_RAW | Elements::TEXT_RCDATA); $this->untilTag = $untilTag; } @@ -140,17 +143,18 @@ class Tokenizer { /** * Parse anything that looks like character data. * - * Different rules apply based on the current TEXTMODE. + * Different rules apply based on the current text mode. + * + * @see Elements::TEXT_RAW Elements::TEXT_RCDATA. */ protected function characterData() { if ($this->scanner->current() === FALSE) { return FALSE; } switch ($this->textMode) { - case self::TEXTMODE_RAW: - case self::TEXTMODE_RCDATA: + case Elements::TEXT_RAW: + case Elements::TEXT_RCDATA: return $this->rawText(); - case self::TEXTMODE_NORMAL: default: $tok = $this->scanner->current(); if (strspn($tok, "<&")) { @@ -190,7 +194,7 @@ class Tokenizer { $sequence = '</' . $this->untilTag . '>'; $txt = $this->readUntilSequence($sequence); $this->events->text($txt); - $this->setTextMode(self::TEXTMODE_NORMAL); + $this->setTextMode(0); return $this->endTag(); } diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php index c9ac20e..1f56ea9 100644 --- a/test/HTML5/Parser/EventStack.php +++ b/test/HTML5/Parser/EventStack.php @@ -1,13 +1,15 @@ <?php namespace HTML5\Parser; +use HTML5\Elements; + /** * This testing class gathers events from a parser and builds a stack of events. * It is useful for checking the output of a tokenizer. * * IMPORTANT: * - * The startTag event also kicks the parser into TEXTMODE_RAW when it encounters + * The startTag event also kicks the parser into TEXT_RAW when it encounters * script or pre tags. This is to match the behavior required by the HTML5 spec, * which says that the tree builder must tell the tokenizer when to switch states. */ @@ -49,7 +51,7 @@ class EventStack implements EventHandler { $args = func_get_args(); $this->store('startTag', $args); if ($name == 'pre' || $name == 'script') { - return Tokenizer::TEXTMODE_RAW; + return Elements::TEXT_RAW; } } |