summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5/Parser/Tokenizer.php | 20
-rw-r--r-- test/HTML5/Parser/EventStack.php | 6
2 files changed, 16 insertions, 10 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index f3e45e1..c835120 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -1,6 +1,8 @@
<?php
namespace HTML5\Parser;
+use HTML5\Elements;
+
/**
* The HTML5 tokenizer.
*
@@ -45,10 +47,11 @@ class Tokenizer {
* NORMAL: Scan non-elements.
* RAW: Scan until a specific closing tag.
* RCDATA: Scan until a specifc close state.
- */
+ *//*
const TEXTMODE_NORMAL = 0;
const TEXTMODE_RAW = 1;
const TEXTMODE_RCDATA = 2;
+ */
/**
* Create a new tokenizer.
@@ -105,13 +108,13 @@ class Tokenizer {
* startTag(), but it can also be set manually using this function.
*
* @param integer $textmode
- * One of Tokenizer::TEXTMODE_*
+ * One of Elements::TEXT_*
* @param string $untilTag
* The tag that should stop RAW or RCDATA mode. Normal mode does not
* use this indicator.
*/
public function setTextMode($textmode, $untilTag = NULL) {
- $this->textMode = $textmode;
+ $this->textMode = $textmode & (Elements::TEXT_RAW | Elements::TEXT_RCDATA);
$this->untilTag = $untilTag;
}
@@ -140,17 +143,18 @@ class Tokenizer {
/**
* Parse anything that looks like character data.
*
- * Different rules apply based on the current TEXTMODE.
+ * Different rules apply based on the current text mode.
+ *
+ * @see Elements::TEXT_RAW Elements::TEXT_RCDATA.
*/
protected function characterData() {
if ($this->scanner->current() === FALSE) {
return FALSE;
}
switch ($this->textMode) {
- case self::TEXTMODE_RAW:
- case self::TEXTMODE_RCDATA:
+ case Elements::TEXT_RAW:
+ case Elements::TEXT_RCDATA:
return $this->rawText();
- case self::TEXTMODE_NORMAL:
default:
$tok = $this->scanner->current();
if (strspn($tok, "<&")) {
@@ -190,7 +194,7 @@ class Tokenizer {
$sequence = '</' . $this->untilTag . '>';
$txt = $this->readUntilSequence($sequence);
$this->events->text($txt);
- $this->setTextMode(self::TEXTMODE_NORMAL);
+ $this->setTextMode(0);
return $this->endTag();
}
diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php
index c9ac20e..1f56ea9 100644
--- a/test/HTML5/Parser/EventStack.php
+++ b/test/HTML5/Parser/EventStack.php
@@ -1,13 +1,15 @@
<?php
namespace HTML5\Parser;
+use HTML5\Elements;
+
/**
* This testing class gathers events from a parser and builds a stack of events.
* It is useful for checking the output of a tokenizer.
*
* IMPORTANT:
*
- * The startTag event also kicks the parser into TEXTMODE_RAW when it encounters
+ * The startTag event also kicks the parser into TEXT_RAW when it encounters
* script or pre tags. This is to match the behavior required by the HTML5 spec,
* which says that the tree builder must tell the tokenizer when to switch states.
*/
@@ -49,7 +51,7 @@ class EventStack implements EventHandler {
$args = func_get_args();
$this->store('startTag', $args);
if ($name == 'pre' || $name == 'script') {
- return Tokenizer::TEXTMODE_RAW;
+ return Elements::TEXT_RAW;
}
}