summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTechnosophos <[email protected]>2013-04-19 17:43:28 -0500
committerTechnosophos <[email protected]>2013-04-19 17:43:28 -0500
commit9351d1c046ed3a6a82db3daa7eaab2336d5b204d (patch)
tree494bde4b282a86132e8817751dd21ea96dc8e4fa /src
parent4e5458898e6d9a73d3eae7b3213187407a940ce8 (diff)
Full support for rawtext. Unit tests finished.
Diffstat (limited to 'src')
-rw-r--r--src/HTML5/Parser/EventHandler.php27
-rw-r--r--src/HTML5/Parser/Tokenizer.php7
2 files changed, 32 insertions, 2 deletions
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php
index 8282cb7..ebb30b2 100644
--- a/src/HTML5/Parser/EventHandler.php
+++ b/src/HTML5/Parser/EventHandler.php
@@ -27,7 +27,32 @@ interface EventHandler {
/**
* A start tag.
*
- *
+ * IMPORTANT: The parser watches the return value of this event. If this returns
+ * an integer, the parser will switch TEXTMODE patterns according to the int.
+ *
+ * This is how the Tree Builder can tell the Tokenizer when a certain tag should
+ * cause the parser to go into RAW text mode.
+ *
+ * The HTML5 standard requires that the builder is the one that initiates this
+ * step, and this is the only way short of a circular reference that we can
+ * do that.
+ *
+ * Example: if a startTag event for a `script` name is fired, and the startTag()
+ * implementation returns Tokenizer::TEXTMODE_RAW, then the tokenizer will
+ * switch into RAW text mode and consume data until it reaches a closing
+ * `script` tag.
+ *
+ * The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
+ * closing tag is encountered. **This behavior may change.**
+ *
+ * @param string $name
+ * The tag name.
+ * @param array $attributes
+ * An array with all of the tag's attributes.
+ * @param boolean $selfClosing
+ * An indicator of whether or not this tag is self-closing (<foo/>)
+ * @return numeric
+ * One of the Tokenizer::TEXTMODE_* constants.
*/
public function startTag($name, $attributes = array(), $selfClosing = FALSE);
/**
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 4f2f792..02bb328 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -303,11 +303,15 @@ class Tokenizer {
}
while (!$this->isTagEnd($selfClose));
- $this->events->startTag($name, $attributes, $selfClose);
+ $mode = $this->events->startTag($name, $attributes, $selfClose);
// Should we do this? What does this buy that selfClose doesn't?
if ($selfClose) {
$this->events->endTag($name);
}
+ elseif (is_int($mode)) {
+ //fprintf(STDOUT, "Event response says move into mode %d for tag %s", $mode, $name);
+ $this->setTextMode($mode, $name);
+ }
$this->scanner->next();
@@ -816,6 +820,7 @@ class Tokenizer {
}
// If we get here, we hit the EOF.
+ $this->parseError("Unexpected EOF during text read.");
return $buffer;
}