summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser
diff options
context:
space:
mode:
authorTechnosophos <[email protected]>2013-04-16 14:45:56 -0500
committerTechnosophos <[email protected]>2013-04-16 14:45:56 -0500
commit186ee9d481aded712459a4371619fcdbd33b443a (patch)
tree58f28af8c784153f9396b62b3032056aeca923c2 /src/HTML5/Parser
parent2f941ff18eb9f87e84ba2768fbdaf969be9e9fd2 (diff)
Added support for processing instructions.
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r--src/HTML5/Parser/EventHandler.php19
-rw-r--r--src/HTML5/Parser/Tokenizer.php34
2 files changed, 50 insertions, 3 deletions
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php
index bc16cdf..c28d80b 100644
--- a/src/HTML5/Parser/EventHandler.php
+++ b/src/HTML5/Parser/EventHandler.php
@@ -51,7 +51,22 @@ interface EventHandler {
*/
public function parseError($msg, $line, $col);
- // Do we need...
+ /**
+ * A CDATA section.
+ *
+ * @param string $data
+ * The unparsed character data.
+ */
public function cdata($data);
- // public function processorInstruction();
+ /**
+ * A processing instruction.
+ *
+ * Processing instructions are a holdover from the XML spec.
+ *
+ * While user agents don't get PIs, server-side code does.
+ *
+ * @param string $name
+ * The name of the processor (e.g. 'php').
+ * @param string $data
+ * The unparsed data.
+ */
+ public function processingInstruction($name, $data = NULL);
}
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 4f1404f..d79f2c0 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -222,8 +222,8 @@ class Tokenizer {
return $this->markupDeclaration() ||
$this->endTag() ||
- $this->tagName() ||
$this->processingInstruction() ||
+ $this->tagName() ||
// This always returns false.
$this->parseError("Illegal tag opening") ||
$this->characterData();
@@ -784,6 +784,38 @@ class Tokenizer {
* EventHandler::processingInstruction() event.
*/
protected function processingInstruction() {
+    // Anything that does not start with '?' is not a processing instruction;
+    // return FALSE so the caller can try its next alternative.
+    if ($this->scanner->current() != '?') {
+      return FALSE;
+    }
+
+    // Move past the '?', then read the PI target name and the whitespace that
+    // must separate it from the data.
+    $tok = $this->scanner->next();
+    $procName = $this->scanner->getAsciiAlpha();
+    $white = strlen($this->scanner->whitespace());
+
+    // If not a PI (no name, no separating whitespace, or nothing left to
+    // read), report the error and send to bogusComment.
+    if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == FALSE) {
+      $this->parseError("Expected processing instruction name, got $tok");
+      $this->bogusComment('<?' . $tok . $procName);
+      return TRUE;
+    }
+
+    $data = '';
+    // Collect data until the closing "?>" pair. The loop must stop only when
+    // the current character is '?' AND the next one is '>'; a lone '?' or a
+    // lone '>' inside the instruction (e.g. `$a ? 1 : 2` or `"a>b"`) is data.
+    while (!($this->scanner->current() == '?' && $this->scanner->peek() == '>')) {
+      $data .= $this->scanner->current();
+
+      $tok = $this->scanner->next();
+      if ($tok === FALSE) {
+        // Input ended before "?>": emit what was collected so far.
+        $this->parseError("Unexpected EOF in processing instruction.");
+        $this->events->processingInstruction($procName, $data);
+        return TRUE;
+      }
+
+    }
+
+    $this->scanner->next(); // >
+    $this->scanner->next(); // Next token.
+    $this->events->processingInstruction($procName, $data);
+    return TRUE;
}