summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTechnosophos <[email protected]>2013-04-16 14:45:56 -0500
committerTechnosophos <[email protected]>2013-04-16 14:45:56 -0500
commit186ee9d481aded712459a4371619fcdbd33b443a (patch)
tree58f28af8c784153f9396b62b3032056aeca923c2
parent2f941ff18eb9f87e84ba2768fbdaf969be9e9fd2 (diff)
Added support for processing instructions.
-rw-r--r--src/HTML5/Parser/EventHandler.php19
-rw-r--r--src/HTML5/Parser/Tokenizer.php34
-rw-r--r--test/HTML5/Parser/EventStack.php3
-rw-r--r--test/HTML5/Parser/TokenizerTest.php15
4 files changed, 68 insertions, 3 deletions
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php
index bc16cdf..c28d80b 100644
--- a/src/HTML5/Parser/EventHandler.php
+++ b/src/HTML5/Parser/EventHandler.php
@@ -51,7 +51,22 @@ interface EventHandler {
*/
public function parseError($msg, $line, $col);
- // Do we need...
+ /**
+ * Handle a CDATA section.
+ *
+ * @param string $data
+ * The unparsed character data of the section.
+ */
public function cdata($data);
- // public function processorInstruction();
+ /**
+ * Handle a processing instruction (PI), e.g. <?php ... ?>.
+ *
+ * This is a holdover from the XML spec.
+ *
+ * While user agents don't get PIs, server-side code does.
+ *
+ * @param string $name
+ * The name of the processor (e.g. 'php').
+ * @param string $data
+ * The unparsed data of the instruction. Optional; defaults to NULL.
+ */
+ public function processingInstruction($name, $data = NULL);
}
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 4f1404f..d79f2c0 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -222,8 +222,8 @@ class Tokenizer {
return $this->markupDeclaration() ||
$this->endTag() ||
- $this->tagName() ||
$this->processingInstruction() ||
+ $this->tagName() ||
// This always returns false.
$this->parseError("Illegal tag opening") ||
$this->characterData();
@@ -784,6 +784,38 @@ class Tokenizer {
* EventHandler::processingInstruction() event.
*/
protected function processingInstruction() {
+  // Only a '?' at the current position can open a processing instruction.
+  if ($this->scanner->current() != '?') {
+    return FALSE;
+  }
+
+  // Step past the '?', then read the processor name and the whitespace
+  // that must separate it from the instruction data.
+  $tok = $this->scanner->next();
+  $procName = $this->scanner->getAsciiAlpha();
+  $white = strlen($this->scanner->whitespace());
+
+  // If not a PI, send to bogusComment.
+  // The EOF test is strict: a literal '0' character is == FALSE in PHP and
+  // must not be mistaken for the end of input.
+  if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() === FALSE) {
+    $this->parseError("Expected processing instruction name, got $tok");
+    $this->bogusComment('<?' . $tok . $procName);
+    return TRUE;
+  }
+
+  // Collect everything up to the closing '?>' pair. A lone '?' or a lone
+  // '>' inside the data (e.g. "$a > 1 ? 'x' : 'y'") does not end the PI.
+  $data = '';
+  while (!($this->scanner->current() == '?' && $this->scanner->peek() == '>')) {
+    $data .= $this->scanner->current();
+
+    $tok = $this->scanner->next();
+    if ($tok === FALSE) {
+      // Input ended before '?>': report it, but still emit what was read.
+      $this->parseError("Unexpected EOF in processing instruction.");
+      $this->events->processingInstruction($procName, $data);
+      return TRUE;
+    }
+
+  }
+
+  $this->scanner->next(); // >
+  $this->scanner->next(); // Next token.
+  $this->events->processingInstruction($procName, $data);
+  return TRUE;
}
diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php
index e865507..f197855 100644
--- a/test/HTML5/Parser/EventStack.php
+++ b/test/HTML5/Parser/EventStack.php
@@ -71,6 +71,9 @@ class EventStack implements EventHandler {
$this->store('error', func_get_args());
}
+ /**
+  * Stores a processing instruction event, tagged 'pi', together with the
+  * arguments that were actually passed.
+  */
+ public function processingInstruction($name, $data = NULL) {
+   $received = func_get_args();
+   $this->store('pi', $received);
+ }
}
class EventStackParseError extends \Exception {
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index fb33e37..c4c66e7 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -106,6 +106,8 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
'<!CDATA[[ test ]]>',
'<![CDATA[',
'<![CDATA[hellooooo hello',
+ '<? Hello World ?>',
+ '<? Hello World',
);
foreach ($bogus as $str) {
$events = $this->parse($str);
@@ -258,6 +260,19 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
}
}
+ /**
+  * Well-formed processing instructions must yield a 'pi' event first,
+  * with two events on the stack in total.
+  */
+ public function testProcessorInstruction() {
+   // Map of input markup => expected 'pi' event payload.
+   $cases = array();
+   $cases['<?hph ?>'] = 'hph';
+   $cases['<?hph echo "Hello World"; ?>'] = array('hph', 'echo "Hello World"; ');
+   $cases["<?hph \necho 'Hello World';\n?>"] = array('hph', "echo 'Hello World';\n");
+
+   foreach ($cases as $test => $expects) {
+     $stack = $this->parse($test);
+     $depth = $stack->depth();
+     $this->assertEquals(2, $depth, "Counting events for '$test': " . print_r($stack, TRUE));
+     $first = $stack->get(0);
+     $this->assertEventEquals('pi', $expects, $first);
+   }
+ }
+
public function testText() {
$good = array(
'a<br>b',