diff options
| field | value | date |
|---|---|---|
| author | Technosophos <[email protected]> | 2013-04-16 14:45:56 -0500 |
| committer | Technosophos <[email protected]> | 2013-04-16 14:45:56 -0500 |
| commit | 186ee9d481aded712459a4371619fcdbd33b443a (patch) | |
| tree | 58f28af8c784153f9396b62b3032056aeca923c2 | |
| parent | 2f941ff18eb9f87e84ba2768fbdaf969be9e9fd2 (diff) | |
Added support for processing instructions.
-rw-r--r-- | src/HTML5/Parser/EventHandler.php | 19 | ||||
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 34 | ||||
-rw-r--r-- | test/HTML5/Parser/EventStack.php | 3 | ||||
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 15 |
4 files changed, 68 insertions, 3 deletions
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php index bc16cdf..c28d80b 100644 --- a/src/HTML5/Parser/EventHandler.php +++ b/src/HTML5/Parser/EventHandler.php @@ -51,7 +51,22 @@ interface EventHandler { */ public function parseError($msg, $line, $col); - // Do we need... + /** + * A CDATA section. + * + * @param string $data + * The unparsed character data. + */ public function cdata($data); - // public function processorInstruction(); + /** + * This is a holdover from the XML spec. + * + * While user agents don't get PIs, server-side does. + * + * @param string $name + * The name of the processor (e.g. 'php'). + * @param string $data + * The unparsed data. + */ + public function processingInstruction($name, $data = NULL); } diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 4f1404f..d79f2c0 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -222,8 +222,8 @@ class Tokenizer { return $this->markupDeclaration() || $this->endTag() || - $this->tagName() || $this->processingInstruction() || + $this->tagName() || // This always returns false. $this->parseError("Illegal tag opening") || $this->characterData(); @@ -784,6 +784,38 @@ class Tokenizer { * EventListener::processingInstruction() event. */ protected function processingInstruction() { + if ($this->scanner->current() != '?') { + return FALSE; + } + + $tok = $this->scanner->next(); + $procName = $this->scanner->getAsciiAlpha(); + $white = strlen($this->scanner->whitespace()); + + // If not a PI, send to bogusComment. + if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == FALSE) { + $this->parseError("Expected processing instruction name, got $tok"); + $this->bogusComment('<?' . $tok . $procName); + return TRUE; + } + + $data = ''; + while ($this->scanner->current() != '?' 
&& $this->scanner->peek() != '>') { + $data .= $this->scanner->current(); + + $tok = $this->scanner->next(); + if ($tok === FALSE) { + $this->parseError("Unexpected EOF in processing instruction."); + $this->events->processingInstruction($procName, $data); + return TRUE; + } + + } + + $this->scanner->next(); // > + $this->scanner->next(); // Next token. + $this->events->processingInstruction($procName, $data); + return TRUE; } diff --git a/test/HTML5/Parser/EventStack.php b/test/HTML5/Parser/EventStack.php index e865507..f197855 100644 --- a/test/HTML5/Parser/EventStack.php +++ b/test/HTML5/Parser/EventStack.php @@ -71,6 +71,9 @@ class EventStack implements EventHandler { $this->store('error', func_get_args()); } + public function processingInstruction($name, $data = NULL) { + $this->store('pi', func_get_args()); + } } class EventStackParseError extends \Exception { diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index fb33e37..c4c66e7 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -106,6 +106,8 @@ class TokenizerTest extends \HTML5\Tests\TestCase { '<!CDATA[[ test ]]>', '<![CDATA[', '<![CDATA[hellooooo hello', + '<? Hello World ?>', + '<? Hello World', ); foreach ($bogus as $str) { $events = $this->parse($str); @@ -258,6 +260,19 @@ class TokenizerTest extends \HTML5\Tests\TestCase { } } + public function testProcessorInstruction() { + $good = array( + '<?hph ?>' => 'hph', + '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '), + "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"), + ); + foreach ($good as $test => $expects) { + $events = $this->parse($test); + $this->assertEquals(2, $events->depth(), "Counting events for '$test': " . print_r($events, TRUE)); + $this->assertEventEquals('pi', $expects, $events->get(0)); + } + } + public function testText() { $good = array( 'a<br>b', |