From 186ee9d481aded712459a4371619fcdbd33b443a Mon Sep 17 00:00:00 2001 From: Technosophos Date: Tue, 16 Apr 2013 14:45:56 -0500 Subject: Added support for processing instructions. --- src/HTML5/Parser/EventHandler.php | 19 +++++++++++++++++-- src/HTML5/Parser/Tokenizer.php | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) (limited to 'src/HTML5/Parser') diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php index bc16cdf..c28d80b 100644 --- a/src/HTML5/Parser/EventHandler.php +++ b/src/HTML5/Parser/EventHandler.php @@ -51,7 +51,22 @@ interface EventHandler { */ public function parseError($msg, $line, $col); - // Do we need... + /** + * A CDATA section. + * + * @param string $data + * The unparsed character data. + */ public function cdata($data); - // public function processorInstruction(); + /** + * This is a holdover from the XML spec. + * + * While user agents don't get PIs, server-side does. + * + * @param string $name + * The name of the processor (e.g. 'php'). + * @param string $data + * The unparsed data. + */ + public function processingInstruction($name, $data = NULL); } diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 4f1404f..d79f2c0 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -222,8 +222,8 @@ class Tokenizer { return $this->markupDeclaration() || $this->endTag() || - $this->tagName() || $this->processingInstruction() || + $this->tagName() || // This always returns false. $this->parseError("Illegal tag opening") || $this->characterData(); @@ -784,6 +784,38 @@ class Tokenizer { * EventListener::processingInstruction() event. */ protected function processingInstruction() { + if ($this->scanner->current() != '?') { + return FALSE; + } + + $tok = $this->scanner->next(); + $procName = $this->scanner->getAsciiAlpha(); + $white = strlen($this->scanner->whitespace()); + + // If not a PI, send to bogusComment. 
+ if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == FALSE) { + $this->parseError("Expected processing instruction name, got $tok"); + $this->bogusComment('<?' . $tok . $procName); + return TRUE; + } + + $data = ''; + while ($this->scanner->current() != '?' && $this->scanner->peek() != '>') { + $data .= $this->scanner->current(); + + $tok = $this->scanner->next(); + if ($tok === FALSE) { + $this->parseError("Unexpected EOF in processing instruction."); + $this->events->processingInstruction($procName, $data); + return TRUE; + } + + } + + $this->scanner->next(); // > + $this->scanner->next(); // Next token. + $this->events->processingInstruction($procName, $data); + return TRUE; } -- cgit v1.2.3