summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTechnosophos <[email protected]>2013-04-23 09:29:09 -0500
committerTechnosophos <[email protected]>2013-04-23 09:29:09 -0500
commit48cbec335c4b2e1f63b79ebd632b3988e4c3bd89 (patch)
tree8a92bea7d5748228a950b9b286d1e15a8b65083b /src
parentda188b7251d151ae56773d7b1a3bb88a774fbdf3 (diff)
Added an instruction processor for PIs.
Diffstat (limited to 'src')
-rw-r--r--src/HTML5/InstructionProcessor.php43
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php117
2 files changed, 159 insertions, 1 deletions
diff --git a/src/HTML5/InstructionProcessor.php b/src/HTML5/InstructionProcessor.php
new file mode 100644
index 0000000..cfe7b33
--- /dev/null
+++ b/src/HTML5/InstructionProcessor.php
@@ -0,0 +1,43 @@
+<?php
+/**
+ * A handler for processing instructions.
+ */
+namespace HTML5;
+
+/**
+ * Provide a processor to handle embedded instructions.
+ *
+ * XML defines a mechanism for inserting instructions (like PHP) into a
+ * document. These are called "Processing Instructions" (PIs). The HTML5
+ * parser provides an opportunity to handle these processing instructions
+ * during the tree-building phase (while the DOM is being constructed),
+ * which makes it possible to alter the document as it is being created.
+ *
+ * One could, for example, use this mechanism to execute well-formed PHP
+ * code embedded inside of an HTML5 document.
+ */
+interface InstructionProcessor {
+
+ /**
+ * Process an individual processing instruction.
+ *
+ * The process() function is responsible for doing the following:
+ * - Determining whether $name is an instruction type it can handle.
+ * - Determining what to do with the data passed in.
+ * - Making any subsequent modifications to the DOM by modifying the
+ * DOMElement or its attached DOM tree.
+ *
+ * @param \DOMElement $element
+ * The parent element for the current processing instruction.
+ * @param string $name
+ * The instruction's name. E.g. `&lt;?php` has the name `php`.
+ * @param string $data
+ * All of the data between the opening and closing PI marks.
+ * @return \DOMElement
+ * The element that should be considered "current". This may just be
+ * the element passed in, but if the processor added more elements,
+ * it may choose to reset the current element to one of the elements
+ * it created. (When in doubt, return the element passed in.)
+ */
+ public function process(\DOMElement $element, $name, $data);
+}
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index b0fbdfb..be9fa23 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -1,23 +1,138 @@
<?php
namespace HTML5\Parser;
+/**
+ * Create an HTML5 DOM tree from events.
+ *
+ * This attempts to create a DOM from events emitted by a parser. This
+ * attempts (but does not guarantee) to up-convert older HTML documents
+ * to HTML5. It does this by applying HTML5's rules, but it will not
+ * change the architecture of the document itself.
+ */
class DOMTreeBuilder implements EventHandler {
+ protected $stack = array();
+ protected $current; // Pointer in the tag hierarchy.
+ protected $doc;
+
+ protected $processor;
+
+ /**
+ * Quirks mode is enabled by default. Any document that is missing the
+ * doctype (DT) declaration will be considered to be in quirks mode.
+ */
+ protected $quirks = TRUE;
+
+ /**
+ * Build the empty HTML5 document that incoming events will populate.
+ *
+ * Creates a DOMDocument with an `html` doctype and root element, attaches
+ * an empty `errors` list to it, and points $current at the root element.
+ */
+ public function __construct() {
+ // XXX:
+ // Create the doctype. For now, we are always creating HTML5
+ // documents, and attempting to up-convert any older DTDs to HTML5.
+ // DOMImplementation's factory methods are instance methods, so they
+ // are invoked on an object rather than statically.
+ $impl = new \DOMImplementation();
+ $dt = $impl->createDocumentType('html');
+ $this->doc = $impl->createDocument(NULL, 'html', $dt);
+ $this->doc->errors = array();
+
+ // documentElement is a property of DOMDocument, not a method.
+ $this->current = $this->doc->documentElement;
+ }
+
+ /**
+ * Provide an instruction processor.
+ *
+ * @param \HTML5\InstructionProcessor $proc
+ * Stored on the builder; processingInstruction() checks for it on each PI.
+ */
+ public function setInstructionProcessor(\HTML5\InstructionProcessor $proc) {
+ $this->processor = $proc;
+ }
+
public function doctype($name, $idType = 0, $id = NULL, $quirks = FALSE) {
+ // Only the quirks flag is recorded here; $name, $idType and $id are not
+ // used. We don't try to preserve the inbound DT. We convert it to HTML5.
+ $this->quirks = $quirks;
}
+
public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
+ // Create an element for the tag, copy its attributes onto it, append it
+ // to the current element, and then descend into it.
+ $lname = $this->normalizeTagName($name);
+
+ // XXX: Since we create the root element, we skip this if it occurs
+ // inside of the builder. We should probably check to make sure that
+ // there is only one element so far, and indicate an error if there
+ // is a structural problem.
+ if ($lname === 'html') {
+ return;
+ }
+
+ $ele = $this->doc->createElement($lname);
+
+ // Attributes were previously accepted but silently discarded.
+ // NOTE(review): assumes $attributes maps attribute names to scalar
+ // values -- confirm against the tokenizer.
+ foreach ($attributes as $aName => $aVal) {
+ $ele->setAttribute($aName, (string) $aVal);
+ }
+
+ $this->current->appendChild($ele);
+
+ // XXX: Need to handle self-closing tags and unary tags.
+ $this->current = $ele;
}
+
public function endTag($name) {
+ // Close the current element: if the end tag matches it, move $current
+ // back up to its parent; otherwise defer to the quirks resolver.
+ $lname = $this->normalizeTagName($name);
+ // tagName is a property of DOMElement, not a method.
+ if ($this->current->tagName != $lname) {
+ return $this->quirksTreeResolver($lname);
+ }
+
+ // XXX: HTML has no parent. What do we do, though,
+ // if this element appears in the wrong place?
+ if ($lname == 'html') {
+ return;
+ }
+ $this->current = $this->current->parentNode;
}
+
public function comment($cdata) {
+ // Build a comment node from the raw text and attach it under $current.
+ $this->current->appendChild($this->doc->createComment($cdata));
}
- public function text($cdata) {
+
+ public function text($data) {
+ // Wrap the character data in a text node and attach it under $current.
+ $this->current->appendChild($this->doc->createTextNode($data));
}
+
public function eof() {
+ // Currently a no-op. Open question: if $current isn't the root element
+ // when the input ends, do we need to do anything?
}
+
public function parseError($msg, $line, $col) {
+ // Format the error with its position and record it on the document.
+ $entry = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
+ $this->doc->errors[] = $entry;
}
+
public function cdata($data) {
+ // Create the CDATA section and attach it under $current. Previously the
+ // node was created but never added to the tree.
+ $node = $this->doc->createCDATASection($data);
+ $this->current->appendChild($node);
}
+
public function processingInstruction($name, $data = NULL) {
+ // Important: The processor may modify the current DOM tree however
+ // it sees fit.
+ if (isset($this->processor)) {
+ // The processor lives on the builder; a bare $processor local is
+ // undefined here.
+ $res = $this->processor->process($this->current, $name, $data);
+ // The processor returns the element to treat as current; keep the
+ // existing one if it returned nothing.
+ if (!empty($res)) {
+ $this->current = $res;
+ }
+ return;
+ }
+
+ // No processor registered: insert the PI directly into the DOM tree.
+ $node = $this->doc->createProcessingInstruction($name, $data === NULL ? '' : $data);
+ $this->current->appendChild($node);
+ }
+
+ // ==========================================================================
+ // UTILITIES
+ // ==========================================================================
+
+ protected function normalizeTagName($name) {
+ // Names without a namespace prefix are returned untouched.
+ if (strpos($name, ':') === FALSE) {
+ return $name;
+ }
+
+ // We know from the grammar that there must be at least one other
+ // char besides :, since : is not a legal tag start. Keep only the
+ // segment after the last colon.
+ $segments = explode(':', $name);
+ return end($segments);
+ }
+
+ protected function quirksTreeResolver($name) {
+ // Placeholder: mismatched end tags are not resolved yet, so always throw.
+ throw new \Exception("Not implemented.");
}
}