doc = \DOMImplementation::createDocument(NULL, 'html', $dt); $this->doc->errors = array(); $this->current = $this->doc->documentElement; }

  /**
   * Get the document.
   *
   * @return \DOMDocument
   *   The document being built. Parse errors reported through parseError()
   *   are collected on its "errors" property.
   */
  public function document() {
    return $this->doc;
  }

  /**
   * Provide an instruction processor.
   *
   * This is used for handling Processor Instructions as they are
   * inserted. If omitted, PIs are inserted directly into the DOM tree.
   *
   * @param \HTML5\InstructionProcessor $proc
   *   The processor that processingInstruction() will delegate to.
   */
  public function setInstructionProcessor(\HTML5\InstructionProcessor $proc) {
    $this->processor = $proc;
  }

  /**
   * Handle a doctype declaration.
   *
   * Only the quirks flag is recorded; $name, $idType and $id are ignored.
   *
   * @param string $name
   *   The doctype name (unused).
   * @param int $idType
   *   The identifier type (unused).
   * @param string $id
   *   The identifier (unused).
   * @param boolean $quirks
   *   Whether quirks mode should be flagged.
   */
  public function doctype($name, $idType = 0, $id = NULL, $quirks = FALSE) {
    // This is used solely for setting quirks mode. Currently we don't
    // try to preserve the inbound DT. We convert it to HTML5.
    $this->quirks = $quirks;
  }

  /**
   * Handle a start tag: create the element, attach it, and descend into it.
   *
   * @param string $name
   *   The tag name. Any prefix up to the last colon is dropped before the
   *   element is created (see normalizeTagName()).
   * @param array $attributes
   *   Map of attribute name => attribute value.
   * @param boolean $selfClosing
   *   Currently ignored: the new element always becomes the current node.
   *
   * @return mixed
   *   The result of Elements::element($name) (the element mask used by the
   *   tokenizer), or nothing when the tag is "html".
   */
  public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
    $lname = $this->normalizeTagName($name);

    // XXX: Since we create the root element, we skip this if it occurs
    // inside of the builder. We should probably check to make sure that
    // there is only one element so far, and indicate an error if there
    // is a structural problem.
    if ($lname === 'html') {
      return;
    }

    $ele = $this->doc->createElement($lname);
    foreach ($attributes as $aName => $aVal) {
      $ele->setAttribute($aName, $aVal);

      // This is necessary on a non-DTD schema, like HTML5.
      if ($aName === 'id') {
        $ele->setIdAttribute('id', TRUE);
      }
    }

    $this->current->appendChild($ele);

    // XXX: Need to handle self-closing tags and unary tags.
    $this->current = $ele;

    // Return the element mask, which the tokenizer can then use to set
    // various processing rules.
    return Elements::element($name);
  }

  /**
   * Handle an end tag by moving back up to the parent of the current node.
   *
   * @param string $name
   *   The tag name; normalized the same way as in startTag().
   *
   * @return mixed
   *   Nothing in the normal case. When the tag does not match the current
   *   element, the result of quirksTreeResolver() is returned.
   *
   * @throws \Exception
   *   Via quirksTreeResolver(), which is not implemented, whenever the end
   *   tag does not match the current element.
   */
  public function endTag($name) {
    $lname = $this->normalizeTagName($name);

    if ($this->current->tagName != $lname) {
      return $this->quirksTreeResolver($lname);
    }

    // XXX: HTML has no parent. What do we do, though,
    // if this element appears in the wrong place?
    if ($lname === 'html') {
      return;
    }

    $this->current = $this->current->parentNode;
  }

  /**
   * Append a comment node to the current node.
   *
   * @param string $cdata
   *   The comment text.
   */
  public function comment($cdata) {
    // TODO: Need to handle case where comment appears outside of the HTML tag.
    $node = $this->doc->createComment($cdata);
    $this->current->appendChild($node);
  }

  /**
   * Append a text node to the current node.
   *
   * @param string $data
   *   The character data.
   */
  public function text($data) {
    $node = $this->doc->createTextNode($data);
    $this->current->appendChild($node);
  }

  /**
   * Handle the end of input. Currently a no-op.
   */
  public function eof() {
    // If the $current isn't the $root, do we need to do anything?
  }

  /**
   * Record a parse error on the document.
   *
   * The formatted message is appended to the document's "errors" array.
   *
   * @param string $msg
   *   The error message.
   * @param int $line
   *   The line number where the error was detected.
   * @param int $col
   *   The column number where the error was detected.
   */
  public function parseError($msg, $line, $col) {
    $this->doc->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
  }

  /**
   * Append a CDATA section to the current node.
   *
   * @param string $data
   *   The content of the CDATA section.
   */
  public function cdata($data) {
    $node = $this->doc->createCDATASection($data);
    // The node must be attached; a created-but-unattached node is simply
    // discarded and the CDATA content would be lost.
    $this->current->appendChild($node);
  }

  /**
   * Handle a processing instruction.
   *
   * If an instruction processor has been registered it is given the current
   * node, and a non-empty return value from it becomes the new current node.
   * Without a processor, the instruction is inserted into the DOM tree as a
   * plain processing-instruction node.
   *
   * @param string $name
   *   The instruction target.
   * @param string $data
   *   The instruction data, if any.
   */
  public function processingInstruction($name, $data = NULL) {
    // Important: The processor may modify the current DOM tree however
    // it sees fit.
    if (isset($this->processor)) {
      $res = $this->processor->process($this->current, $name, $data);
      if (!empty($res)) {
        $this->current = $res;
      }
      return;
    }

    // No processor registered: insert the PI directly into the tree.
    $node = $this->doc->createProcessingInstruction($name, $data === NULL ? '' : $data);
    $this->current->appendChild($node);
  }

  // ==========================================================================
  // UTILITIES
  // ==========================================================================

  /**
   * Strip any colon-delimited prefix from a tag name.
   *
   * @param string $name
   *   The tag name as received from the tokenizer.
   *
   * @return string
   *   The part of $name after the last colon, or $name unchanged when it
   *   contains no colon.
   */
  protected function normalizeTagName($name) {
    if (strpos($name, ':') !== FALSE) {
      // We know from the grammar that there must be at least one other
      // char besides :, since : is not a legal tag start.
      $parts = explode(':', $name);
      return array_pop($parts);
    }

    return $name;
  }

  /**
   * Resolve an end tag that does not match the current element.
   *
   * Not implemented yet.
   *
   * @param string $name
   *   The normalized name of the unexpected end tag.
   *
   * @throws \Exception
   *   Always.
   */
  protected function quirksTreeResolver($name) {
    throw new \Exception("Not implemented.");
  }
}