diff options
Diffstat (limited to 'src/HTML5/Parser/DOMTreeBuilder.php')
-rw-r--r-- | src/HTML5/Parser/DOMTreeBuilder.php | 178 |
1 files changed, 88 insertions, 90 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index 7f0e16a..365bb75 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -1,7 +1,9 @@ <?php + namespace Masterminds\HTML5\Parser; use Masterminds\HTML5\Elements; +use Masterminds\HTML5\InstructionProcessor; /** * Create an HTML5 DOM tree from events. @@ -24,7 +26,7 @@ use Masterminds\HTML5\Elements; class DOMTreeBuilder implements EventHandler { /** - * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0 + * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0. */ const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; @@ -45,14 +47,14 @@ class DOMTreeBuilder implements EventHandler const OPT_IMPLICIT_NS = 'implicit_namespaces'; /** - * Holds the HTML5 element names that causes a namespace switch + * Holds the HTML5 element names that causes a namespace switch. * * @var array */ protected $nsRoots = array( 'html' => self::NAMESPACE_HTML, 'svg' => self::NAMESPACE_SVG, - 'math' => self::NAMESPACE_MATHML + 'math' => self::NAMESPACE_MATHML, ); /** @@ -63,7 +65,7 @@ class DOMTreeBuilder implements EventHandler protected $implicitNamespaces = array( 'xml' => self::NAMESPACE_XML, 'xmlns' => self::NAMESPACE_XMLNS, - 'xlink' => self::NAMESPACE_XLINK + 'xlink' => self::NAMESPACE_XLINK, ); /** @@ -146,15 +148,15 @@ class DOMTreeBuilder implements EventHandler protected $insertMode = 0; /** - * Track if we are in an element that allows only inline child nodes + * Track if we are in an element that allows only inline child nodes. + * * @var string|null */ protected $onlyInline; /** * Quirks mode is enabled by default. - * Any document that is missing the - * DT will be considered to be in quirks mode. + * Any document that is missing the DT will be considered to be in quirks mode. */ protected $quirks = true; @@ -175,24 +177,23 @@ class DOMTreeBuilder implements EventHandler // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt); $this->doc = $impl->createDocument(null, null, $dt); } + $this->errors = array(); $this->current = $this->doc; // ->documentElement; // Create a rules engine for tags. - $this->rules = new TreeBuildingRules($this->doc); + $this->rules = new TreeBuildingRules(); $implicitNS = array(); if (isset($this->options[self::OPT_IMPLICIT_NS])) { $implicitNS = $this->options[self::OPT_IMPLICIT_NS]; - } elseif (isset($this->options["implicitNamespaces"])) { - $implicitNS = $this->options["implicitNamespaces"]; + } elseif (isset($this->options['implicitNamespaces'])) { + $implicitNS = $this->options['implicitNamespaces']; } // Fill $nsStack with the defalut HTML5 namespaces, plus the "implicitNamespaces" array taken form $options - array_unshift($this->nsStack, $implicitNS + array( - '' => self::NAMESPACE_HTML - ) + $this->implicitNamespaces); + array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces); if ($isFragment) { $this->insertMode = static::IM_IN_BODY; @@ -229,8 +230,10 @@ class DOMTreeBuilder implements EventHandler * * This is used for handling Processor Instructions as they are * inserted. If omitted, PI's are inserted directly into the DOM tree. + * + * @param InstructionProcessor $proc */ - public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc) + public function setInstructionProcessor(InstructionProcessor $proc) { $this->processor = $proc; } @@ -242,7 +245,7 @@ class DOMTreeBuilder implements EventHandler $this->quirks = $quirks; if ($this->insertMode > static::IM_INITIAL) { - $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name); + $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name); return; } @@ -256,27 +259,32 @@ class DOMTreeBuilder implements EventHandler * @todo - XMLNS namespace handling (we need to parse, even if it's not valid) * - XLink, MathML and SVG namespace handling * - Omission rules: 8.1.2.4 Optional tags + * + * @param string $name + * @param array $attributes + * @param bool $selfClosing + * + * @return int */ public function startTag($name, $attributes = array(), $selfClosing = false) { - // fprintf(STDOUT, $name); $lname = $this->normalizeTagName($name); // Make sure we have an html element. - if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) { + if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) { $this->startTag('html'); } // Set quirks mode if we're at IM_INITIAL with no doctype. - if ($this->insertMode == static::IM_INITIAL) { + if ($this->insertMode === static::IM_INITIAL) { $this->quirks = true; - $this->parseError("No DOCTYPE specified."); + $this->parseError('No DOCTYPE specified.'); } // SPECIAL TAG HANDLING: // Spec says do this, and "don't ask." // find the spec where this is defined... looks problematic - if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) { + if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) { $name = 'img'; } @@ -292,7 +300,7 @@ class DOMTreeBuilder implements EventHandler break; case 'head': if ($this->insertMode > static::IM_BEFORE_HEAD) { - $this->parseError("Unexpected head tag outside of head context."); + $this->parseError('Unexpected head tag outside of head context.'); } else { $this->insertMode = static::IM_IN_HEAD; } @@ -307,14 +315,14 @@ class DOMTreeBuilder implements EventHandler $this->insertMode = static::IM_IN_MATHML; break; case 'noscript': - if ($this->insertMode == static::IM_IN_HEAD) { + if ($this->insertMode === static::IM_IN_HEAD) { $this->insertMode = static::IM_IN_HEAD_NOSCRIPT; } break; } // Special case handling for SVG. - if ($this->insertMode == static::IM_IN_SVG) { + if ($this->insertMode === static::IM_IN_SVG) { $lname = Elements::normalizeSvgElement($lname); } @@ -322,62 +330,58 @@ class DOMTreeBuilder implements EventHandler // when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) { array_unshift($this->nsStack, array( - '' => $this->nsRoots[$lname] + '' => $this->nsRoots[$lname], ) + $this->nsStack[0]); - $pushes ++; + ++$pushes; } $needsWorkaround = false; - if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) { + if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) { // when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack foreach ($attributes as $aName => $aVal) { - if ($aName === 'xmlns') { + if ('xmlns' === $aName) { $needsWorkaround = $aVal; array_unshift($this->nsStack, array( - '' => $aVal + '' => $aVal, ) + $this->nsStack[0]); - $pushes ++; - } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') { + ++$pushes; + } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) { array_unshift($this->nsStack, array( - substr($aName, $pos + 1) => $aVal + substr($aName, $pos + 1) => $aVal, ) + $this->nsStack[0]); - $pushes ++; + ++$pushes; } } } if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) { - $this->autoclose($this->onlyInline); - $this->onlyInline = null; + $this->autoclose($this->onlyInline); + $this->onlyInline = null; } try { $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : ''; - - if ($needsWorkaround!==false) { - - $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>"; + if (false !== $needsWorkaround) { + $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>'; $frag = new \DOMDocument('1.0', 'UTF-8'); $frag->loadXML($xml); $ele = $this->doc->importNode($frag->documentElement, true); - } else { - if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) { + if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) { $ele = $this->doc->createElement($lname); } else { $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname); } } - } catch (\DOMException $e) { $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); $ele = $this->doc->createElement('invalid'); } if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) { - $this->onlyInline = $lname; + $this->onlyInline = $lname; } // When we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them. @@ -396,23 +400,23 @@ class DOMTreeBuilder implements EventHandler foreach ($attributes as $aName => $aVal) { // xmlns attributes can't be set - if ($aName === 'xmlns') { + if ('xmlns' === $aName) { continue; } - if ($this->insertMode == static::IM_IN_SVG) { + if ($this->insertMode === static::IM_IN_SVG) { $aName = Elements::normalizeSvgAttribute($aName); - } elseif ($this->insertMode == static::IM_IN_MATHML) { + } elseif ($this->insertMode === static::IM_IN_MATHML) { $aName = Elements::normalizeMathMlAttribute($aName); } try { $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false; - if ($prefix==='xmlns') { - $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal); - } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) { - $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal); + if ('xmlns' === $prefix) { + $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal); + } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) { + $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal); } else { $ele->setAttribute($aName, $aVal); } @@ -422,19 +426,19 @@ class DOMTreeBuilder implements EventHandler } // This is necessary on a non-DTD schema, like HTML5. - if ($aName == 'id') { + if ('id' === $aName) { $ele->setIdAttribute('id', true); } } - // Some elements have special processing rules. Handle those separately. - if ($this->rules->hasRules($name) && $this->frag !== $this->current) { + if ($this->frag !== $this->current && $this->rules->hasRules($name)) { + // Some elements have special processing rules. Handle those separately. $this->current = $this->rules->evaluate($ele, $this->current); - } // Otherwise, it's a standard element. - else { + } else { + // Otherwise, it's a standard element. $this->current->appendChild($ele); - if (! Elements::isA($name, Elements::VOID_TAG)) { + if (!Elements::isA($name, Elements::VOID_TAG)) { $this->current = $ele; } @@ -448,7 +452,7 @@ class DOMTreeBuilder implements EventHandler // This is sort of a last-ditch attempt to correct for cases where no head/body // elements are provided. - if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') { + if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) { $this->insertMode = static::IM_IN_BODY; } @@ -456,7 +460,7 @@ class DOMTreeBuilder implements EventHandler // but we have to remove the namespaces pushed to $nsStack. if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) { // remove the namespaced definded by current node - for ($i = 0; $i < $pushes; $i ++) { + for ($i = 0; $i < $pushes; ++$i) { array_shift($this->nsStack); } } @@ -485,7 +489,7 @@ class DOMTreeBuilder implements EventHandler 'html', 'br', 'head', - 'title' + 'title', ))) { $this->startTag('html'); $this->endTag($name); @@ -495,13 +499,13 @@ class DOMTreeBuilder implements EventHandler } // Ignore the tag. - $this->parseError("Illegal closing tag at global scope."); + $this->parseError('Illegal closing tag at global scope.'); return; } // Special case handling for SVG. - if ($this->insertMode == static::IM_IN_SVG) { + if ($this->insertMode === static::IM_IN_SVG) { $lname = Elements::normalizeSvgElement($lname); } @@ -512,39 +516,33 @@ class DOMTreeBuilder implements EventHandler $cid = spl_object_hash($this->current); } - // XXX: Not sure whether we need this anymore. - // if ($name != $lname) { - // return $this->quirksTreeResolver($lname); - // } - // XXX: HTML has no parent. What do we do, though, // if this element appears in the wrong place? - if ($lname == 'html') { + if ('html' === $lname) { return; } // remove the namespaced definded by current node if (isset($this->pushes[$cid])) { - for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) { + for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) { array_shift($this->nsStack); } unset($this->pushes[$cid]); } - if (! $this->autoclose($lname)) { + if (!$this->autoclose($lname)) { $this->parseError('Could not find closing tag for ' . $lname); } - // switch ($this->insertMode) { switch ($lname) { - case "head": + case 'head': $this->insertMode = static::IM_AFTER_HEAD; break; - case "body": + case 'body': $this->insertMode = static::IM_AFTER_BODY; break; - case "svg": - case "mathml": + case 'svg': + case 'mathml': $this->insertMode = static::IM_IN_BODY; break; } @@ -566,9 +564,9 @@ class DOMTreeBuilder implements EventHandler // practical as most documents contain these characters. Other text is not // expected here so recording a parse error is necessary. $dataTmp = trim($data, " \t\n\r\f"); - if (! empty($dataTmp)) { + if (!empty($dataTmp)) { // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode); - $this->parseError("Unexpected text. Ignoring: " . $dataTmp); + $this->parseError('Unexpected text. Ignoring: ' . $dataTmp); } return; @@ -585,7 +583,7 @@ class DOMTreeBuilder implements EventHandler public function parseError($msg, $line = 0, $col = 0) { - $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg); + $this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg); } public function getErrors() @@ -602,15 +600,14 @@ class DOMTreeBuilder implements EventHandler public function processingInstruction($name, $data = null) { // XXX: Ignore initial XML declaration, per the spec. - if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) { + if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) { return; } - // Important: The processor may modify the current DOM tree however - // it sees fit. - if (isset($this->processor)) { + // Important: The processor may modify the current DOM tree however it sees fit. + if ($this->processor instanceof InstructionProcessor) { $res = $this->processor->process($this->current, $name, $data); - if (! empty($res)) { + if (!empty($res)) { $this->current = $res; } @@ -632,9 +629,9 @@ class DOMTreeBuilder implements EventHandler * * See sections 2.9 and 8.1.2. * - * @param string $name - * The tag name. - * @return string The normalized tag name. + * @param string $name The tag name + * + * @return string the normalized tag name */ protected function normalizeTagName($name) { @@ -646,7 +643,7 @@ class DOMTreeBuilder implements EventHandler protected function quirksTreeResolver($name) { - throw new \Exception("Not implemented."); + throw new \Exception('Not implemented.'); } /** @@ -660,15 +657,16 @@ class DOMTreeBuilder implements EventHandler { $working = $this->current; do { - if ($working->nodeType != XML_ELEMENT_NODE) { + if (XML_ELEMENT_NODE !== $working->nodeType) { return false; } - if ($working->tagName == $tagName) { + if ($working->tagName === $tagName) { $this->current = $working->parentNode; return true; } } while ($working = $working->parentNode); + return false; } @@ -685,8 +683,8 @@ class DOMTreeBuilder implements EventHandler protected function isAncestor($tagName) { $candidate = $this->current; - while ($candidate->nodeType === XML_ELEMENT_NODE) { - if ($candidate->tagName == $tagName) { + while (XML_ELEMENT_NODE === $candidate->nodeType) { + if ($candidate->tagName === $tagName) { return true; } $candidate = $candidate->parentNode; @@ -704,6 +702,6 @@ class DOMTreeBuilder implements EventHandler */ protected function isParent($tagName) { - return $this->current->tagName == $tagName; + return $this->current->tagName === $tagName; } } |