summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser/DOMTreeBuilder.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML5/Parser/DOMTreeBuilder.php')
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php178
1 files changed, 88 insertions, 90 deletions
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 7f0e16a..365bb75 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -1,7 +1,9 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Elements;
+use Masterminds\HTML5\InstructionProcessor;
/**
* Create an HTML5 DOM tree from events.
@@ -24,7 +26,7 @@ use Masterminds\HTML5\Elements;
class DOMTreeBuilder implements EventHandler
{
/**
- * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
+ * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
*/
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
@@ -45,14 +47,14 @@ class DOMTreeBuilder implements EventHandler
const OPT_IMPLICIT_NS = 'implicit_namespaces';
/**
- * Holds the HTML5 element names that causes a namespace switch
+ * Holds the HTML5 element names that causes a namespace switch.
*
* @var array
*/
protected $nsRoots = array(
'html' => self::NAMESPACE_HTML,
'svg' => self::NAMESPACE_SVG,
- 'math' => self::NAMESPACE_MATHML
+ 'math' => self::NAMESPACE_MATHML,
);
/**
@@ -63,7 +65,7 @@ class DOMTreeBuilder implements EventHandler
protected $implicitNamespaces = array(
'xml' => self::NAMESPACE_XML,
'xmlns' => self::NAMESPACE_XMLNS,
- 'xlink' => self::NAMESPACE_XLINK
+ 'xlink' => self::NAMESPACE_XLINK,
);
/**
@@ -146,15 +148,15 @@ class DOMTreeBuilder implements EventHandler
protected $insertMode = 0;
/**
- * Track if we are in an element that allows only inline child nodes
+ * Track if we are in an element that allows only inline child nodes.
+ *
* @var string|null
*/
protected $onlyInline;
/**
* Quirks mode is enabled by default.
- * Any document that is missing the
- * DT will be considered to be in quirks mode.
+ * Any document that is missing the DT will be considered to be in quirks mode.
*/
protected $quirks = true;
@@ -175,24 +177,23 @@ class DOMTreeBuilder implements EventHandler
// $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
$this->doc = $impl->createDocument(null, null, $dt);
}
+
$this->errors = array();
$this->current = $this->doc; // ->documentElement;
// Create a rules engine for tags.
- $this->rules = new TreeBuildingRules($this->doc);
+ $this->rules = new TreeBuildingRules();
$implicitNS = array();
if (isset($this->options[self::OPT_IMPLICIT_NS])) {
$implicitNS = $this->options[self::OPT_IMPLICIT_NS];
- } elseif (isset($this->options["implicitNamespaces"])) {
- $implicitNS = $this->options["implicitNamespaces"];
+ } elseif (isset($this->options['implicitNamespaces'])) {
+ $implicitNS = $this->options['implicitNamespaces'];
}
// Fill $nsStack with the defalut HTML5 namespaces, plus the "implicitNamespaces" array taken form $options
- array_unshift($this->nsStack, $implicitNS + array(
- '' => self::NAMESPACE_HTML
- ) + $this->implicitNamespaces);
+ array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces);
if ($isFragment) {
$this->insertMode = static::IM_IN_BODY;
@@ -229,8 +230,10 @@ class DOMTreeBuilder implements EventHandler
*
* This is used for handling Processor Instructions as they are
* inserted. If omitted, PI's are inserted directly into the DOM tree.
+ *
+ * @param InstructionProcessor $proc
*/
- public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc)
+ public function setInstructionProcessor(InstructionProcessor $proc)
{
$this->processor = $proc;
}
@@ -242,7 +245,7 @@ class DOMTreeBuilder implements EventHandler
$this->quirks = $quirks;
if ($this->insertMode > static::IM_INITIAL) {
- $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
+ $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name);
return;
}
@@ -256,27 +259,32 @@ class DOMTreeBuilder implements EventHandler
* @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
* - XLink, MathML and SVG namespace handling
* - Omission rules: 8.1.2.4 Optional tags
+ *
+ * @param string $name
+ * @param array $attributes
+ * @param bool $selfClosing
+ *
+ * @return int
*/
public function startTag($name, $attributes = array(), $selfClosing = false)
{
- // fprintf(STDOUT, $name);
$lname = $this->normalizeTagName($name);
// Make sure we have an html element.
- if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) {
+ if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) {
$this->startTag('html');
}
// Set quirks mode if we're at IM_INITIAL with no doctype.
- if ($this->insertMode == static::IM_INITIAL) {
+ if ($this->insertMode === static::IM_INITIAL) {
$this->quirks = true;
- $this->parseError("No DOCTYPE specified.");
+ $this->parseError('No DOCTYPE specified.');
}
// SPECIAL TAG HANDLING:
// Spec says do this, and "don't ask."
// find the spec where this is defined... looks problematic
- if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
+ if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
$name = 'img';
}
@@ -292,7 +300,7 @@ class DOMTreeBuilder implements EventHandler
break;
case 'head':
if ($this->insertMode > static::IM_BEFORE_HEAD) {
- $this->parseError("Unexpected head tag outside of head context.");
+ $this->parseError('Unexpected head tag outside of head context.');
} else {
$this->insertMode = static::IM_IN_HEAD;
}
@@ -307,14 +315,14 @@ class DOMTreeBuilder implements EventHandler
$this->insertMode = static::IM_IN_MATHML;
break;
case 'noscript':
- if ($this->insertMode == static::IM_IN_HEAD) {
+ if ($this->insertMode === static::IM_IN_HEAD) {
$this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
}
break;
}
// Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@@ -322,62 +330,58 @@ class DOMTreeBuilder implements EventHandler
// when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace
if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
array_unshift($this->nsStack, array(
- '' => $this->nsRoots[$lname]
+ '' => $this->nsRoots[$lname],
) + $this->nsStack[0]);
- $pushes ++;
+ ++$pushes;
}
$needsWorkaround = false;
- if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
+ if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) {
// when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack
foreach ($attributes as $aName => $aVal) {
- if ($aName === 'xmlns') {
+ if ('xmlns' === $aName) {
$needsWorkaround = $aVal;
array_unshift($this->nsStack, array(
- '' => $aVal
+ '' => $aVal,
) + $this->nsStack[0]);
- $pushes ++;
- } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
+ ++$pushes;
+ } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) {
array_unshift($this->nsStack, array(
- substr($aName, $pos + 1) => $aVal
+ substr($aName, $pos + 1) => $aVal,
) + $this->nsStack[0]);
- $pushes ++;
+ ++$pushes;
}
}
}
if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
- $this->autoclose($this->onlyInline);
- $this->onlyInline = null;
+ $this->autoclose($this->onlyInline);
+ $this->onlyInline = null;
}
try {
$prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
-
- if ($needsWorkaround!==false) {
-
- $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>";
+ if (false !== $needsWorkaround) {
+ $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>';
$frag = new \DOMDocument('1.0', 'UTF-8');
$frag->loadXML($xml);
$ele = $this->doc->importNode($frag->documentElement, true);
-
} else {
- if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
+ if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
$ele = $this->doc->createElement($lname);
} else {
$ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
}
}
-
} catch (\DOMException $e) {
$this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
$ele = $this->doc->createElement('invalid');
}
if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
- $this->onlyInline = $lname;
+ $this->onlyInline = $lname;
}
// When we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them.
@@ -396,23 +400,23 @@ class DOMTreeBuilder implements EventHandler
foreach ($attributes as $aName => $aVal) {
// xmlns attributes can't be set
- if ($aName === 'xmlns') {
+ if ('xmlns' === $aName) {
continue;
}
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$aName = Elements::normalizeSvgAttribute($aName);
- } elseif ($this->insertMode == static::IM_IN_MATHML) {
+ } elseif ($this->insertMode === static::IM_IN_MATHML) {
$aName = Elements::normalizeMathMlAttribute($aName);
}
try {
$prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
- if ($prefix==='xmlns') {
- $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal);
- } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) {
- $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
+ if ('xmlns' === $prefix) {
+ $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal);
+ } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) {
+ $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal);
} else {
$ele->setAttribute($aName, $aVal);
}
@@ -422,19 +426,19 @@ class DOMTreeBuilder implements EventHandler
}
// This is necessary on a non-DTD schema, like HTML5.
- if ($aName == 'id') {
+ if ('id' === $aName) {
$ele->setIdAttribute('id', true);
}
}
- // Some elements have special processing rules. Handle those separately.
- if ($this->rules->hasRules($name) && $this->frag !== $this->current) {
+ if ($this->frag !== $this->current && $this->rules->hasRules($name)) {
+ // Some elements have special processing rules. Handle those separately.
$this->current = $this->rules->evaluate($ele, $this->current);
- } // Otherwise, it's a standard element.
- else {
+ } else {
+ // Otherwise, it's a standard element.
$this->current->appendChild($ele);
- if (! Elements::isA($name, Elements::VOID_TAG)) {
+ if (!Elements::isA($name, Elements::VOID_TAG)) {
$this->current = $ele;
}
@@ -448,7 +452,7 @@ class DOMTreeBuilder implements EventHandler
// This is sort of a last-ditch attempt to correct for cases where no head/body
// elements are provided.
- if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
+ if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) {
$this->insertMode = static::IM_IN_BODY;
}
@@ -456,7 +460,7 @@ class DOMTreeBuilder implements EventHandler
// but we have to remove the namespaces pushed to $nsStack.
if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
// remove the namespaced definded by current node
- for ($i = 0; $i < $pushes; $i ++) {
+ for ($i = 0; $i < $pushes; ++$i) {
array_shift($this->nsStack);
}
}
@@ -485,7 +489,7 @@ class DOMTreeBuilder implements EventHandler
'html',
'br',
'head',
- 'title'
+ 'title',
))) {
$this->startTag('html');
$this->endTag($name);
@@ -495,13 +499,13 @@ class DOMTreeBuilder implements EventHandler
}
// Ignore the tag.
- $this->parseError("Illegal closing tag at global scope.");
+ $this->parseError('Illegal closing tag at global scope.');
return;
}
// Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@@ -512,39 +516,33 @@ class DOMTreeBuilder implements EventHandler
$cid = spl_object_hash($this->current);
}
- // XXX: Not sure whether we need this anymore.
- // if ($name != $lname) {
- // return $this->quirksTreeResolver($lname);
- // }
-
// XXX: HTML has no parent. What do we do, though,
// if this element appears in the wrong place?
- if ($lname == 'html') {
+ if ('html' === $lname) {
return;
}
// remove the namespaced definded by current node
if (isset($this->pushes[$cid])) {
- for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
+ for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
array_shift($this->nsStack);
}
unset($this->pushes[$cid]);
}
- if (! $this->autoclose($lname)) {
+ if (!$this->autoclose($lname)) {
$this->parseError('Could not find closing tag for ' . $lname);
}
- // switch ($this->insertMode) {
switch ($lname) {
- case "head":
+ case 'head':
$this->insertMode = static::IM_AFTER_HEAD;
break;
- case "body":
+ case 'body':
$this->insertMode = static::IM_AFTER_BODY;
break;
- case "svg":
- case "mathml":
+ case 'svg':
+ case 'mathml':
$this->insertMode = static::IM_IN_BODY;
break;
}
@@ -566,9 +564,9 @@ class DOMTreeBuilder implements EventHandler
// practical as most documents contain these characters. Other text is not
// expected here so recording a parse error is necessary.
$dataTmp = trim($data, " \t\n\r\f");
- if (! empty($dataTmp)) {
+ if (!empty($dataTmp)) {
// fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
- $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
+ $this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
}
return;
@@ -585,7 +583,7 @@ class DOMTreeBuilder implements EventHandler
public function parseError($msg, $line = 0, $col = 0)
{
- $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
+ $this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg);
}
public function getErrors()
@@ -602,15 +600,14 @@ class DOMTreeBuilder implements EventHandler
public function processingInstruction($name, $data = null)
{
// XXX: Ignore initial XML declaration, per the spec.
- if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
+ if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) {
return;
}
- // Important: The processor may modify the current DOM tree however
- // it sees fit.
- if (isset($this->processor)) {
+ // Important: The processor may modify the current DOM tree however it sees fit.
+ if ($this->processor instanceof InstructionProcessor) {
$res = $this->processor->process($this->current, $name, $data);
- if (! empty($res)) {
+ if (!empty($res)) {
$this->current = $res;
}
@@ -632,9 +629,9 @@ class DOMTreeBuilder implements EventHandler
*
* See sections 2.9 and 8.1.2.
*
- * @param string $name
- * The tag name.
- * @return string The normalized tag name.
+ * @param string $name The tag name
+ *
+ * @return string the normalized tag name
*/
protected function normalizeTagName($name)
{
@@ -646,7 +643,7 @@ class DOMTreeBuilder implements EventHandler
protected function quirksTreeResolver($name)
{
- throw new \Exception("Not implemented.");
+ throw new \Exception('Not implemented.');
}
/**
@@ -660,15 +657,16 @@ class DOMTreeBuilder implements EventHandler
{
$working = $this->current;
do {
- if ($working->nodeType != XML_ELEMENT_NODE) {
+ if (XML_ELEMENT_NODE !== $working->nodeType) {
return false;
}
- if ($working->tagName == $tagName) {
+ if ($working->tagName === $tagName) {
$this->current = $working->parentNode;
return true;
}
} while ($working = $working->parentNode);
+
return false;
}
@@ -685,8 +683,8 @@ class DOMTreeBuilder implements EventHandler
protected function isAncestor($tagName)
{
$candidate = $this->current;
- while ($candidate->nodeType === XML_ELEMENT_NODE) {
- if ($candidate->tagName == $tagName) {
+ while (XML_ELEMENT_NODE === $candidate->nodeType) {
+ if ($candidate->tagName === $tagName) {
return true;
}
$candidate = $candidate->parentNode;
@@ -704,6 +702,6 @@ class DOMTreeBuilder implements EventHandler
*/
protected function isParent($tagName)
{
- return $this->current->tagName == $tagName;
+ return $this->current->tagName === $tagName;
}
}