summaryrefslogtreecommitdiff
path: root/src/HTML5/Parser
diff options
context:
space:
mode:
authorTitouan Galopin <[email protected]>2018-11-25 00:58:42 +0100
committerTitouan Galopin <[email protected]>2018-11-25 14:58:29 +0100
commitd829a30e092ea5d868b93a258724b12f9aa313fd (patch)
tree1c8819b825738ce95d87d96e6e2cafeb2c7f14c8 /src/HTML5/Parser
parentd39a98a478c95e0df86ed564650f7326244116e2 (diff)
Fix coding style
Diffstat (limited to 'src/HTML5/Parser')
-rw-r--r--src/HTML5/Parser/CharacterReference.php32
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php178
-rw-r--r--src/HTML5/Parser/EventHandler.php36
-rw-r--r--src/HTML5/Parser/FileInputStream.php7
-rw-r--r--src/HTML5/Parser/InputStream.php26
-rw-r--r--src/HTML5/Parser/ParseError.php1
-rw-r--r--src/HTML5/Parser/Scanner.php74
-rw-r--r--src/HTML5/Parser/StringInputStream.php65
-rw-r--r--src/HTML5/Parser/Tokenizer.php332
-rw-r--r--src/HTML5/Parser/TreeBuildingRules.php33
-rw-r--r--src/HTML5/Parser/UTF8Utils.php23
11 files changed, 400 insertions, 407 deletions
diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php
index c1617e7..3ecfba4 100644
--- a/src/HTML5/Parser/CharacterReference.php
+++ b/src/HTML5/Parser/CharacterReference.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Entities;
@@ -6,25 +7,22 @@ use Masterminds\HTML5\Entities;
/**
* Manage entity references.
*
- * This is a simple resolver for HTML5 character reference entitites.
- * See \Masterminds\HTML5\Entities for the list of supported entities.
+ * This is a simple resolver for HTML5 character reference entitites. See Entities for the list of supported entities.
*/
class CharacterReference
{
-
protected static $numeric_mask = array(
0x0,
0x2FFFF,
0,
- 0xFFFF
+ 0xFFFF,
);
/**
- * Given a name (e.g.
- * 'amp'), lookup the UTF-8 character ('&')
+ * Given a name (e.g. 'amp'), lookup the UTF-8 character ('&').
+ *
+ * @param string $name The name to look up
*
- * @param string $name
- * The name to look up.
* @return string The character sequence. In UTF-8 this may be more than one byte.
*/
public static function lookupName($name)
@@ -34,20 +32,16 @@ class CharacterReference
}
/**
- * Given a Unicode codepoint, return the UTF-8 character.
- *
- * (NOT USED ANYWHERE)
- */
- /*
- * public static function lookupCode($codePoint) { return 'POINT'; }
- */
-
- /**
* Given a decimal number, return the UTF-8 character.
+ *
+ * @param $int
+ *
+ * @return false|string|string[]|null
*/
public static function lookupDecimal($int)
{
$entity = '&#' . $int . ';';
+
// UNTESTED: This may fail on some planes. Couldn't find full documentation
// on the value of the mask array.
return mb_decode_numericentity($entity, static::$numeric_mask, 'utf-8');
@@ -55,6 +49,10 @@ class CharacterReference
/**
* Given a hexidecimal number, return the UTF-8 character.
+ *
+ * @param $hexdec
+ *
+ * @return false|string|string[]|null
*/
public static function lookupHex($hexdec)
{
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 7f0e16a..365bb75 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -1,7 +1,9 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Elements;
+use Masterminds\HTML5\InstructionProcessor;
/**
* Create an HTML5 DOM tree from events.
@@ -24,7 +26,7 @@ use Masterminds\HTML5\Elements;
class DOMTreeBuilder implements EventHandler
{
/**
- * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
+ * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
*/
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
@@ -45,14 +47,14 @@ class DOMTreeBuilder implements EventHandler
const OPT_IMPLICIT_NS = 'implicit_namespaces';
/**
- * Holds the HTML5 element names that causes a namespace switch
+ * Holds the HTML5 element names that causes a namespace switch.
*
* @var array
*/
protected $nsRoots = array(
'html' => self::NAMESPACE_HTML,
'svg' => self::NAMESPACE_SVG,
- 'math' => self::NAMESPACE_MATHML
+ 'math' => self::NAMESPACE_MATHML,
);
/**
@@ -63,7 +65,7 @@ class DOMTreeBuilder implements EventHandler
protected $implicitNamespaces = array(
'xml' => self::NAMESPACE_XML,
'xmlns' => self::NAMESPACE_XMLNS,
- 'xlink' => self::NAMESPACE_XLINK
+ 'xlink' => self::NAMESPACE_XLINK,
);
/**
@@ -146,15 +148,15 @@ class DOMTreeBuilder implements EventHandler
protected $insertMode = 0;
/**
- * Track if we are in an element that allows only inline child nodes
+ * Track if we are in an element that allows only inline child nodes.
+ *
* @var string|null
*/
protected $onlyInline;
/**
* Quirks mode is enabled by default.
- * Any document that is missing the
- * DT will be considered to be in quirks mode.
+ * Any document that is missing the DT will be considered to be in quirks mode.
*/
protected $quirks = true;
@@ -175,24 +177,23 @@ class DOMTreeBuilder implements EventHandler
// $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
$this->doc = $impl->createDocument(null, null, $dt);
}
+
$this->errors = array();
$this->current = $this->doc; // ->documentElement;
// Create a rules engine for tags.
- $this->rules = new TreeBuildingRules($this->doc);
+ $this->rules = new TreeBuildingRules();
$implicitNS = array();
if (isset($this->options[self::OPT_IMPLICIT_NS])) {
$implicitNS = $this->options[self::OPT_IMPLICIT_NS];
- } elseif (isset($this->options["implicitNamespaces"])) {
- $implicitNS = $this->options["implicitNamespaces"];
+ } elseif (isset($this->options['implicitNamespaces'])) {
+ $implicitNS = $this->options['implicitNamespaces'];
}
// Fill $nsStack with the defalut HTML5 namespaces, plus the "implicitNamespaces" array taken form $options
- array_unshift($this->nsStack, $implicitNS + array(
- '' => self::NAMESPACE_HTML
- ) + $this->implicitNamespaces);
+ array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces);
if ($isFragment) {
$this->insertMode = static::IM_IN_BODY;
@@ -229,8 +230,10 @@ class DOMTreeBuilder implements EventHandler
*
* This is used for handling Processor Instructions as they are
* inserted. If omitted, PI's are inserted directly into the DOM tree.
+ *
+ * @param InstructionProcessor $proc
*/
- public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc)
+ public function setInstructionProcessor(InstructionProcessor $proc)
{
$this->processor = $proc;
}
@@ -242,7 +245,7 @@ class DOMTreeBuilder implements EventHandler
$this->quirks = $quirks;
if ($this->insertMode > static::IM_INITIAL) {
- $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
+ $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name);
return;
}
@@ -256,27 +259,32 @@ class DOMTreeBuilder implements EventHandler
* @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
* - XLink, MathML and SVG namespace handling
* - Omission rules: 8.1.2.4 Optional tags
+ *
+ * @param string $name
+ * @param array $attributes
+ * @param bool $selfClosing
+ *
+ * @return int
*/
public function startTag($name, $attributes = array(), $selfClosing = false)
{
- // fprintf(STDOUT, $name);
$lname = $this->normalizeTagName($name);
// Make sure we have an html element.
- if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) {
+ if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) {
$this->startTag('html');
}
// Set quirks mode if we're at IM_INITIAL with no doctype.
- if ($this->insertMode == static::IM_INITIAL) {
+ if ($this->insertMode === static::IM_INITIAL) {
$this->quirks = true;
- $this->parseError("No DOCTYPE specified.");
+ $this->parseError('No DOCTYPE specified.');
}
// SPECIAL TAG HANDLING:
// Spec says do this, and "don't ask."
// find the spec where this is defined... looks problematic
- if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
+ if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
$name = 'img';
}
@@ -292,7 +300,7 @@ class DOMTreeBuilder implements EventHandler
break;
case 'head':
if ($this->insertMode > static::IM_BEFORE_HEAD) {
- $this->parseError("Unexpected head tag outside of head context.");
+ $this->parseError('Unexpected head tag outside of head context.');
} else {
$this->insertMode = static::IM_IN_HEAD;
}
@@ -307,14 +315,14 @@ class DOMTreeBuilder implements EventHandler
$this->insertMode = static::IM_IN_MATHML;
break;
case 'noscript':
- if ($this->insertMode == static::IM_IN_HEAD) {
+ if ($this->insertMode === static::IM_IN_HEAD) {
$this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
}
break;
}
// Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@@ -322,62 +330,58 @@ class DOMTreeBuilder implements EventHandler
// when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace
if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
array_unshift($this->nsStack, array(
- '' => $this->nsRoots[$lname]
+ '' => $this->nsRoots[$lname],
) + $this->nsStack[0]);
- $pushes ++;
+ ++$pushes;
}
$needsWorkaround = false;
- if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
+ if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) {
// when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack
foreach ($attributes as $aName => $aVal) {
- if ($aName === 'xmlns') {
+ if ('xmlns' === $aName) {
$needsWorkaround = $aVal;
array_unshift($this->nsStack, array(
- '' => $aVal
+ '' => $aVal,
) + $this->nsStack[0]);
- $pushes ++;
- } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
+ ++$pushes;
+ } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) {
array_unshift($this->nsStack, array(
- substr($aName, $pos + 1) => $aVal
+ substr($aName, $pos + 1) => $aVal,
) + $this->nsStack[0]);
- $pushes ++;
+ ++$pushes;
}
}
}
if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
- $this->autoclose($this->onlyInline);
- $this->onlyInline = null;
+ $this->autoclose($this->onlyInline);
+ $this->onlyInline = null;
}
try {
$prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
-
- if ($needsWorkaround!==false) {
-
- $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>";
+ if (false !== $needsWorkaround) {
+ $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>';
$frag = new \DOMDocument('1.0', 'UTF-8');
$frag->loadXML($xml);
$ele = $this->doc->importNode($frag->documentElement, true);
-
} else {
- if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
+ if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
$ele = $this->doc->createElement($lname);
} else {
$ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
}
}
-
} catch (\DOMException $e) {
$this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
$ele = $this->doc->createElement('invalid');
}
if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
- $this->onlyInline = $lname;
+ $this->onlyInline = $lname;
}
// When we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them.
@@ -396,23 +400,23 @@ class DOMTreeBuilder implements EventHandler
foreach ($attributes as $aName => $aVal) {
// xmlns attributes can't be set
- if ($aName === 'xmlns') {
+ if ('xmlns' === $aName) {
continue;
}
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$aName = Elements::normalizeSvgAttribute($aName);
- } elseif ($this->insertMode == static::IM_IN_MATHML) {
+ } elseif ($this->insertMode === static::IM_IN_MATHML) {
$aName = Elements::normalizeMathMlAttribute($aName);
}
try {
$prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
- if ($prefix==='xmlns') {
- $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal);
- } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) {
- $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
+ if ('xmlns' === $prefix) {
+ $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal);
+ } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) {
+ $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal);
} else {
$ele->setAttribute($aName, $aVal);
}
@@ -422,19 +426,19 @@ class DOMTreeBuilder implements EventHandler
}
// This is necessary on a non-DTD schema, like HTML5.
- if ($aName == 'id') {
+ if ('id' === $aName) {
$ele->setIdAttribute('id', true);
}
}
- // Some elements have special processing rules. Handle those separately.
- if ($this->rules->hasRules($name) && $this->frag !== $this->current) {
+ if ($this->frag !== $this->current && $this->rules->hasRules($name)) {
+ // Some elements have special processing rules. Handle those separately.
$this->current = $this->rules->evaluate($ele, $this->current);
- } // Otherwise, it's a standard element.
- else {
+ } else {
+ // Otherwise, it's a standard element.
$this->current->appendChild($ele);
- if (! Elements::isA($name, Elements::VOID_TAG)) {
+ if (!Elements::isA($name, Elements::VOID_TAG)) {
$this->current = $ele;
}
@@ -448,7 +452,7 @@ class DOMTreeBuilder implements EventHandler
// This is sort of a last-ditch attempt to correct for cases where no head/body
// elements are provided.
- if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
+ if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) {
$this->insertMode = static::IM_IN_BODY;
}
@@ -456,7 +460,7 @@ class DOMTreeBuilder implements EventHandler
// but we have to remove the namespaces pushed to $nsStack.
if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
// remove the namespaced definded by current node
- for ($i = 0; $i < $pushes; $i ++) {
+ for ($i = 0; $i < $pushes; ++$i) {
array_shift($this->nsStack);
}
}
@@ -485,7 +489,7 @@ class DOMTreeBuilder implements EventHandler
'html',
'br',
'head',
- 'title'
+ 'title',
))) {
$this->startTag('html');
$this->endTag($name);
@@ -495,13 +499,13 @@ class DOMTreeBuilder implements EventHandler
}
// Ignore the tag.
- $this->parseError("Illegal closing tag at global scope.");
+ $this->parseError('Illegal closing tag at global scope.');
return;
}
// Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@@ -512,39 +516,33 @@ class DOMTreeBuilder implements EventHandler
$cid = spl_object_hash($this->current);
}
- // XXX: Not sure whether we need this anymore.
- // if ($name != $lname) {
- // return $this->quirksTreeResolver($lname);
- // }
-
// XXX: HTML has no parent. What do we do, though,
// if this element appears in the wrong place?
- if ($lname == 'html') {
+ if ('html' === $lname) {
return;
}
// remove the namespaced definded by current node
if (isset($this->pushes[$cid])) {
- for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
+ for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
array_shift($this->nsStack);
}
unset($this->pushes[$cid]);
}
- if (! $this->autoclose($lname)) {
+ if (!$this->autoclose($lname)) {
$this->parseError('Could not find closing tag for ' . $lname);
}
- // switch ($this->insertMode) {
switch ($lname) {
- case "head":
+ case 'head':
$this->insertMode = static::IM_AFTER_HEAD;
break;
- case "body":
+ case 'body':
$this->insertMode = static::IM_AFTER_BODY;
break;
- case "svg":
- case "mathml":
+ case 'svg':
+ case 'mathml':
$this->insertMode = static::IM_IN_BODY;
break;
}
@@ -566,9 +564,9 @@ class DOMTreeBuilder implements EventHandler
// practical as most documents contain these characters. Other text is not
// expected here so recording a parse error is necessary.
$dataTmp = trim($data, " \t\n\r\f");
- if (! empty($dataTmp)) {
+ if (!empty($dataTmp)) {
// fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
- $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
+ $this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
}
return;
@@ -585,7 +583,7 @@ class DOMTreeBuilder implements EventHandler
public function parseError($msg, $line = 0, $col = 0)
{
- $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
+ $this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg);
}
public function getErrors()
@@ -602,15 +600,14 @@ class DOMTreeBuilder implements EventHandler
public function processingInstruction($name, $data = null)
{
// XXX: Ignore initial XML declaration, per the spec.
- if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
+ if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) {
return;
}
- // Important: The processor may modify the current DOM tree however
- // it sees fit.
- if (isset($this->processor)) {
+ // Important: The processor may modify the current DOM tree however it sees fit.
+ if ($this->processor instanceof InstructionProcessor) {
$res = $this->processor->process($this->current, $name, $data);
- if (! empty($res)) {
+ if (!empty($res)) {
$this->current = $res;
}
@@ -632,9 +629,9 @@ class DOMTreeBuilder implements EventHandler
*
* See sections 2.9 and 8.1.2.
*
- * @param string $name
- * The tag name.
- * @return string The normalized tag name.
+ * @param string $name The tag name
+ *
+ * @return string the normalized tag name
*/
protected function normalizeTagName($name)
{
@@ -646,7 +643,7 @@ class DOMTreeBuilder implements EventHandler
protected function quirksTreeResolver($name)
{
- throw new \Exception("Not implemented.");
+ throw new \Exception('Not implemented.');
}
/**
@@ -660,15 +657,16 @@ class DOMTreeBuilder implements EventHandler
{
$working = $this->current;
do {
- if ($working->nodeType != XML_ELEMENT_NODE) {
+ if (XML_ELEMENT_NODE !== $working->nodeType) {
return false;
}
- if ($working->tagName == $tagName) {
+ if ($working->tagName === $tagName) {
$this->current = $working->parentNode;
return true;
}
} while ($working = $working->parentNode);
+
return false;
}
@@ -685,8 +683,8 @@ class DOMTreeBuilder implements EventHandler
protected function isAncestor($tagName)
{
$candidate = $this->current;
- while ($candidate->nodeType === XML_ELEMENT_NODE) {
- if ($candidate->tagName == $tagName) {
+ while (XML_ELEMENT_NODE === $candidate->nodeType) {
+ if ($candidate->tagName === $tagName) {
return true;
}
$candidate = $candidate->parentNode;
@@ -704,6 +702,6 @@ class DOMTreeBuilder implements EventHandler
*/
protected function isParent($tagName)
{
- return $this->current->tagName == $tagName;
+ return $this->current->tagName === $tagName;
}
}
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php
index 3da71a3..cb0109b 100644
--- a/src/HTML5/Parser/EventHandler.php
+++ b/src/HTML5/Parser/EventHandler.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
@@ -20,7 +21,6 @@ namespace Masterminds\HTML5\Parser;
*/
interface EventHandler
{
-
const DOCTYPE_NONE = 0;
const DOCTYPE_PUBLIC = 1;
@@ -30,15 +30,11 @@ interface EventHandler
/**
* A doctype declaration.
*
- * @param string $name
- * The name of the root element.
- * @param int $idType
- * One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM.
- * @param string $id
- * The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
- * then this is a system ID.
- * @param boolean $quirks
- * Indicates whether the builder should enter quirks mode.
+ * @param string $name The name of the root element
+ * @param int $idType One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM
+ * @param string $id The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
+ * then this is a system ID.
+ * @param bool $quirks Indicates whether the builder should enter quirks mode
*/
public function doctype($name, $idType = 0, $id = null, $quirks = false);
@@ -63,13 +59,11 @@ interface EventHandler
* The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
* closing tag is encounter. **This behavior may change.**
*
- * @param string $name
- * The tag name.
- * @param array $attributes
- * An array with all of the tag's attributes.
- * @param boolean $selfClosing
- * An indicator of whether or not this tag is self-closing (<foo/>)
- * @return int One of the Tokenizer::TEXTMODE_* constants.
+ * @param string $name The tag name
+ * @param array $attributes An array with all of the tag's attributes
+ * @param bool $selfClosing An indicator of whether or not this tag is self-closing (<foo/>)
+ *
+ * @return int one of the Tokenizer::TEXTMODE_* constants
*/
public function startTag($name, $attributes = array(), $selfClosing = false);
@@ -104,7 +98,7 @@ interface EventHandler
* A CDATA section.
*
* @param string $data
- * The unparsed character data.
+ * The unparsed character data
*/
public function cdata($data);
@@ -113,10 +107,8 @@ interface EventHandler
*
* While user agents don't get PIs, server-side does.
*
- * @param string $name
- * The name of the processor (e.g. 'php').
- * @param string $data
- * The unparsed data.
+ * @param string $name The name of the processor (e.g. 'php').
+ * @param string $data The unparsed data
*/
public function processingInstruction($name, $data = null);
}
diff --git a/src/HTML5/Parser/FileInputStream.php b/src/HTML5/Parser/FileInputStream.php
index 76bd17b..f176422 100644
--- a/src/HTML5/Parser/FileInputStream.php
+++ b/src/HTML5/Parser/FileInputStream.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
@@ -18,9 +19,9 @@ class FileInputStream extends StringInputStream implements InputStream
/**
* Load a file input stream.
*
- * @param string $data The file or url path to load.
- * @param string $encoding The encoding to use for the data.
- * @param string $debug A fprintf format to use to echo the data on stdout.
+ * @param string $data the file or url path to load
+ * @param string $encoding the encoding to use for the data
+ * @param string $debug a fprintf format to use to echo the data on stdout
*/
public function __construct($data, $encoding = 'UTF-8', $debug = '')
{
diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php
index e4a106a..cf279d8 100644
--- a/src/HTML5/Parser/InputStream.php
+++ b/src/HTML5/Parser/InputStream.php
@@ -29,7 +29,7 @@ interface InputStream extends \Iterator
*
* @TODO Move this to the scanner.
*
- * @return int The column number.
+ * @return int the column number
*/
public function columnOffset();
@@ -49,12 +49,12 @@ interface InputStream extends \Iterator
* and returns the matched substring.
*
* @see strcspn
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
+ *
+ * @param string $bytes Bytes to match
+ * @param int $max Maximum number of bytes to scan
+ *
* @return mixed Index or false if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
+ * equality when checking the result, since index could be 0.
*/
public function charsUntil($bytes, $max = null);
@@ -65,20 +65,18 @@ interface InputStream extends \Iterator
* and returns the matched substring.
*
* @see strspn
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
+ *
+ * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
+ * current char, the pointer advances and the char is part of the
+ * substring.
+ * @param int $max The max number of chars to read
*/
public function charsWhile($bytes, $max = null);
/**
* Unconsume one character.
*
- * @param int $howMany
- * The number of characters to move the pointer back.
+ * @param int $howMany The number of characters to move the pointer back
*/
public function unconsume($howMany = 1);
diff --git a/src/HTML5/Parser/ParseError.php b/src/HTML5/Parser/ParseError.php
index 86498a1..640e516 100644
--- a/src/HTML5/Parser/ParseError.php
+++ b/src/HTML5/Parser/ParseError.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index cec9a13..7bea1ae 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Exception;
@@ -18,7 +19,7 @@ class Scanner
private $data;
/**
- * The current integer byte position we are in $data
+ * The current integer byte position we are in $data.
*/
private $char;
@@ -35,10 +36,10 @@ class Scanner
/**
* Create a new Scanner.
*
- * @param string $data Data to parse
- * @param string $encoding The encoding to use for the data.
+ * @param string $data Data to parse
+ * @param string $encoding the encoding to use for the data
*
- * @throws Exception If the given data cannot be encoded to UTF-8.
+ * @throws Exception if the given data cannot be encoded to UTF-8
*/
public function __construct($data, $encoding = 'UTF-8')
{
@@ -75,20 +76,21 @@ class Scanner
* '</script>' string.
*
* @param string $sequence
- * @param bool $caseSensitive
+ * @param bool $caseSensitive
*
* @return bool
*/
public function sequenceMatches($sequence, $caseSensitive = true)
{
$portion = substr($this->data, $this->char, strlen($sequence));
- return $caseSensitive ? $portion === $sequence : strcasecmp($portion, $sequence) === 0;
+
+ return $caseSensitive ? $portion === $sequence : 0 === strcasecmp($portion, $sequence);
}
/**
* Get the current position.
*
- * @return int The current intiger byte position.
+ * @return int the current intiger byte position
*/
public function position()
{
@@ -98,7 +100,7 @@ class Scanner
/**
* Take a peek at the next character in the data.
*
- * @return string The next character.
+ * @return string the next character
*/
public function peek()
{
@@ -114,11 +116,11 @@ class Scanner
*
* Note: This advances the pointer.
*
- * @return string The next character.
+ * @return string the next character
*/
public function next()
{
- $this->char++;
+ ++$this->char;
if ($this->char < $this->EOF) {
return $this->data[$this->char];
@@ -132,7 +134,7 @@ class Scanner
*
* Note, this does not advance the pointer.
*
- * @return string The current character.
+ * @return string the current character
*/
public function current()
{
@@ -157,13 +159,12 @@ class Scanner
* Unconsume some of the data.
* This moves the data pointer backwards.
*
- * @param int $howMany
- * The number of characters to move the pointer back.
+ * @param int $howMany The number of characters to move the pointer back
*/
public function unconsume($howMany = 1)
{
if (($this->char - $howMany) >= 0) {
- $this->char = $this->char - $howMany;
+ $this->char -= $howMany;
}
}
@@ -173,7 +174,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group that is hex characters.
+ * @return string the next group that is hex characters
*/
public function getHex()
{
@@ -186,7 +187,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group of ASCII alpha characters.
+ * @return string the next group of ASCII alpha characters
*/
public function getAsciiAlpha()
{
@@ -199,7 +200,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group of ASCII alpha characters and numbers.
+ * @return string the next group of ASCII alpha characters and numbers
*/
public function getAsciiAlphaNum()
{
@@ -212,7 +213,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group of numbers.
+ * @return string the next group of numbers
*/
public function getNumeric()
{
@@ -242,11 +243,11 @@ class Scanner
/**
* Returns the current line that is being consumed.
*
- * @return int The current line number.
+ * @return int the current line number
*/
public function currentLine()
{
- if (empty($this->EOF) || $this->char == 0) {
+ if (empty($this->EOF) || 0 === $this->char) {
return 1;
}
@@ -284,12 +285,12 @@ class Scanner
*
* Newlines are column 0. The first char after a newline is column 1.
*
- * @return int The column number.
+ * @return int the column number
*/
public function columnOffset()
{
// Short circuit for the first char.
- if ($this->char == 0) {
+ if (0 === $this->char) {
return 0;
}
@@ -303,7 +304,7 @@ class Scanner
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
- if ($lastLine !== false) {
+ if (false !== $lastLine) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
// After a newline.
@@ -318,7 +319,7 @@ class Scanner
*
* This consumes characters until the EOF.
*
- * @return int The number of characters remaining.
+ * @return int the number of characters remaining
*/
public function remainingChars()
{
@@ -351,7 +352,7 @@ class Scanner
$crlfTable = array(
"\0" => "\xEF\xBF\xBD",
"\r\n" => "\n",
- "\r" => "\n"
+ "\r" => "\n",
);
return strtr($data, $crlfTable);
@@ -365,12 +366,11 @@ class Scanner
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
+ * @param string $bytes Bytes to match
+ * @param int $max Maximum number of bytes to scan
+ *
* @return mixed Index or false if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
+ * equality when checking the result, since index could be 0.
*/
private function doCharsUntil($bytes, $max = null)
{
@@ -378,7 +378,7 @@ class Scanner
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
@@ -396,12 +396,10 @@ class Scanner
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
+ * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
+ * current char, the pointer advances and the char is part of the
+ * substring.
+ * @param int $max The max number of chars to read
*
* @return string
*/
@@ -411,7 +409,7 @@ class Scanner
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index 0118468..2281990 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -2,6 +2,7 @@
/**
* Loads a string to be parsed.
*/
+
namespace Masterminds\HTML5\Parser;
/*
@@ -50,7 +51,7 @@ class StringInputStream implements InputStream
private $data;
/**
- * The current integer byte position we are in $data
+ * The current integer byte position we are in $data.
*/
private $char;
@@ -67,9 +68,9 @@ class StringInputStream implements InputStream
/**
* Create a new InputStream wrapper.
*
- * @param string $data Data to parse
- * @param string $encoding The encoding to use for the data.
- * @param string $debug A fprintf format to use to echo the data on stdout.
+ * @param string $data Data to parse
+ * @param string $encoding the encoding to use for the data
+ * @param string $debug a fprintf format to use to echo the data on stdout
*/
public function __construct($data, $encoding = 'UTF-8', $debug = '')
{
@@ -110,7 +111,7 @@ class StringInputStream implements InputStream
$crlfTable = array(
"\0" => "\xEF\xBF\xBD",
"\r\n" => "\n",
- "\r" => "\n"
+ "\r" => "\n",
);
return strtr($data, $crlfTable);
@@ -121,7 +122,7 @@ class StringInputStream implements InputStream
*/
public function currentLine()
{
- if (empty($this->EOF) || $this->char == 0) {
+ if (empty($this->EOF) || 0 === $this->char) {
return 1;
}
// Add one to $this->char because we want the number for the next
@@ -130,9 +131,7 @@ class StringInputStream implements InputStream
}
/**
- *
* @deprecated
- *
*/
public function getCurrentLine()
{
@@ -144,12 +143,12 @@ class StringInputStream implements InputStream
*
* Newlines are column 0. The first char after a newline is column 1.
*
- * @return int The column number.
+ * @return int the column number
*/
public function columnOffset()
{
// Short circuit for the first char.
- if ($this->char == 0) {
+ if (0 === $this->char) {
return 0;
}
// strrpos is weird, and the offset needs to be negative for what we
@@ -162,7 +161,7 @@ class StringInputStream implements InputStream
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
- if ($lastLine !== false) {
+ if (false !== $lastLine) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
// After a newline.
@@ -173,9 +172,7 @@ class StringInputStream implements InputStream
}
/**
- *
* @deprecated
- *
*/
public function getColumnOffset()
{
@@ -185,7 +182,7 @@ class StringInputStream implements InputStream
/**
* Get the current character.
*
- * @return string The current character.
+ * @return string the current character
*/
public function current()
{
@@ -198,7 +195,7 @@ class StringInputStream implements InputStream
*/
public function next()
{
- $this->char ++;
+ ++$this->char;
}
/**
@@ -212,15 +209,11 @@ class StringInputStream implements InputStream
/**
* Is the current pointer location valid.
*
- * @return bool Is the current pointer location valid.
+ * @return bool is the current pointer location valid
*/
public function valid()
{
- if ($this->char < $this->EOF) {
- return true;
- }
-
- return false;
+ return $this->char < $this->EOF;
}
/**
@@ -232,7 +225,7 @@ class StringInputStream implements InputStream
* @note This performs bounds checking
*
* @return string Returns the remaining text. If called when the InputStream is
- * already exhausted, it returns an empty string.
+ * already exhausted, it returns an empty string.
*/
public function remainingChars()
{
@@ -254,12 +247,11 @@ class StringInputStream implements InputStream
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
+ * @param string $bytes Bytes to match
+ * @param int $max Maximum number of bytes to scan
+ *
* @return mixed Index or false if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
+ * equality when checking the result, since index could be 0.
*/
public function charsUntil($bytes, $max = null)
{
@@ -267,7 +259,7 @@ class StringInputStream implements InputStream
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
@@ -285,12 +277,10 @@ class StringInputStream implements InputStream
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
+ * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
+ * current char, the pointer advances and the char is part of the
+ * substring.
+ * @param int $max The max number of chars to read
*
* @return string
*/
@@ -300,7 +290,7 @@ class StringInputStream implements InputStream
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);
@@ -314,13 +304,12 @@ class StringInputStream implements InputStream
/**
* Unconsume characters.
*
- * @param int $howMany
- * The number of characters to unconsume.
+ * @param int $howMany The number of characters to unconsume
*/
public function unconsume($howMany = 1)
{
if (($this->char - $howMany) >= 0) {
- $this->char = $this->char - $howMany;
+ $this->char -= $howMany;
}
}
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index ba9de52..cfd0e43 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Elements;
@@ -25,7 +26,6 @@ use Masterminds\HTML5\Elements;
*/
class Tokenizer
{
-
protected $scanner;
protected $events;
@@ -56,12 +56,9 @@ class Tokenizer
* it a scanner (input) and an event handler (output), and then calling
* the Tokenizer::parse() method.`
*
- * @param \Masterminds\HTML5\Parser\Scanner $scanner
- * A scanner initialized with an input stream.
- * @param \Masterminds\HTML5\Parser\EventHandler $eventHandler
- * An event handler, initialized and ready to receive
- * events.
- * @param string $mode
+ * @param Scanner $scanner A scanner initialized with an input stream
+ * @param EventHandler $eventHandler An event handler, initialized and ready to receive events
+ * @param string $mode
*/
public function __construct($scanner, $eventHandler, $mode = self::CONFORMANT_HTML)
{
@@ -103,11 +100,9 @@ class Tokenizer
* Normally, setting is done by the event handler via a special return code on
* startTag(), but it can also be set manually using this function.
*
- * @param integer $textmode
- * One of Elements::TEXT_*
- * @param string $untilTag
- * The tag that should stop RAW or RCDATA mode. Normal mode does not
- * use this indicator.
+ * @param int $textmode One of Elements::TEXT_*
+ * @param string $untilTag The tag that should stop RAW or RCDATA mode. Normal mode does not
+ * use this indicator.
*/
public function setTextMode($textmode, $untilTag = null)
{
@@ -117,13 +112,13 @@ class Tokenizer
/**
* Consume a character and make a move.
- * HTML5 8.2.4.1
+ * HTML5 8.2.4.1.
*/
protected function consumeData()
{
$tok = $this->scanner->current();
- if ($tok === '&') {
+ if ('&' === $tok) {
// Character reference
$ref = $this->decodeCharacterReference();
$this->buffer($ref);
@@ -132,7 +127,7 @@ class Tokenizer
}
// Parse tag
- if ($tok === '<') {
+ if ('<' === $tok) {
// Any buffered text data can go out now.
$this->flushBuffer();
@@ -143,7 +138,7 @@ class Tokenizer
|| $this->processingInstruction()
|| $this->tagName()
// This always returns false.
- || $this->parseError("Illegal tag opening")
+ || $this->parseError('Illegal tag opening')
|| $this->characterData();
$tok = $this->scanner->current();
@@ -153,7 +148,7 @@ class Tokenizer
$this->eof($tok);
// Parse character
- if ($tok !== false) {
+ if (false !== $tok) {
switch ($this->textMode) {
case Elements::TEXT_RAW:
$this->rawText($tok);
@@ -164,10 +159,10 @@ class Tokenizer
break;
default:
- if (!strspn($tok, "<&")) {
+ if (!strspn($tok, '<&')) {
// NULL character
- if ($tok === "\00") {
- $this->parseError("Received null character.");
+ if ("\00" === $tok) {
+ $this->parseError('Received null character.');
}
$this->text .= $tok;
@@ -189,7 +184,7 @@ class Tokenizer
protected function characterData()
{
$tok = $this->scanner->current();
- if ($tok === false) {
+ if (false === $tok) {
return false;
}
switch ($this->textMode) {
@@ -198,9 +193,10 @@ class Tokenizer
case Elements::TEXT_RCDATA:
return $this->rcdata($tok);
default:
- if (strspn($tok, "<&")) {
+ if (strspn($tok, '<&')) {
return false;
}
+
return $this->text($tok);
}
}
@@ -208,20 +204,20 @@ class Tokenizer
/**
* This buffers the current token as character data.
*
- * @param string $tok The current token.
+ * @param string $tok the current token
*
* @return bool
*/
protected function text($tok)
{
// This should never happen...
- if ($tok === false) {
+ if (false === $tok) {
return false;
}
// NULL character
- if ($tok === "\00") {
- $this->parseError("Received null character.");
+ if ("\00" === $tok) {
+ $this->parseError('Received null character.');
}
$this->buffer($tok);
@@ -233,7 +229,7 @@ class Tokenizer
/**
* Read text in RAW mode.
*
- * @param string $tok The current token.
+ * @param string $tok the current token
*
* @return bool
*/
@@ -254,7 +250,7 @@ class Tokenizer
/**
* Read text in RCDATA mode.
*
- * @param string $tok The current token.
+ * @param string $tok the current token
*
* @return bool
*/
@@ -268,8 +264,8 @@ class Tokenizer
$txt = '';
$caseSensitive = !Elements::isHtml5Element($this->untilTag);
- while ($tok !== false && ! ($tok == '<' && ($this->scanner->sequenceMatches($sequence, $caseSensitive)))) {
- if ($tok == '&') {
+ while (false !== $tok && !('<' == $tok && ($this->scanner->sequenceMatches($sequence, $caseSensitive)))) {
+ if ('&' == $tok) {
$txt .= $this->decodeCharacterReference();
$tok = $this->scanner->current();
} else {
@@ -280,8 +276,8 @@ class Tokenizer
$len = strlen($sequence);
$this->scanner->consume($len);
$len += $this->scanner->whitespace();
- if ($this->scanner->current() !== '>') {
- $this->parseError("Unclosed RCDATA end tag");
+ if ('>' !== $this->scanner->current()) {
+ $this->parseError('Unclosed RCDATA end tag');
}
$this->scanner->unconsume($len);
@@ -296,7 +292,7 @@ class Tokenizer
*/
protected function eof($tok)
{
- if ($tok === false) {
+ if (false === $tok) {
// fprintf(STDOUT, "EOF");
$this->flushBuffer();
$this->events->eof();
@@ -313,40 +309,37 @@ class Tokenizer
*/
protected function markupDeclaration($tok)
{
- if ($tok != '!') {
+ if ('!' != $tok) {
return false;
}
$tok = $this->scanner->next();
// Comment:
- if ($tok == '-' && $this->scanner->peek() == '-') {
+ if ('-' == $tok && '-' == $this->scanner->peek()) {
$this->scanner->next(); // Consume the other '-'
$this->scanner->next(); // Next char.
return $this->comment();
- }
-
- elseif ($tok == 'D' || $tok == 'd') { // Doctype
+ } elseif ('D' == $tok || 'd' == $tok) { // Doctype
return $this->doctype();
- }
-
- elseif ($tok == '[') { // CDATA section
+ } elseif ('[' == $tok) { // CDATA section
return $this->cdataSection();
}
// FINISH
- $this->parseError("Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s", $tok);
+ $this->parseError('Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s', $tok);
$this->bogusComment('<!');
+
return true;
}
/**
* Consume an end tag.
- * 8.2.4.9
+ * 8.2.4.9.
*/
protected function endTag()
{
- if ($this->scanner->current() != '/') {
+ if ('/' != $this->scanner->current()) {
return false;
}
$tok = $this->scanner->next();
@@ -355,21 +348,22 @@ class Tokenizer
// > -> parse error
// EOF -> parse error
// -> parse error
- if (! ctype_alpha($tok)) {
+ if (!ctype_alpha($tok)) {
$this->parseError("Expected tag name, got '%s'", $tok);
- if ($tok == "\0" || $tok === false) {
+ if ("\0" == $tok || false === $tok) {
return false;
}
+
return $this->bogusComment('</');
}
$name = $this->scanner->charsUntil("\n\f \t>");
- $name = $this->mode === self::CONFORMANT_XML ? $name: strtolower($name);
+ $name = self::CONFORMANT_XML === $this->mode ? $name : strtolower($name);
// Trash whitespace.
$this->scanner->whitespace();
$tok = $this->scanner->current();
- if ($tok != '>') {
+ if ('>' != $tok) {
$this->parseError("Expected >, got '%s'", $tok);
// We just trash stuff until we get to the next tag close.
$this->scanner->charsUntil('>');
@@ -377,23 +371,24 @@ class Tokenizer
$this->events->endTag($name);
$this->scanner->next();
+
return true;
}
/**
* Consume a tag name and body.
- * 8.2.4.10
+ * 8.2.4.10.
*/
protected function tagName()
{
$tok = $this->scanner->current();
- if (! ctype_alpha($tok)) {
+ if (!ctype_alpha($tok)) {
return false;
}
// We know this is at least one char.
- $name = $this->scanner->charsWhile(":_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
- $name = $this->mode === self::CONFORMANT_XML ? $name : strtolower($name);
+ $name = $this->scanner->charsWhile(':_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz');
+ $name = self::CONFORMANT_XML === $this->mode ? $name : strtolower($name);
$attributes = array();
$selfClose = false;
@@ -403,7 +398,7 @@ class Tokenizer
do {
$this->scanner->whitespace();
$this->attribute($attributes);
- } while (! $this->isTagEnd($selfClose));
+ } while (!$this->isTagEnd($selfClose));
} catch (ParseError $e) {
$selfClose = false;
}
@@ -425,30 +420,34 @@ class Tokenizer
protected function isTagEnd(&$selfClose)
{
$tok = $this->scanner->current();
- if ($tok == '/') {
+ if ('/' == $tok) {
$this->scanner->next();
$this->scanner->whitespace();
$tok = $this->scanner->current();
- if ($tok == '>') {
+ if ('>' == $tok) {
$selfClose = true;
+
return true;
}
- if ($tok === false) {
- $this->parseError("Unexpected EOF inside of tag.");
+ if (false === $tok) {
+ $this->parseError('Unexpected EOF inside of tag.');
+
return true;
}
// Basically, we skip the / token and go on.
// See 8.2.4.43.
$this->parseError("Unexpected '%s' inside of a tag.", $tok);
+
return false;
}
- if ($tok == '>') {
+ if ('>' == $tok) {
return true;
}
- if ($tok === false) {
- $this->parseError("Unexpected EOF inside of tag.");
+ if (false === $tok) {
+ $this->parseError('Unexpected EOF inside of tag.');
+
return true;
}
@@ -467,23 +466,23 @@ class Tokenizer
protected function attribute(&$attributes)
{
$tok = $this->scanner->current();
- if ($tok == '/' || $tok == '>' || $tok === false) {
+ if ('/' == $tok || '>' == $tok || false === $tok) {
return false;
}
- if ($tok == '<') {
+ if ('<' == $tok) {
$this->parseError("Unexpected '<' inside of attributes list.");
// Push the < back onto the stack.
$this->scanner->unconsume();
// Let the caller figure out how to handle this.
- throw new ParseError("Start tag inside of attribute.");
+ throw new ParseError('Start tag inside of attribute.');
}
$name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));
- if (strlen($name) == 0) {
+ if (0 == strlen($name)) {
$tok = $this->scanner->current();
- $this->parseError("Expected an attribute name, got %s.", $tok);
+ $this->parseError('Expected an attribute name, got %s.', $tok);
// Really, only '=' can be the char here. Everything else gets absorbed
// under one rule or another.
$name = $tok;
@@ -497,18 +496,17 @@ class Tokenizer
// see issue #23: https://github.com/Masterminds/html5-php/issues/23
// and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
if (preg_match("/[\x1-\x2C\\/\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/u", $name)) {
- $this->parseError("Unexpected characters in attribute name: %s", $name);
+ $this->parseError('Unexpected characters in attribute name: %s', $name);
$isValidAttribute = false;
} // There is no limitation for 1st character in HTML5.
// But method "DOMElement::setAttribute" is throwing exception for the
// characters below so they have to be filtered.
// see issue #23: https://github.com/Masterminds/html5-php/issues/23
// and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
- else
- if (preg_match("/^[0-9.-]/u", $name)) {
- $this->parseError("Unexpected character at the begining of attribute name: %s", $name);
- $isValidAttribute = false;
- }
+ elseif (preg_match('/^[0-9.-]/u', $name)) {
+ $this->parseError('Unexpected character at the begining of attribute name: %s', $name);
+ $isValidAttribute = false;
+ }
// 8.1.2.3
$this->scanner->whitespace();
@@ -516,6 +514,7 @@ class Tokenizer
if ($isValidAttribute) {
$attributes[$name] = $val;
}
+
return true;
}
@@ -527,7 +526,7 @@ class Tokenizer
*/
protected function attributeValue()
{
- if ($this->scanner->current() != '=') {
+ if ('=' != $this->scanner->current()) {
return null;
}
$this->scanner->next();
@@ -538,21 +537,24 @@ class Tokenizer
switch ($tok) {
case "\n":
case "\f":
- case " ":
+ case ' ':
case "\t":
// Whitespace here indicates an empty value.
return null;
case '"':
case "'":
$this->scanner->next();
+
return $this->quotedAttributeValue($tok);
case '>':
// case '/': // 8.2.4.37 seems to allow foo=/ as a valid attr.
- $this->parseError("Expected attribute value, got tag end.");
+ $this->parseError('Expected attribute value, got tag end.');
+
return null;
case '=':
case '`':
- $this->parseError("Expecting quotes, got %s.", $tok);
+ $this->parseError('Expecting quotes, got %s.', $tok);
+
return $this->unquotedAttributeValue();
default:
return $this->unquotedAttributeValue();
@@ -562,11 +564,11 @@ class Tokenizer
/**
* Get an attribute value string.
*
- * @param string $quote
- * IMPORTANT: This is a series of chars! Any one of which will be considered
- * termination of an attribute's value. E.g. "\"'" will stop at either
- * ' or ".
- * @return string The attribute value.
+ * @param string $quote IMPORTANT: This is a series of chars! Any one of which will be considered
+ * termination of an attribute's value. E.g. "\"'" will stop at either
+ * ' or ".
+ *
+ * @return string the attribute value
*/
protected function quotedAttributeValue($quote)
{
@@ -574,21 +576,22 @@ class Tokenizer
$val = '';
while (true) {
- $tokens = $this->scanner->charsUntil($stoplist.'&');
- if ($tokens !== false) {
+ $tokens = $this->scanner->charsUntil($stoplist . '&');
+ if (false !== $tokens) {
$val .= $tokens;
} else {
break;
}
$tok = $this->scanner->current();
- if ($tok == '&') {
+ if ('&' == $tok) {
$val .= $this->decodeCharacterReference(true);
continue;
}
break;
}
$this->scanner->next();
+
return $val;
}
@@ -597,34 +600,34 @@ class Tokenizer
$stoplist = "\t\n\f >";
$val = '';
$tok = $this->scanner->current();
- while (strspn($tok, $stoplist) == 0 && $tok !== false) {
- if ($tok == '&') {
+ while (0 == strspn($tok, $stoplist) && false !== $tok) {
+ if ('&' == $tok) {
$val .= $this->decodeCharacterReference(true);
$tok = $this->scanner->current();
} else {
if (strspn($tok, "\"'<=`") > 0) {
- $this->parseError("Unexpected chars in unquoted attribute value %s", $tok);
+ $this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
}
$val .= $tok;
$tok = $this->scanner->next();
}
}
+
return $val;
}
/**
* Consume malformed markup as if it were a comment.
- * 8.2.4.44
+ * 8.2.4.44.
*
* The spec requires that the ENTIRE tag-like thing be enclosed inside of
* the comment. So this will generate comments like:
*
* &lt;!--&lt/+foo&gt;--&gt;
*
- * @param string $leading
- * Prepend any leading characters. This essentially
- * negates the need to backtrack, but it's sort of
- * a hack.
+ * @param string $leading Prepend any leading characters. This essentially
+ * negates the need to backtrack, but it's sort of
+ * a hack.
*
* @return bool
*/
@@ -632,11 +635,11 @@ class Tokenizer
{
$comment = $leading;
$tokens = $this->scanner->charsUntil('>');
- if ($tokens !== false) {
+ if (false !== $tokens) {
$comment .= $tokens;
}
$tok = $this->scanner->current();
- if ($tok !== false) {
+ if (false !== $tok) {
$comment .= $tok;
}
@@ -660,25 +663,27 @@ class Tokenizer
$comment = '';
// <!-->. Emit an empty comment because 8.2.4.46 says to.
- if ($tok == '>') {
+ if ('>' == $tok) {
// Parse error. Emit the comment token.
$this->parseError("Expected comment data, got '>'");
$this->events->comment('');
$this->scanner->next();
+
return true;
}
// Replace NULL with the replacement char.
- if ($tok == "\0") {
+ if ("\0" == $tok) {
$tok = UTF8Utils::FFFD;
}
- while (! $this->isCommentEnd()) {
+ while (!$this->isCommentEnd()) {
$comment .= $tok;
$tok = $this->scanner->next();
}
$this->events->comment($comment);
$this->scanner->next();
+
return true;
}
@@ -692,24 +697,26 @@ class Tokenizer
$tok = $this->scanner->current();
// EOF
- if ($tok === false) {
+ if (false === $tok) {
// Hit the end.
- $this->parseError("Unexpected EOF in a comment.");
+ $this->parseError('Unexpected EOF in a comment.');
+
return true;
}
// If it doesn't start with -, not the end.
- if ($tok != '-') {
+ if ('-' != $tok) {
return false;
}
// Advance one, and test for '->'
- if ($this->scanner->next() == '-' && $this->scanner->peek() == '>') {
+ if ('-' == $this->scanner->next() && '>' == $this->scanner->peek()) {
$this->scanner->next(); // Consume the last '>'
return true;
}
// Unread '-';
$this->scanner->unconsume(1);
+
return false;
}
@@ -729,9 +736,10 @@ class Tokenizer
return false;
}
// Check that string is DOCTYPE.
- $chars = $this->scanner->charsWhile("DOCTYPEdoctype");
+ $chars = $this->scanner->charsWhile('DOCTYPEdoctype');
if (strcasecmp($chars, 'DOCTYPE')) {
$this->parseError('Expected DOCTYPE, got %s', $chars);
+
return $this->bogusComment('<!' . $chars);
}
@@ -739,14 +747,15 @@ class Tokenizer
$tok = $this->scanner->current();
// EOF: die.
- if ($tok === false) {
+ if (false === $tok) {
$this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true);
+
return $this->eof($tok);
}
// NULL char: convert.
- if ($tok === "\0") {
- $this->parseError("Unexpected null character in DOCTYPE.");
+ if ("\0" === $tok) {
+ $this->parseError('Unexpected null character in DOCTYPE.');
}
$stop = " \n\f>";
@@ -757,23 +766,26 @@ class Tokenizer
$tok = $this->scanner->current();
// If false, emit a parse error, DOCTYPE, and return.
- if ($tok === false) {
+ if (false === $tok) {
$this->parseError('Unexpected EOF in DOCTYPE declaration.');
$this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, null, true);
+
return true;
}
// Short DOCTYPE, like <!DOCTYPE html>
- if ($tok == '>') {
+ if ('>' == $tok) {
// DOCTYPE without a name.
- if (strlen($doctypeName) == 0) {
- $this->parseError("Expected a DOCTYPE name. Got nothing.");
+ if (0 == strlen($doctypeName)) {
+ $this->parseError('Expected a DOCTYPE name. Got nothing.');
$this->events->doctype($doctypeName, 0, null, true);
$this->scanner->next();
+
return true;
}
$this->events->doctype($doctypeName);
$this->scanner->next();
+
return true;
}
$this->scanner->whitespace();
@@ -782,36 +794,40 @@ class Tokenizer
$white = $this->scanner->whitespace();
// Get ID, and flag it as pub or system.
- if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
+ if (('PUBLIC' == $pub || 'SYSTEM' == $pub) && $white > 0) {
// Get the sys ID.
- $type = $pub == 'PUBLIC' ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
+ $type = 'PUBLIC' == $pub ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
$id = $this->quotedString("\0>");
- if ($id === false) {
+ if (false === $id) {
$this->events->doctype($doctypeName, $type, $pub, false);
+
return false;
}
// Premature EOF.
- if ($this->scanner->current() === false) {
- $this->parseError("Unexpected EOF in DOCTYPE");
+ if (false === $this->scanner->current()) {
+ $this->parseError('Unexpected EOF in DOCTYPE');
$this->events->doctype($doctypeName, $type, $id, true);
+
return true;
}
// Well-formed complete DOCTYPE.
$this->scanner->whitespace();
- if ($this->scanner->current() == '>') {
+ if ('>' == $this->scanner->current()) {
$this->events->doctype($doctypeName, $type, $id, false);
$this->scanner->next();
+
return true;
}
// If we get here, we have <!DOCTYPE foo PUBLIC "bar" SOME_JUNK
// Throw away the junk, parse error, quirks mode, return true.
- $this->scanner->charsUntil(">");
- $this->parseError("Malformed DOCTYPE.");
+ $this->scanner->charsUntil('>');
+ $this->parseError('Malformed DOCTYPE.');
$this->events->doctype($doctypeName, $type, $id, true);
$this->scanner->next();
+
return true;
}
@@ -819,35 +835,37 @@ class Tokenizer
// Consume to > and trash.
$this->scanner->charsUntil('>');
- $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub);
+ $this->parseError('Expected PUBLIC or SYSTEM. Got %s.', $pub);
$this->events->doctype($doctypeName, 0, null, true);
$this->scanner->next();
+
return true;
}
/**
* Utility for reading a quoted string.
*
- * @param string $stopchars
- * Characters (in addition to a close-quote) that should stop the string.
- * E.g. sometimes '>' is higher precedence than '"' or "'".
+ * @param string $stopchars Characters (in addition to a close-quote) that should stop the string.
+ * E.g. sometimes '>' is higher precedence than '"' or "'".
*
* @return mixed String if one is found (quotations omitted)
*/
protected function quotedString($stopchars)
{
$tok = $this->scanner->current();
- if ($tok == '"' || $tok == "'") {
+ if ('"' == $tok || "'" == $tok) {
$this->scanner->next();
$ret = $this->scanner->charsUntil($tok . $stopchars);
if ($this->scanner->current() == $tok) {
$this->scanner->next();
} else {
// Parse error because no close quote.
- $this->parseError("Expected %s, got %s", $tok, $this->scanner->current());
+ $this->parseError('Expected %s, got %s', $tok, $this->scanner->current());
}
+
return $ret;
}
+
return false;
}
@@ -858,39 +876,43 @@ class Tokenizer
*/
protected function cdataSection()
{
- if ($this->scanner->current() != '[') {
+ if ('[' != $this->scanner->current()) {
return false;
}
$cdata = '';
$this->scanner->next();
$chars = $this->scanner->charsWhile('CDAT');
- if ($chars != 'CDATA' || $this->scanner->current() != '[') {
+ if ('CDATA' != $chars || '[' != $this->scanner->current()) {
$this->parseError('Expected [CDATA[, got %s', $chars);
+
return $this->bogusComment('<![' . $chars);
}
$tok = $this->scanner->next();
do {
- if ($tok === false) {
+ if (false === $tok) {
$this->parseError('Unexpected EOF inside CDATA.');
$this->bogusComment('<![CDATA[' . $cdata);
+
return true;
}
$cdata .= $tok;
$tok = $this->scanner->next();
- } while (! $this->scanner->sequenceMatches(']]>'));
+ } while (!$this->scanner->sequenceMatches(']]>'));
// Consume ]]>
$this->scanner->consume(3);
$this->events->cdata($cdata);
+
return true;
}
// ================================================================
// Non-HTML5
// ================================================================
+
/**
* Handle a processing instruction.
*
@@ -903,7 +925,7 @@ class Tokenizer
*/
protected function processingInstruction()
{
- if ($this->scanner->current() != '?') {
+ if ('?' != $this->scanner->current()) {
return false;
}
@@ -912,21 +934,23 @@ class Tokenizer
$white = $this->scanner->whitespace();
// If not a PI, send to bogusComment.
- if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == false) {
+ if (0 == strlen($procName) || 0 == $white || false == $this->scanner->current()) {
$this->parseError("Expected processing instruction name, got $tok");
$this->bogusComment('<?' . $tok . $procName);
+
return true;
}
$data = '';
// As long as it's not the case that the next two chars are ? and >.
- while (! ($this->scanner->current() == '?' && $this->scanner->peek() == '>')) {
+ while (!('?' == $this->scanner->current() && '>' == $this->scanner->peek())) {
$data .= $this->scanner->current();
$tok = $this->scanner->next();
- if ($tok === false) {
- $this->parseError("Unexpected EOF in processing instruction.");
+ if (false === $tok) {
+ $this->parseError('Unexpected EOF in processing instruction.');
$this->events->processingInstruction($procName, $data);
+
return true;
}
}
@@ -934,6 +958,7 @@ class Tokenizer
$this->scanner->next(); // >
$this->scanner->next(); // Next token.
$this->events->processingInstruction($procName, $data);
+
return true;
}
@@ -955,7 +980,7 @@ class Tokenizer
// Optimization for reading larger blocks faster.
$first = substr($sequence, 0, 1);
- while ($this->scanner->current() !== false) {
+ while (false !== $this->scanner->current()) {
$buffer .= $this->scanner->charsUntil($first);
// Stop as soon as we hit the stopping condition.
@@ -967,7 +992,8 @@ class Tokenizer
}
// If we get here, we hit the EOF.
- $this->parseError("Unexpected EOF during text read.");
+ $this->parseError('Unexpected EOF during text read.');
+
return $buffer;
}
@@ -985,7 +1011,7 @@ class Tokenizer
* '</script>' string.
*
* @param string $sequence
- * @param bool $caseSensitive
+ * @param bool $caseSensitive
*
* @return bool
*/
@@ -1005,7 +1031,7 @@ class Tokenizer
*/
protected function flushBuffer()
{
- if ($this->text === '') {
+ if ('' === $this->text) {
return;
}
$this->events->text($this->text);
@@ -1055,9 +1081,8 @@ class Tokenizer
*
* If $inAttribute is set to true, a bare & will be returned as-is.
*
- * @param bool $inAttribute
- * Set to true if the text is inside of an attribute value.
- * false otherwise.
+ * @param bool $inAttribute Set to true if the text is inside of an attribute value.
+ * false otherwise.
*
* @return string
*/
@@ -1067,36 +1092,37 @@ class Tokenizer
$tok = $this->scanner->next();
$start = $this->scanner->position();
- if ($tok == false) {
+ if (false === $tok) {
return '&';
}
// These indicate not an entity. We return just
// the &.
- if (strspn($tok, static::WHITE . "&<") == 1) {
+ if (1 === strspn($tok, static::WHITE . '&<')) {
// $this->scanner->next();
return '&';
}
// Numeric entity
- if ($tok == '#') {
+ if ('#' === $tok) {
$tok = $this->scanner->next();
// Hexidecimal encoding.
// X[0-9a-fA-F]+;
// x[0-9a-fA-F]+;
- if ($tok == 'x' || $tok == 'X') {
+ if ('x' === $tok || 'X' === $tok) {
$tok = $this->scanner->next(); // Consume x
// Convert from hex code to char.
$hex = $this->scanner->getHex();
if (empty($hex)) {
- $this->parseError("Expected &#xHEX;, got &#x%s", $tok);
+ $this->parseError('Expected &#xHEX;, got &#x%s', $tok);
// We unconsume because we don't know what parser rules might
// be in effect for the remaining chars. For example. '&#>'
// might result in a specific parsing rule inside of tag
// contexts, while not inside of pcdata context.
$this->scanner->unconsume(2);
+
return '&';
}
$entity = CharacterReference::lookupHex($hex);
@@ -1105,17 +1131,17 @@ class Tokenizer
else {
// Convert from decimal to char.
$numeric = $this->scanner->getNumeric();
- if ($numeric === false) {
- $this->parseError("Expected &#DIGITS;, got &#%s", $tok);
+ if (false === $numeric) {
+ $this->parseError('Expected &#DIGITS;, got &#%s', $tok);
$this->scanner->unconsume(2);
+
return '&';
}
$entity = CharacterReference::lookupDecimal($numeric);
}
- } elseif ($tok === '=' && $inAttribute) {
+ } elseif ('=' === $tok && $inAttribute) {
return '&';
} else { // String entity.
-
// Attempt to consume a string up to a ';'.
// [a-zA-Z0-9]+;
$cname = $this->scanner->getAsciiAlphaNum();
@@ -1124,11 +1150,12 @@ class Tokenizer
// When no entity is found provide the name of the unmatched string
// and continue on as the & is not part of an entity. The & will
// be converted to &amp; elsewhere.
- if ($entity == null) {
- if (!$inAttribute || strlen($cname) === 0) {
+ if (null === $entity) {
+ if (!$inAttribute || '' === $cname) {
$this->parseError("No match in entity table for '%s'", $cname);
}
$this->scanner->unconsume($this->scanner->position() - $start);
+
return '&';
}
}
@@ -1137,8 +1164,9 @@ class Tokenizer
$tok = $this->scanner->current();
// We have an entity. We're done here.
- if ($tok == ';') {
+ if (';' === $tok) {
$this->scanner->next();
+
return $entity;
}
@@ -1146,10 +1174,12 @@ class Tokenizer
// entire string. Otherwise, failure to match is an error.
if ($inAttribute) {
$this->scanner->unconsume($this->scanner->position() - $start);
+
return '&';
}
- $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok);
+ $this->parseError('Expected &ENTITY;, got &ENTITY%s (no trailing ;) ', $tok);
+
return '&' . $entity;
}
}
diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php
index d092872..9b94185 100644
--- a/src/HTML5/Parser/TreeBuildingRules.php
+++ b/src/HTML5/Parser/TreeBuildingRules.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
@@ -14,7 +15,6 @@ namespace Masterminds\HTML5\Parser;
*/
class TreeBuildingRules
{
-
protected static $tags = array(
'li' => 1,
'dd' => 1,
@@ -29,21 +29,10 @@ class TreeBuildingRules
'tbody' => 1,
'table' => 1,
'optgroup' => 1,
- 'option' => 1
+ 'option' => 1,
);
/**
- * Build a new rules engine.
- *
- * @param \DOMDocument $doc
- * The DOM document to use for evaluation and modification.
- */
- public function __construct($doc)
- {
- $this->doc = $doc;
- }
-
- /**
* Returns true if the given tagname has special processing rules.
*/
public function hasRules($tagname)
@@ -56,7 +45,7 @@ class TreeBuildingRules
*
* This may modify the existing DOM.
*
- * @return \DOMElement The new Current DOM element.
+ * @return \DOMElement the new Current DOM element
*/
public function evaluate($new, $current)
{
@@ -71,7 +60,7 @@ class TreeBuildingRules
return $this->handleRT($new, $current);
case 'optgroup':
return $this->closeIfCurrentMatches($new, $current, array(
- 'optgroup'
+ 'optgroup',
));
case 'option':
return $this->closeIfCurrentMatches($new, $current, array(
@@ -79,13 +68,13 @@ class TreeBuildingRules
));
case 'tr':
return $this->closeIfCurrentMatches($new, $current, array(
- 'tr'
+ 'tr',
));
case 'td':
case 'th':
return $this->closeIfCurrentMatches($new, $current, array(
'th',
- 'td'
+ 'td',
));
case 'tbody':
case 'thead':
@@ -95,7 +84,7 @@ class TreeBuildingRules
return $this->closeIfCurrentMatches($new, $current, array(
'thead',
'tfoot',
- 'tbody'
+ 'tbody',
));
}
@@ -105,7 +94,7 @@ class TreeBuildingRules
protected function handleLI($ele, $current)
{
return $this->closeIfCurrentMatches($ele, $current, array(
- 'li'
+ 'li',
));
}
@@ -113,7 +102,7 @@ class TreeBuildingRules
{
return $this->closeIfCurrentMatches($ele, $current, array(
'dt',
- 'dd'
+ 'dd',
));
}
@@ -121,13 +110,13 @@ class TreeBuildingRules
{
return $this->closeIfCurrentMatches($ele, $current, array(
'rt',
- 'rp'
+ 'rp',
));
}
protected function closeIfCurrentMatches($ele, $current, $match)
{
- if (in_array($current->tagName, $match)) {
+ if (in_array($current->tagName, $match, true)) {
$current->parentNode->appendChild($ele);
} else {
$current->appendChild($ele);
diff --git a/src/HTML5/Parser/UTF8Utils.php b/src/HTML5/Parser/UTF8Utils.php
index 451c155..77c2dfb 100644
--- a/src/HTML5/Parser/UTF8Utils.php
+++ b/src/HTML5/Parser/UTF8Utils.php
@@ -1,5 +1,7 @@
<?php
+
namespace Masterminds\HTML5\Parser;
+
/*
*
* Portions based on code from html5lib files with the following copyright:
@@ -30,11 +32,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
use Masterminds\HTML5\Exception;
/**
- * UTF-8 Utilities
+ * UTF-8 Utilities.
*/
class UTF8Utils
{
-
/**
* The Unicode replacement character..
*/
@@ -76,10 +77,8 @@ class UTF8Utils
* This has not yet been tested with charactersets other than UTF-8.
* It should work with ISO-8859-1/-13 and standard Latin Win charsets.
*
- * @param string $data
- * The data to convert.
- * @param string $encoding
- * A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
+ * @param string $data The data to convert
+ * @param string $encoding A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
*
* @return string
*/
@@ -108,7 +107,7 @@ class UTF8Utils
$data = mb_convert_encoding($data, 'UTF-8', $encoding);
mb_substitute_character($save);
} // @todo Get iconv running in at least some environments if that is possible.
- elseif (function_exists('iconv') && $encoding != 'auto') {
+ elseif (function_exists('iconv') && 'auto' !== $encoding) {
// fprintf(STDOUT, "iconv found\n");
// iconv has the following behaviors:
// - Overlong representations are ignored.
@@ -122,7 +121,7 @@ class UTF8Utils
/*
* One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present.
*/
- if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
+ if ("\xEF\xBB\xBF" === substr($data, 0, 3)) {
$data = substr($data, 3);
}
@@ -132,9 +131,9 @@ class UTF8Utils
/**
* Checks for Unicode code points that are not valid in a document.
*
- * @param string $data A string to analyze.
+ * @param string $data a string to analyze
*
- * @return array An array of (string) error messages produced by the scanning.
+ * @return array an array of (string) error messages produced by the scanning
*/
public static function checkForIllegalCodepoints($data)
{
@@ -144,7 +143,7 @@ class UTF8Utils
/*
* All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such characters is a parse error.
*/
- for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i ++) {
+ for ($i = 0, $count = substr_count($data, "\0"); $i < $count; ++$i) {
$errors[] = 'null-character';
}
@@ -166,7 +165,7 @@ class UTF8Utils
|
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
)/x', $data, $matches);
- for ($i = 0; $i < $count; $i ++) {
+ for ($i = 0; $i < $count; ++$i) {
$errors[] = 'invalid-codepoint';
}