summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTitouan Galopin <[email protected]>2018-11-25 00:58:42 +0100
committerTitouan Galopin <[email protected]>2018-11-25 14:58:29 +0100
commitd829a30e092ea5d868b93a258724b12f9aa313fd (patch)
tree1c8819b825738ce95d87d96e6e2cafeb2c7f14c8 /src
parentd39a98a478c95e0df86ed564650f7326244116e2 (diff)
Fix coding style
Diffstat (limited to 'src')
-rw-r--r--src/HTML5.php129
-rw-r--r--src/HTML5/Elements.php517
-rw-r--r--src/HTML5/Entities.php4
-rw-r--r--src/HTML5/Exception.php1
-rw-r--r--src/HTML5/InstructionProcessor.php20
-rw-r--r--src/HTML5/Parser/CharacterReference.php32
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php178
-rw-r--r--src/HTML5/Parser/EventHandler.php36
-rw-r--r--src/HTML5/Parser/FileInputStream.php7
-rw-r--r--src/HTML5/Parser/InputStream.php26
-rw-r--r--src/HTML5/Parser/ParseError.php1
-rw-r--r--src/HTML5/Parser/Scanner.php74
-rw-r--r--src/HTML5/Parser/StringInputStream.php65
-rw-r--r--src/HTML5/Parser/Tokenizer.php332
-rw-r--r--src/HTML5/Parser/TreeBuildingRules.php33
-rw-r--r--src/HTML5/Parser/UTF8Utils.php23
-rw-r--r--src/HTML5/Serializer/HTML5Entities.php6
-rw-r--r--src/HTML5/Serializer/OutputRules.php115
-rw-r--r--src/HTML5/Serializer/RulesInterface.php26
-rw-r--r--src/HTML5/Serializer/Traverser.php35
20 files changed, 806 insertions, 854 deletions
diff --git a/src/HTML5.php b/src/HTML5.php
index 0697bed..168b965 100644
--- a/src/HTML5.php
+++ b/src/HTML5.php
@@ -1,9 +1,7 @@
<?php
+
namespace Masterminds;
-use Masterminds\HTML5\Parser\FileInputStream;
-use Masterminds\HTML5\Parser\InputStream;
-use Masterminds\HTML5\Parser\StringInputStream;
use Masterminds\HTML5\Parser\DOMTreeBuilder;
use Masterminds\HTML5\Parser\Scanner;
use Masterminds\HTML5\Parser\Tokenizer;
@@ -12,37 +10,38 @@ use Masterminds\HTML5\Serializer\Traverser;
/**
* This class offers convenience methods for parsing and serializing HTML5.
- * It is roughly designed to mirror the \DOMDocument class that is
- * provided with most versions of PHP.
+ * It is roughly designed to mirror the \DOMDocument native class.
*/
class HTML5
{
-
/**
* Global options for the parser and serializer.
*
* @var array
*/
- protected $options = array(
- // If the serializer should encode all entities.
- 'encode_entities' => false
+ private $defaultOptions = array(
+ // Whether the serializer should aggressively encode all characters as entities.
+ 'encode_entities' => false,
+
+ // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
+ 'disable_html_ns' => false,
);
protected $errors = array();
- public function __construct(array $options = array())
+ public function __construct(array $defaultOptions = array())
{
- $this->options = array_merge($this->options, $options);
+ $this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
}
/**
- * Get the default options.
+ * Get the current default options.
*
- * @return array The default options.
+ * @return array
*/
public function getOptions()
{
- return $this->options;
+ return $this->defaultOptions;
}
/**
@@ -55,14 +54,13 @@ class HTML5
*
* The rules governing parsing are set out in the HTML 5 spec.
*
- * @param string|resource $file
- * The path to the file to parse. If this is a resource, it is
- * assumed to be an open stream whose pointer is set to the first
- * byte of input.
- * @param array $options
- * Configuration options when parsing the HTML
+ * @param string|resource $file The path to the file to parse. If this is a resource, it is
+ * assumed to be an open stream whose pointer is set to the first
+ * byte of input.
+ * @param array $options Configuration options when parsing the HTML
+ *
* @return \DOMDocument A DOM document. These object type is defined by the libxml
- * library, and should have been included with your version of PHP.
+ * library, and should have been included with your version of PHP.
*/
public function load($file, array $options = array())
{
@@ -80,12 +78,11 @@ class HTML5
* Take a string of HTML 5 (or earlier) and parse it into a
* DOMDocument.
*
- * @param string $string
- * A html5 document as a string.
- * @param array $options
- * Configuration options when parsing the HTML
+ * @param string $string A html5 document as a string
+ * @param array $options Configuration options when parsing the HTML
+ *
* @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
- * almost all distribtions of PHP.
+ * almost all distributions of PHP.
*/
public function loadHTML($string, array $options = array())
{
@@ -98,15 +95,13 @@ class HTML5
* This is here to provide backwards compatibility with the
* PHP DOM implementation. It simply calls load().
*
- * @param string $file
- * The path to the file to parse. If this is a resource, it is
- * assumed to be an open stream whose pointer is set to the first
- * byte of input.
- * @param array $options
- * Configuration options when parsing the HTML
+ * @param string $file The path to the file to parse. If this is a resource, it is
+ * assumed to be an open stream whose pointer is set to the first
+ * byte of input.
+ * @param array $options Configuration options when parsing the HTML
*
* @return \DOMDocument A DOM document. These object type is defined by the libxml
- * library, and should have been included with your version of PHP.
+ * library, and should have been included with your version of PHP.
*/
public function loadHTMLFile($file, array $options = array())
{
@@ -116,11 +111,11 @@ class HTML5
/**
* Parse a HTML fragment from a string.
*
- * @param string $string The HTML5 fragment as a string.
- * @param array $options Configuration options when parsing the HTML
+ * @param string $string the HTML5 fragment as a string
+ * @param array $options Configuration options when parsing the HTML
*
* @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
- * almost all distributions of PHP.
+ * almost all distributions of PHP.
*/
public function loadHTMLFragment($string, array $options = array())
{
@@ -128,7 +123,7 @@ class HTML5
}
/**
- * Return all errors encountered into parsing phase
+ * Return all errors encountered during the parsing phase.
*
* @return array
*/
@@ -138,7 +133,7 @@ class HTML5
}
/**
- * Return true it some errors were encountered into parsing phase
+ * Return true if some errors were encountered during the parsing phase.
*
* @return bool
*/
@@ -148,23 +143,20 @@ class HTML5
}
/**
- * Parse an input stream.
- *
- * Lower-level loading function. This requires an input stream instead
- * of a string, file, or resource.
+ * Parse an input string.
*
* @param string $input
- * @param array $options
+ * @param array $options
*
* @return \DOMDocument
*/
public function parse($input, array $options = array())
{
$this->errors = array();
- $options = array_merge($this->getOptions(), $options);
+ $options = array_merge($this->defaultOptions, $options);
$events = new DOMTreeBuilder(false, $options);
$scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML);
+ $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
$parser->parse();
$this->errors = $events->getErrors();
@@ -178,17 +170,17 @@ class HTML5
* Lower-level loading function. This requires an input stream instead
* of a string, file, or resource.
*
- * @param string $input The input data to parse in the form of a string.
- * @param array $options An array of options
+ * @param string $input the input data to parse in the form of a string
+ * @param array $options An array of options
*
* @return \DOMDocumentFragment
*/
public function parseFragment($input, array $options = array())
{
- $options = array_merge($this->getOptions(), $options);
+ $options = array_merge($this->defaultOptions, $options);
$events = new DOMTreeBuilder(true, $options);
$scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML);
+ $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
$parser->parse();
$this->errors = $events->getErrors();
@@ -199,15 +191,12 @@ class HTML5
/**
* Save a DOM into a given file as HTML5.
*
- * @param mixed $dom
- * The DOM to be serialized.
- * @param string|resource $file
- * The filename to be written or resource to write to.
- * @param array $options
- * Configuration options when serializing the DOM. These include:
- * - encode_entities: Text written to the output is escaped by default and not all
- * entities are encoded. If this is set to true all entities will be encoded.
- * Defaults to false.
+ * @param mixed $dom The DOM to be serialized
+ * @param string|resource $file The filename to be written or resource to write to
+ * @param array $options Configuration options when serializing the DOM. These include:
+ * - encode_entities: Text written to the output is escaped by default and not all
+ * entities are encoded. If this is set to true all entities will be encoded.
+ * Defaults to false.
*/
public function save($dom, $file, $options = array())
{
@@ -216,9 +205,9 @@ class HTML5
$stream = $file;
$close = false;
} else {
- $stream = fopen($file, 'w');
+ $stream = fopen($file, 'wb');
}
- $options = array_merge($this->getOptions(), $options);
+ $options = array_merge($this->defaultOptions, $options);
$rules = new OutputRules($stream, $options);
$trav = new Traverser($dom, $stream, $rules, $options);
@@ -232,21 +221,19 @@ class HTML5
/**
* Convert a DOM into an HTML5 string.
*
- * @param mixed $dom
- * The DOM to be serialized.
- * @param array $options
- * Configuration options when serializing the DOM. These include:
- * - encode_entities: Text written to the output is escaped by default and not all
- * entities are encoded. If this is set to true all entities will be encoded.
- * Defaults to false.
+ * @param mixed $dom The DOM to be serialized
+ * @param array $options Configuration options when serializing the DOM. These include:
+ * - encode_entities: Text written to the output is escaped by default and not all
+ * entities are encoded. If this is set to true all entities will be encoded.
+ * Defaults to false.
*
- * @return string A HTML5 documented generated from the DOM.
+ * @return string a HTML5 document generated from the DOM
*/
public function saveHTML($dom, $options = array())
{
- $stream = fopen('php://temp', 'w');
- $this->save($dom, $stream, array_merge($this->getOptions(), $options));
+ $stream = fopen('php://temp', 'wb');
+ $this->save($dom, $stream, array_merge($this->defaultOptions, $options));
- return stream_get_contents($stream, - 1, 0);
+ return stream_get_contents($stream, -1, 0);
}
}
diff --git a/src/HTML5/Elements.php b/src/HTML5/Elements.php
index 3cb6093..a139e26 100644
--- a/src/HTML5/Elements.php
+++ b/src/HTML5/Elements.php
@@ -2,6 +2,7 @@
/**
* Provide general element functions.
*/
+
namespace Masterminds\HTML5;
/**
@@ -17,7 +18,6 @@ namespace Masterminds\HTML5;
*/
class Elements
{
-
/**
* Indicates an element is described in the specification.
*/
@@ -77,116 +77,116 @@ class Elements
* @var array
*/
public static $html5 = array(
- "a" => 1,
- "abbr" => 1,
- "address" => 65, // NORMAL | BLOCK_TAG
- "area" => 9, // NORMAL | VOID_TAG
- "article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "audio" => 1, // NORMAL
- "b" => 1,
- "base" => 9, // NORMAL | VOID_TAG
- "bdi" => 1,
- "bdo" => 1,
- "blockquote" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "body" => 1,
- "br" => 9, // NORMAL | VOID_TAG
- "button" => 1,
- "canvas" => 65, // NORMAL | BLOCK_TAG
- "caption" => 1,
- "cite" => 1,
- "code" => 1,
- "col" => 9, // NORMAL | VOID_TAG
- "colgroup" => 1,
- "command" => 9, // NORMAL | VOID_TAG
+ 'a' => 1,
+ 'abbr' => 1,
+ 'address' => 65, // NORMAL | BLOCK_TAG
+ 'area' => 9, // NORMAL | VOID_TAG
+ 'article' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'aside' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'audio' => 1, // NORMAL
+ 'b' => 1,
+ 'base' => 9, // NORMAL | VOID_TAG
+ 'bdi' => 1,
+ 'bdo' => 1,
+ 'blockquote' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'body' => 1,
+ 'br' => 9, // NORMAL | VOID_TAG
+ 'button' => 1,
+ 'canvas' => 65, // NORMAL | BLOCK_TAG
+ 'caption' => 1,
+ 'cite' => 1,
+ 'code' => 1,
+ 'col' => 9, // NORMAL | VOID_TAG
+ 'colgroup' => 1,
+ 'command' => 9, // NORMAL | VOID_TAG
// "data" => 1, // This is highly experimental and only part of the whatwg spec (not w3c). See https://developer.mozilla.org/en-US/docs/HTML/Element/data
- "datalist" => 1,
- "dd" => 65, // NORMAL | BLOCK_TAG
- "del" => 1,
- "details" => 17, // NORMAL | AUTOCLOSE_P,
- "dfn" => 1,
- "dialog" => 17, // NORMAL | AUTOCLOSE_P,
- "div" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "dl" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "dt" => 1,
- "em" => 1,
- "embed" => 9, // NORMAL | VOID_TAG
- "fieldset" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "figcaption" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "figure" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "footer" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "form" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h1" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h2" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h3" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h4" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h5" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "h6" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "head" => 1,
- "header" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "hgroup" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "hr" => 73, // NORMAL | VOID_TAG
- "html" => 1,
- "i" => 1,
- "iframe" => 3, // NORMAL | TEXT_RAW
- "img" => 9, // NORMAL | VOID_TAG
- "input" => 9, // NORMAL | VOID_TAG
- "kbd" => 1,
- "ins" => 1,
- "keygen" => 9, // NORMAL | VOID_TAG
- "label" => 1,
- "legend" => 1,
- "li" => 1,
- "link" => 9, // NORMAL | VOID_TAG
- "map" => 1,
- "mark" => 1,
- "menu" => 17, // NORMAL | AUTOCLOSE_P,
- "meta" => 9, // NORMAL | VOID_TAG
- "meter" => 1,
- "nav" => 17, // NORMAL | AUTOCLOSE_P,
- "noscript" => 65, // NORMAL | BLOCK_TAG
- "object" => 1,
- "ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "optgroup" => 1,
- "option" => 1,
- "output" => 65, // NORMAL | BLOCK_TAG
- "p" => 209, // NORMAL | AUTOCLOSE_P | BLOCK_TAG | BLOCK_ONLY_INLINE
- "param" => 9, // NORMAL | VOID_TAG
- "pre" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "progress" => 1,
- "q" => 1,
- "rp" => 1,
- "rt" => 1,
- "ruby" => 1,
- "s" => 1,
- "samp" => 1,
- "script" => 3, // NORMAL | TEXT_RAW
- "section" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "select" => 1,
- "small" => 1,
- "source" => 9, // NORMAL | VOID_TAG
- "span" => 1,
- "strong" => 1,
- "style" => 3, // NORMAL | TEXT_RAW
- "sub" => 1,
- "summary" => 17, // NORMAL | AUTOCLOSE_P,
- "sup" => 1,
- "table" => 65, // NORMAL | BLOCK_TAG
- "tbody" => 1,
- "td" => 1,
- "textarea" => 5, // NORMAL | TEXT_RCDATA
- "tfoot" => 65, // NORMAL | BLOCK_TAG
- "th" => 1,
- "thead" => 1,
- "time" => 1,
- "title" => 5, // NORMAL | TEXT_RCDATA
- "tr" => 1,
- "track" => 9, // NORMAL | VOID_TAG
- "u" => 1,
- "ul" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
- "var" => 1,
- "video" => 65, // NORMAL | BLOCK_TAG
- "wbr" => 9, // NORMAL | VOID_TAG
+ 'datalist' => 1,
+ 'dd' => 65, // NORMAL | BLOCK_TAG
+ 'del' => 1,
+ 'details' => 17, // NORMAL | AUTOCLOSE_P,
+ 'dfn' => 1,
+ 'dialog' => 17, // NORMAL | AUTOCLOSE_P,
+ 'div' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'dl' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'dt' => 1,
+ 'em' => 1,
+ 'embed' => 9, // NORMAL | VOID_TAG
+ 'fieldset' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'figcaption' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'figure' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'footer' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'form' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'h1' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'h2' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'h3' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'h4' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'h5' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'h6' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'head' => 1,
+ 'header' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'hgroup' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'hr' => 73, // NORMAL | VOID_TAG
+ 'html' => 1,
+ 'i' => 1,
+ 'iframe' => 3, // NORMAL | TEXT_RAW
+ 'img' => 9, // NORMAL | VOID_TAG
+ 'input' => 9, // NORMAL | VOID_TAG
+ 'kbd' => 1,
+ 'ins' => 1,
+ 'keygen' => 9, // NORMAL | VOID_TAG
+ 'label' => 1,
+ 'legend' => 1,
+ 'li' => 1,
+ 'link' => 9, // NORMAL | VOID_TAG
+ 'map' => 1,
+ 'mark' => 1,
+ 'menu' => 17, // NORMAL | AUTOCLOSE_P,
+ 'meta' => 9, // NORMAL | VOID_TAG
+ 'meter' => 1,
+ 'nav' => 17, // NORMAL | AUTOCLOSE_P,
+ 'noscript' => 65, // NORMAL | BLOCK_TAG
+ 'object' => 1,
+ 'ol' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'optgroup' => 1,
+ 'option' => 1,
+ 'output' => 65, // NORMAL | BLOCK_TAG
+ 'p' => 209, // NORMAL | AUTOCLOSE_P | BLOCK_TAG | BLOCK_ONLY_INLINE
+ 'param' => 9, // NORMAL | VOID_TAG
+ 'pre' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'progress' => 1,
+ 'q' => 1,
+ 'rp' => 1,
+ 'rt' => 1,
+ 'ruby' => 1,
+ 's' => 1,
+ 'samp' => 1,
+ 'script' => 3, // NORMAL | TEXT_RAW
+ 'section' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'select' => 1,
+ 'small' => 1,
+ 'source' => 9, // NORMAL | VOID_TAG
+ 'span' => 1,
+ 'strong' => 1,
+ 'style' => 3, // NORMAL | TEXT_RAW
+ 'sub' => 1,
+ 'summary' => 17, // NORMAL | AUTOCLOSE_P,
+ 'sup' => 1,
+ 'table' => 65, // NORMAL | BLOCK_TAG
+ 'tbody' => 1,
+ 'td' => 1,
+ 'textarea' => 5, // NORMAL | TEXT_RCDATA
+ 'tfoot' => 65, // NORMAL | BLOCK_TAG
+ 'th' => 1,
+ 'thead' => 1,
+ 'time' => 1,
+ 'title' => 5, // NORMAL | TEXT_RCDATA
+ 'tr' => 1,
+ 'track' => 9, // NORMAL | VOID_TAG
+ 'u' => 1,
+ 'ul' => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
+ 'var' => 1,
+ 'video' => 65, // NORMAL | BLOCK_TAG
+ 'wbr' => 9, // NORMAL | VOID_TAG
// Legacy?
'basefont' => 8, // VOID_TAG
@@ -202,7 +202,7 @@ class Elements
'marquee' => 0,
'isindex' => 8, // VOID_TAG
'xmp' => 20, // AUTOCLOSE_P | VOID_TAG | RAW_TEXT
- 'noembed' => 2 // RAW_TEXT
+ 'noembed' => 2, // RAW_TEXT
);
/**
@@ -215,45 +215,45 @@ class Elements
* @var array
*/
public static $mathml = array(
- "maction" => 1,
- "maligngroup" => 1,
- "malignmark" => 1,
- "math" => 1,
- "menclose" => 1,
- "merror" => 1,
- "mfenced" => 1,
- "mfrac" => 1,
- "mglyph" => 1,
- "mi" => 1,
- "mlabeledtr" => 1,
- "mlongdiv" => 1,
- "mmultiscripts" => 1,
- "mn" => 1,
- "mo" => 1,
- "mover" => 1,
- "mpadded" => 1,
- "mphantom" => 1,
- "mroot" => 1,
- "mrow" => 1,
- "ms" => 1,
- "mscarries" => 1,
- "mscarry" => 1,
- "msgroup" => 1,
- "msline" => 1,
- "mspace" => 1,
- "msqrt" => 1,
- "msrow" => 1,
- "mstack" => 1,
- "mstyle" => 1,
- "msub" => 1,
- "msup" => 1,
- "msubsup" => 1,
- "mtable" => 1,
- "mtd" => 1,
- "mtext" => 1,
- "mtr" => 1,
- "munder" => 1,
- "munderover" => 1
+ 'maction' => 1,
+ 'maligngroup' => 1,
+ 'malignmark' => 1,
+ 'math' => 1,
+ 'menclose' => 1,
+ 'merror' => 1,
+ 'mfenced' => 1,
+ 'mfrac' => 1,
+ 'mglyph' => 1,
+ 'mi' => 1,
+ 'mlabeledtr' => 1,
+ 'mlongdiv' => 1,
+ 'mmultiscripts' => 1,
+ 'mn' => 1,
+ 'mo' => 1,
+ 'mover' => 1,
+ 'mpadded' => 1,
+ 'mphantom' => 1,
+ 'mroot' => 1,
+ 'mrow' => 1,
+ 'ms' => 1,
+ 'mscarries' => 1,
+ 'mscarry' => 1,
+ 'msgroup' => 1,
+ 'msline' => 1,
+ 'mspace' => 1,
+ 'msqrt' => 1,
+ 'msrow' => 1,
+ 'mstack' => 1,
+ 'mstyle' => 1,
+ 'msub' => 1,
+ 'msup' => 1,
+ 'msubsup' => 1,
+ 'mtable' => 1,
+ 'mtd' => 1,
+ 'mtext' => 1,
+ 'mtr' => 1,
+ 'munder' => 1,
+ 'munderover' => 1,
);
/**
@@ -269,86 +269,86 @@ class Elements
* @var array
*/
public static $svg = array(
- "a" => 1,
- "altGlyph" => 1,
- "altGlyphDef" => 1,
- "altGlyphItem" => 1,
- "animate" => 1,
- "animateColor" => 1,
- "animateMotion" => 1,
- "animateTransform" => 1,
- "circle" => 1,
- "clipPath" => 1,
- "color-profile" => 1,
- "cursor" => 1,
- "defs" => 1,
- "desc" => 1,
- "ellipse" => 1,
- "feBlend" => 1,
- "feColorMatrix" => 1,
- "feComponentTransfer" => 1,
- "feComposite" => 1,
- "feConvolveMatrix" => 1,
- "feDiffuseLighting" => 1,
- "feDisplacementMap" => 1,
- "feDistantLight" => 1,
- "feFlood" => 1,
- "feFuncA" => 1,
- "feFuncB" => 1,
- "feFuncG" => 1,
- "feFuncR" => 1,
- "feGaussianBlur" => 1,
- "feImage" => 1,
- "feMerge" => 1,
- "feMergeNode" => 1,
- "feMorphology" => 1,
- "feOffset" => 1,
- "fePointLight" => 1,
- "feSpecularLighting" => 1,
- "feSpotLight" => 1,
- "feTile" => 1,
- "feTurbulence" => 1,
- "filter" => 1,
- "font" => 1,
- "font-face" => 1,
- "font-face-format" => 1,
- "font-face-name" => 1,
- "font-face-src" => 1,
- "font-face-uri" => 1,
- "foreignObject" => 1,
- "g" => 1,
- "glyph" => 1,
- "glyphRef" => 1,
- "hkern" => 1,
- "image" => 1,
- "line" => 1,
- "linearGradient" => 1,
- "marker" => 1,
- "mask" => 1,
- "metadata" => 1,
- "missing-glyph" => 1,
- "mpath" => 1,
- "path" => 1,
- "pattern" => 1,
- "polygon" => 1,
- "polyline" => 1,
- "radialGradient" => 1,
- "rect" => 1,
- "script" => 3, // NORMAL | RAW_TEXT
- "set" => 1,
- "stop" => 1,
- "style" => 3, // NORMAL | RAW_TEXT
- "svg" => 1,
- "switch" => 1,
- "symbol" => 1,
- "text" => 1,
- "textPath" => 1,
- "title" => 1,
- "tref" => 1,
- "tspan" => 1,
- "use" => 1,
- "view" => 1,
- "vkern" => 1
+ 'a' => 1,
+ 'altGlyph' => 1,
+ 'altGlyphDef' => 1,
+ 'altGlyphItem' => 1,
+ 'animate' => 1,
+ 'animateColor' => 1,
+ 'animateMotion' => 1,
+ 'animateTransform' => 1,
+ 'circle' => 1,
+ 'clipPath' => 1,
+ 'color-profile' => 1,
+ 'cursor' => 1,
+ 'defs' => 1,
+ 'desc' => 1,
+ 'ellipse' => 1,
+ 'feBlend' => 1,
+ 'feColorMatrix' => 1,
+ 'feComponentTransfer' => 1,
+ 'feComposite' => 1,
+ 'feConvolveMatrix' => 1,
+ 'feDiffuseLighting' => 1,
+ 'feDisplacementMap' => 1,
+ 'feDistantLight' => 1,
+ 'feFlood' => 1,
+ 'feFuncA' => 1,
+ 'feFuncB' => 1,
+ 'feFuncG' => 1,
+ 'feFuncR' => 1,
+ 'feGaussianBlur' => 1,
+ 'feImage' => 1,
+ 'feMerge' => 1,
+ 'feMergeNode' => 1,
+ 'feMorphology' => 1,
+ 'feOffset' => 1,
+ 'fePointLight' => 1,
+ 'feSpecularLighting' => 1,
+ 'feSpotLight' => 1,
+ 'feTile' => 1,
+ 'feTurbulence' => 1,
+ 'filter' => 1,
+ 'font' => 1,
+ 'font-face' => 1,
+ 'font-face-format' => 1,
+ 'font-face-name' => 1,
+ 'font-face-src' => 1,
+ 'font-face-uri' => 1,
+ 'foreignObject' => 1,
+ 'g' => 1,
+ 'glyph' => 1,
+ 'glyphRef' => 1,
+ 'hkern' => 1,
+ 'image' => 1,
+ 'line' => 1,
+ 'linearGradient' => 1,
+ 'marker' => 1,
+ 'mask' => 1,
+ 'metadata' => 1,
+ 'missing-glyph' => 1,
+ 'mpath' => 1,
+ 'path' => 1,
+ 'pattern' => 1,
+ 'polygon' => 1,
+ 'polyline' => 1,
+ 'radialGradient' => 1,
+ 'rect' => 1,
+ 'script' => 3, // NORMAL | RAW_TEXT
+ 'set' => 1,
+ 'stop' => 1,
+ 'style' => 3, // NORMAL | RAW_TEXT
+ 'svg' => 1,
+ 'switch' => 1,
+ 'symbol' => 1,
+ 'text' => 1,
+ 'textPath' => 1,
+ 'title' => 1,
+ 'tref' => 1,
+ 'tspan' => 1,
+ 'use' => 1,
+ 'view' => 1,
+ 'vkern' => 1,
);
/**
@@ -419,7 +419,7 @@ class Elements
'viewtarget' => 'viewTarget',
'xchannelselector' => 'xChannelSelector',
'ychannelselector' => 'yChannelSelector',
- 'zoomandpan' => 'zoomAndPan'
+ 'zoomandpan' => 'zoomAndPan',
);
/**
@@ -465,7 +465,7 @@ class Elements
'glyphref' => 'glyphRef',
'lineargradient' => 'linearGradient',
'radialgradient' => 'radialGradient',
- 'textpath' => 'textPath'
+ 'textpath' => 'textPath',
);
/**
@@ -477,24 +477,22 @@ class Elements
*
* Elements::isA('script', Elements::TEXT_RCDATA); // Returns false.
*
- * @param string $name
- * The element name.
- * @param int $mask
- * One of the constants on this class.
- * @return boolean true if the element matches the mask, false otherwise.
+ * @param string $name The element name
+ * @param int $mask One of the constants on this class
+ *
+ * @return bool true if the element matches the mask, false otherwise
*/
public static function isA($name, $mask)
{
- return (static::element($name) & $mask) == $mask;
+ return (static::element($name) & $mask) === $mask;
}
/**
* Test if an element is a valid html5 element.
*
- * @param string $name
- * The name of the element.
+ * @param string $name The name of the element
*
- * @return bool True if a html5 element and false otherwise.
+ * @return bool true if a html5 element and false otherwise
*/
public static function isHtml5Element($name)
{
@@ -506,10 +504,9 @@ class Elements
/**
* Test if an element name is a valid MathML presentation element.
*
- * @param string $name
- * The name of the element.
+ * @param string $name The name of the element
*
- * @return bool True if a MathML name and false otherwise.
+ * @return bool true if a MathML name and false otherwise
*/
public static function isMathMLElement($name)
{
@@ -520,10 +517,9 @@ class Elements
/**
* Test if an element is a valid SVG element.
*
- * @param string $name
- * The name of the element.
+ * @param string $name The name of the element
*
- * @return boolean True if a SVG element and false otherise.
+ * @return bool true if a SVG element and false otherwise
*/
public static function isSvgElement($name)
{
@@ -537,10 +533,9 @@ class Elements
* This includes html5 elements along with other allowed embedded content
* such as svg and mathml.
*
- * @param string $name
- * The name of the element.
+ * @param string $name The name of the element
*
- * @return bool True if valid and false otherwise.
+ * @return bool true if valid and false otherwise
*/
public static function isElement($name)
{
@@ -550,10 +545,9 @@ class Elements
/**
* Get the element mask for the given element name.
*
- * @param string $name
- * The name of the element.
+ * @param string $name The name of the element
*
- * @return int The element mask.
+ * @return int the element mask
*/
public static function element($name)
{
@@ -573,10 +567,9 @@ class Elements
/**
* Normalize a SVG element name to its proper case and form.
*
- * @param string $name
- * The name of the element.
+ * @param string $name The name of the element
*
- * @return string The normalized form of the element name.
+ * @return string the normalized form of the element name
*/
public static function normalizeSvgElement($name)
{
@@ -591,10 +584,9 @@ class Elements
/**
* Normalize a SVG attribute name to its proper case and form.
*
- * @param string $name
- * The name of the attribute.
+ * @param string $name The name of the attribute
*
- * @return string The normalized form of the attribute name.
+ * @return string the normalized form of the attribute name
*/
public static function normalizeSvgAttribute($name)
{
@@ -611,17 +603,16 @@ class Elements
*
* Note, all MathML element names are lowercase.
*
- * @param string $name
- * The name of the attribute.
+ * @param string $name The name of the attribute
*
- * @return string The normalized form of the attribute name.
+ * @return string the normalized form of the attribute name
*/
public static function normalizeMathMlAttribute($name)
{
$name = strtolower($name);
// Only one attribute has a mixed case form for MathML.
- if ($name == 'definitionurl') {
+ if ('definitionurl' === $name) {
$name = 'definitionURL';
}
diff --git a/src/HTML5/Entities.php b/src/HTML5/Entities.php
index 2e605d6..0e7227d 100644
--- a/src/HTML5/Entities.php
+++ b/src/HTML5/Entities.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5;
/**
@@ -7,7 +8,6 @@ namespace Masterminds\HTML5;
*/
class Entities
{
-
public static $byName = array(
'Aacute' => 'Á',
'Aacut' => 'Á',
@@ -2231,6 +2231,6 @@ class Entities
'Zscr' => '𝒵',
'zscr' => '𝓏',
'zwj' => '‍',
- 'zwnj' => '‌'
+ 'zwnj' => '‌',
);
}
diff --git a/src/HTML5/Exception.php b/src/HTML5/Exception.php
index 8f33126..64e97e6 100644
--- a/src/HTML5/Exception.php
+++ b/src/HTML5/Exception.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5;
/**
diff --git a/src/HTML5/InstructionProcessor.php b/src/HTML5/InstructionProcessor.php
index ac6a23c..3a88a10 100644
--- a/src/HTML5/InstructionProcessor.php
+++ b/src/HTML5/InstructionProcessor.php
@@ -2,6 +2,7 @@
/**
* A handler for processor instructions.
*/
+
namespace Masterminds\HTML5;
/**
@@ -18,7 +19,6 @@ namespace Masterminds\HTML5;
*/
interface InstructionProcessor
{
-
/**
* Process an individual processing instruction.
*
@@ -28,16 +28,14 @@ interface InstructionProcessor
* - Making any subsequent modifications to the DOM by modifying the
* DOMElement or its attached DOM tree.
*
- * @param DOMElement $element
- * The parent element for the current processing instruction.
- * @param string $name
- * The instruction's name. E.g. `&lt;?php` has the name `php`.
- * @param string $data
- * All of the data between the opening and closing PI marks.
- * @return DOMElement The element that should be considered "Current". This may just be
- * the element passed in, but if the processor added more elements,
- * it may choose to reset the current element to one of the elements
- * it created. (When in doubt, return the element passed in.)
+ * @param \DOMElement $element The parent element for the current processing instruction
+ * @param string $name The instruction's name. E.g. `&lt;?php` has the name `php`.
+ * @param string $data All of the data between the opening and closing PI marks
+ *
+ * @return \DOMElement The element that should be considered "Current". This may just be
+ * the element passed in, but if the processor added more elements,
+ * it may choose to reset the current element to one of the elements
+ * it created. (When in doubt, return the element passed in.)
*/
public function process(\DOMElement $element, $name, $data);
}
diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php
index c1617e7..3ecfba4 100644
--- a/src/HTML5/Parser/CharacterReference.php
+++ b/src/HTML5/Parser/CharacterReference.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Entities;
@@ -6,25 +7,22 @@ use Masterminds\HTML5\Entities;
/**
* Manage entity references.
*
- * This is a simple resolver for HTML5 character reference entitites.
- * See \Masterminds\HTML5\Entities for the list of supported entities.
+ * This is a simple resolver for HTML5 character reference entities. See Entities for the list of supported entities.
*/
class CharacterReference
{
-
protected static $numeric_mask = array(
0x0,
0x2FFFF,
0,
- 0xFFFF
+ 0xFFFF,
);
/**
- * Given a name (e.g.
- * 'amp'), lookup the UTF-8 character ('&')
+ * Given a name (e.g. 'amp'), lookup the UTF-8 character ('&').
+ *
+ * @param string $name The name to look up
*
- * @param string $name
- * The name to look up.
* @return string The character sequence. In UTF-8 this may be more than one byte.
*/
public static function lookupName($name)
@@ -34,20 +32,16 @@ class CharacterReference
}
/**
- * Given a Unicode codepoint, return the UTF-8 character.
- *
- * (NOT USED ANYWHERE)
- */
- /*
- * public static function lookupCode($codePoint) { return 'POINT'; }
- */
-
- /**
* Given a decimal number, return the UTF-8 character.
+ *
+ * @param $int
+ *
+ * @return false|string|string[]|null
*/
public static function lookupDecimal($int)
{
$entity = '&#' . $int . ';';
+
// UNTESTED: This may fail on some planes. Couldn't find full documentation
// on the value of the mask array.
return mb_decode_numericentity($entity, static::$numeric_mask, 'utf-8');
@@ -55,6 +49,10 @@ class CharacterReference
/**
* Given a hexidecimal number, return the UTF-8 character.
+ *
+ * @param $hexdec
+ *
+ * @return false|string|string[]|null
*/
public static function lookupHex($hexdec)
{
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 7f0e16a..365bb75 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -1,7 +1,9 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Elements;
+use Masterminds\HTML5\InstructionProcessor;
/**
* Create an HTML5 DOM tree from events.
@@ -24,7 +26,7 @@ use Masterminds\HTML5\Elements;
class DOMTreeBuilder implements EventHandler
{
/**
- * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
+ * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
*/
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
@@ -45,14 +47,14 @@ class DOMTreeBuilder implements EventHandler
const OPT_IMPLICIT_NS = 'implicit_namespaces';
/**
- * Holds the HTML5 element names that causes a namespace switch
+ * Holds the HTML5 element names that cause a namespace switch.
*
* @var array
*/
protected $nsRoots = array(
'html' => self::NAMESPACE_HTML,
'svg' => self::NAMESPACE_SVG,
- 'math' => self::NAMESPACE_MATHML
+ 'math' => self::NAMESPACE_MATHML,
);
/**
@@ -63,7 +65,7 @@ class DOMTreeBuilder implements EventHandler
protected $implicitNamespaces = array(
'xml' => self::NAMESPACE_XML,
'xmlns' => self::NAMESPACE_XMLNS,
- 'xlink' => self::NAMESPACE_XLINK
+ 'xlink' => self::NAMESPACE_XLINK,
);
/**
@@ -146,15 +148,15 @@ class DOMTreeBuilder implements EventHandler
protected $insertMode = 0;
/**
- * Track if we are in an element that allows only inline child nodes
+ * Track if we are in an element that allows only inline child nodes.
+ *
* @var string|null
*/
protected $onlyInline;
/**
* Quirks mode is enabled by default.
- * Any document that is missing the
- * DT will be considered to be in quirks mode.
+ * Any document that is missing the DT will be considered to be in quirks mode.
*/
protected $quirks = true;
@@ -175,24 +177,23 @@ class DOMTreeBuilder implements EventHandler
// $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
$this->doc = $impl->createDocument(null, null, $dt);
}
+
$this->errors = array();
$this->current = $this->doc; // ->documentElement;
// Create a rules engine for tags.
- $this->rules = new TreeBuildingRules($this->doc);
+ $this->rules = new TreeBuildingRules();
$implicitNS = array();
if (isset($this->options[self::OPT_IMPLICIT_NS])) {
$implicitNS = $this->options[self::OPT_IMPLICIT_NS];
- } elseif (isset($this->options["implicitNamespaces"])) {
- $implicitNS = $this->options["implicitNamespaces"];
+ } elseif (isset($this->options['implicitNamespaces'])) {
+ $implicitNS = $this->options['implicitNamespaces'];
}
// Fill $nsStack with the default HTML5 namespaces, plus the "implicitNamespaces" array taken from $options
- array_unshift($this->nsStack, $implicitNS + array(
- '' => self::NAMESPACE_HTML
- ) + $this->implicitNamespaces);
+ array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces);
if ($isFragment) {
$this->insertMode = static::IM_IN_BODY;
@@ -229,8 +230,10 @@ class DOMTreeBuilder implements EventHandler
*
* This is used for handling Processor Instructions as they are
* inserted. If omitted, PI's are inserted directly into the DOM tree.
+ *
+ * @param InstructionProcessor $proc
*/
- public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc)
+ public function setInstructionProcessor(InstructionProcessor $proc)
{
$this->processor = $proc;
}
@@ -242,7 +245,7 @@ class DOMTreeBuilder implements EventHandler
$this->quirks = $quirks;
if ($this->insertMode > static::IM_INITIAL) {
- $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
+ $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name);
return;
}
@@ -256,27 +259,32 @@ class DOMTreeBuilder implements EventHandler
* @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
* - XLink, MathML and SVG namespace handling
* - Omission rules: 8.1.2.4 Optional tags
+ *
+ * @param string $name
+ * @param array $attributes
+ * @param bool $selfClosing
+ *
+ * @return int
*/
public function startTag($name, $attributes = array(), $selfClosing = false)
{
- // fprintf(STDOUT, $name);
$lname = $this->normalizeTagName($name);
// Make sure we have an html element.
- if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) {
+ if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) {
$this->startTag('html');
}
// Set quirks mode if we're at IM_INITIAL with no doctype.
- if ($this->insertMode == static::IM_INITIAL) {
+ if ($this->insertMode === static::IM_INITIAL) {
$this->quirks = true;
- $this->parseError("No DOCTYPE specified.");
+ $this->parseError('No DOCTYPE specified.');
}
// SPECIAL TAG HANDLING:
// Spec says do this, and "don't ask."
// find the spec where this is defined... looks problematic
- if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
+ if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
$name = 'img';
}
@@ -292,7 +300,7 @@ class DOMTreeBuilder implements EventHandler
break;
case 'head':
if ($this->insertMode > static::IM_BEFORE_HEAD) {
- $this->parseError("Unexpected head tag outside of head context.");
+ $this->parseError('Unexpected head tag outside of head context.');
} else {
$this->insertMode = static::IM_IN_HEAD;
}
@@ -307,14 +315,14 @@ class DOMTreeBuilder implements EventHandler
$this->insertMode = static::IM_IN_MATHML;
break;
case 'noscript':
- if ($this->insertMode == static::IM_IN_HEAD) {
+ if ($this->insertMode === static::IM_IN_HEAD) {
$this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
}
break;
}
// Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@@ -322,62 +330,58 @@ class DOMTreeBuilder implements EventHandler
// when we find a tag that appears inside $nsRoots, we have to switch the default namespace
if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
array_unshift($this->nsStack, array(
- '' => $this->nsRoots[$lname]
+ '' => $this->nsRoots[$lname],
) + $this->nsStack[0]);
- $pushes ++;
+ ++$pushes;
}
$needsWorkaround = false;
- if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
+ if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) {
// when xmlNamespaces is true and we find an 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack
foreach ($attributes as $aName => $aVal) {
- if ($aName === 'xmlns') {
+ if ('xmlns' === $aName) {
$needsWorkaround = $aVal;
array_unshift($this->nsStack, array(
- '' => $aVal
+ '' => $aVal,
) + $this->nsStack[0]);
- $pushes ++;
- } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
+ ++$pushes;
+ } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) {
array_unshift($this->nsStack, array(
- substr($aName, $pos + 1) => $aVal
+ substr($aName, $pos + 1) => $aVal,
) + $this->nsStack[0]);
- $pushes ++;
+ ++$pushes;
}
}
}
if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
- $this->autoclose($this->onlyInline);
- $this->onlyInline = null;
+ $this->autoclose($this->onlyInline);
+ $this->onlyInline = null;
}
try {
$prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
-
- if ($needsWorkaround!==false) {
-
- $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>";
+ if (false !== $needsWorkaround) {
+ $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>';
$frag = new \DOMDocument('1.0', 'UTF-8');
$frag->loadXML($xml);
$ele = $this->doc->importNode($frag->documentElement, true);
-
} else {
- if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
+ if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
$ele = $this->doc->createElement($lname);
} else {
$ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
}
}
-
} catch (\DOMException $e) {
$this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
$ele = $this->doc->createElement('invalid');
}
if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
- $this->onlyInline = $lname;
+ $this->onlyInline = $lname;
}
// When we add some namespaces, we have to track them. Later, when "endElement" is invoked, we have to remove them.
@@ -396,23 +400,23 @@ class DOMTreeBuilder implements EventHandler
foreach ($attributes as $aName => $aVal) {
// xmlns attributes can't be set
- if ($aName === 'xmlns') {
+ if ('xmlns' === $aName) {
continue;
}
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$aName = Elements::normalizeSvgAttribute($aName);
- } elseif ($this->insertMode == static::IM_IN_MATHML) {
+ } elseif ($this->insertMode === static::IM_IN_MATHML) {
$aName = Elements::normalizeMathMlAttribute($aName);
}
try {
$prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
- if ($prefix==='xmlns') {
- $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal);
- } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) {
- $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
+ if ('xmlns' === $prefix) {
+ $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal);
+ } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) {
+ $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal);
} else {
$ele->setAttribute($aName, $aVal);
}
@@ -422,19 +426,19 @@ class DOMTreeBuilder implements EventHandler
}
// This is necessary on a non-DTD schema, like HTML5.
- if ($aName == 'id') {
+ if ('id' === $aName) {
$ele->setIdAttribute('id', true);
}
}
- // Some elements have special processing rules. Handle those separately.
- if ($this->rules->hasRules($name) && $this->frag !== $this->current) {
+ if ($this->frag !== $this->current && $this->rules->hasRules($name)) {
+ // Some elements have special processing rules. Handle those separately.
$this->current = $this->rules->evaluate($ele, $this->current);
- } // Otherwise, it's a standard element.
- else {
+ } else {
+ // Otherwise, it's a standard element.
$this->current->appendChild($ele);
- if (! Elements::isA($name, Elements::VOID_TAG)) {
+ if (!Elements::isA($name, Elements::VOID_TAG)) {
$this->current = $ele;
}
@@ -448,7 +452,7 @@ class DOMTreeBuilder implements EventHandler
// This is sort of a last-ditch attempt to correct for cases where no head/body
// elements are provided.
- if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
+ if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) {
$this->insertMode = static::IM_IN_BODY;
}
@@ -456,7 +460,7 @@ class DOMTreeBuilder implements EventHandler
// but we have to remove the namespaces pushed to $nsStack.
if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
// remove the namespaces defined by the current node
- for ($i = 0; $i < $pushes; $i ++) {
+ for ($i = 0; $i < $pushes; ++$i) {
array_shift($this->nsStack);
}
}
@@ -485,7 +489,7 @@ class DOMTreeBuilder implements EventHandler
'html',
'br',
'head',
- 'title'
+ 'title',
))) {
$this->startTag('html');
$this->endTag($name);
@@ -495,13 +499,13 @@ class DOMTreeBuilder implements EventHandler
}
// Ignore the tag.
- $this->parseError("Illegal closing tag at global scope.");
+ $this->parseError('Illegal closing tag at global scope.');
return;
}
// Special case handling for SVG.
- if ($this->insertMode == static::IM_IN_SVG) {
+ if ($this->insertMode === static::IM_IN_SVG) {
$lname = Elements::normalizeSvgElement($lname);
}
@@ -512,39 +516,33 @@ class DOMTreeBuilder implements EventHandler
$cid = spl_object_hash($this->current);
}
- // XXX: Not sure whether we need this anymore.
- // if ($name != $lname) {
- // return $this->quirksTreeResolver($lname);
- // }
-
// XXX: HTML has no parent. What do we do, though,
// if this element appears in the wrong place?
- if ($lname == 'html') {
+ if ('html' === $lname) {
return;
}
// remove the namespaces defined by the current node
if (isset($this->pushes[$cid])) {
- for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
+ for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
array_shift($this->nsStack);
}
unset($this->pushes[$cid]);
}
- if (! $this->autoclose($lname)) {
+ if (!$this->autoclose($lname)) {
$this->parseError('Could not find closing tag for ' . $lname);
}
- // switch ($this->insertMode) {
switch ($lname) {
- case "head":
+ case 'head':
$this->insertMode = static::IM_AFTER_HEAD;
break;
- case "body":
+ case 'body':
$this->insertMode = static::IM_AFTER_BODY;
break;
- case "svg":
- case "mathml":
+ case 'svg':
+ case 'mathml':
$this->insertMode = static::IM_IN_BODY;
break;
}
@@ -566,9 +564,9 @@ class DOMTreeBuilder implements EventHandler
// practical as most documents contain these characters. Other text is not
// expected here so recording a parse error is necessary.
$dataTmp = trim($data, " \t\n\r\f");
- if (! empty($dataTmp)) {
+ if (!empty($dataTmp)) {
// fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
- $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
+ $this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
}
return;
@@ -585,7 +583,7 @@ class DOMTreeBuilder implements EventHandler
public function parseError($msg, $line = 0, $col = 0)
{
- $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
+ $this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg);
}
public function getErrors()
@@ -602,15 +600,14 @@ class DOMTreeBuilder implements EventHandler
public function processingInstruction($name, $data = null)
{
// XXX: Ignore initial XML declaration, per the spec.
- if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
+ if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) {
return;
}
- // Important: The processor may modify the current DOM tree however
- // it sees fit.
- if (isset($this->processor)) {
+ // Important: The processor may modify the current DOM tree however it sees fit.
+ if ($this->processor instanceof InstructionProcessor) {
$res = $this->processor->process($this->current, $name, $data);
- if (! empty($res)) {
+ if (!empty($res)) {
$this->current = $res;
}
@@ -632,9 +629,9 @@ class DOMTreeBuilder implements EventHandler
*
* See sections 2.9 and 8.1.2.
*
- * @param string $name
- * The tag name.
- * @return string The normalized tag name.
+ * @param string $name The tag name
+ *
+ * @return string the normalized tag name
*/
protected function normalizeTagName($name)
{
@@ -646,7 +643,7 @@ class DOMTreeBuilder implements EventHandler
protected function quirksTreeResolver($name)
{
- throw new \Exception("Not implemented.");
+ throw new \Exception('Not implemented.');
}
/**
@@ -660,15 +657,16 @@ class DOMTreeBuilder implements EventHandler
{
$working = $this->current;
do {
- if ($working->nodeType != XML_ELEMENT_NODE) {
+ if (XML_ELEMENT_NODE !== $working->nodeType) {
return false;
}
- if ($working->tagName == $tagName) {
+ if ($working->tagName === $tagName) {
$this->current = $working->parentNode;
return true;
}
} while ($working = $working->parentNode);
+
return false;
}
@@ -685,8 +683,8 @@ class DOMTreeBuilder implements EventHandler
protected function isAncestor($tagName)
{
$candidate = $this->current;
- while ($candidate->nodeType === XML_ELEMENT_NODE) {
- if ($candidate->tagName == $tagName) {
+ while (XML_ELEMENT_NODE === $candidate->nodeType) {
+ if ($candidate->tagName === $tagName) {
return true;
}
$candidate = $candidate->parentNode;
@@ -704,6 +702,6 @@ class DOMTreeBuilder implements EventHandler
*/
protected function isParent($tagName)
{
- return $this->current->tagName == $tagName;
+ return $this->current->tagName === $tagName;
}
}
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php
index 3da71a3..cb0109b 100644
--- a/src/HTML5/Parser/EventHandler.php
+++ b/src/HTML5/Parser/EventHandler.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
@@ -20,7 +21,6 @@ namespace Masterminds\HTML5\Parser;
*/
interface EventHandler
{
-
const DOCTYPE_NONE = 0;
const DOCTYPE_PUBLIC = 1;
@@ -30,15 +30,11 @@ interface EventHandler
/**
* A doctype declaration.
*
- * @param string $name
- * The name of the root element.
- * @param int $idType
- * One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM.
- * @param string $id
- * The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
- * then this is a system ID.
- * @param boolean $quirks
- * Indicates whether the builder should enter quirks mode.
+ * @param string $name The name of the root element
+ * @param int $idType One of DOCTYPE_NONE, DOCTYPE_PUBLIC, or DOCTYPE_SYSTEM
+ * @param string $id The identifier. For DOCTYPE_PUBLIC, this is the public ID. If DOCTYPE_SYSTEM,
+ * then this is a system ID.
+ * @param bool $quirks Indicates whether the builder should enter quirks mode
*/
public function doctype($name, $idType = 0, $id = null, $quirks = false);
@@ -63,13 +59,11 @@ interface EventHandler
* The textmode is automatically reset to Tokenizer::TEXTMODE_NORMAL when the
* closing tag is encountered. **This behavior may change.**
*
- * @param string $name
- * The tag name.
- * @param array $attributes
- * An array with all of the tag's attributes.
- * @param boolean $selfClosing
- * An indicator of whether or not this tag is self-closing (<foo/>)
- * @return int One of the Tokenizer::TEXTMODE_* constants.
+ * @param string $name The tag name
+ * @param array $attributes An array with all of the tag's attributes
+ * @param bool $selfClosing An indicator of whether or not this tag is self-closing (<foo/>)
+ *
+ * @return int one of the Tokenizer::TEXTMODE_* constants
*/
public function startTag($name, $attributes = array(), $selfClosing = false);
@@ -104,7 +98,7 @@ interface EventHandler
* A CDATA section.
*
* @param string $data
- * The unparsed character data.
+ * The unparsed character data
*/
public function cdata($data);
@@ -113,10 +107,8 @@ interface EventHandler
*
* While user agents don't get PIs, server-side does.
*
- * @param string $name
- * The name of the processor (e.g. 'php').
- * @param string $data
- * The unparsed data.
+ * @param string $name The name of the processor (e.g. 'php').
+ * @param string $data The unparsed data
*/
public function processingInstruction($name, $data = null);
}
diff --git a/src/HTML5/Parser/FileInputStream.php b/src/HTML5/Parser/FileInputStream.php
index 76bd17b..f176422 100644
--- a/src/HTML5/Parser/FileInputStream.php
+++ b/src/HTML5/Parser/FileInputStream.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
@@ -18,9 +19,9 @@ class FileInputStream extends StringInputStream implements InputStream
/**
* Load a file input stream.
*
- * @param string $data The file or url path to load.
- * @param string $encoding The encoding to use for the data.
- * @param string $debug A fprintf format to use to echo the data on stdout.
+ * @param string $data the file or url path to load
+ * @param string $encoding the encoding to use for the data
+ * @param string $debug a fprintf format to use to echo the data on stdout
*/
public function __construct($data, $encoding = 'UTF-8', $debug = '')
{
diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php
index e4a106a..cf279d8 100644
--- a/src/HTML5/Parser/InputStream.php
+++ b/src/HTML5/Parser/InputStream.php
@@ -29,7 +29,7 @@ interface InputStream extends \Iterator
*
* @TODO Move this to the scanner.
*
- * @return int The column number.
+ * @return int the column number
*/
public function columnOffset();
@@ -49,12 +49,12 @@ interface InputStream extends \Iterator
* and returns the matched substring.
*
* @see strcspn
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
+ *
+ * @param string $bytes Bytes to match
+ * @param int $max Maximum number of bytes to scan
+ *
* @return mixed Index or false if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
+ * equality when checking the result, since index could be 0.
*/
public function charsUntil($bytes, $max = null);
@@ -65,20 +65,18 @@ interface InputStream extends \Iterator
* and returns the matched substring.
*
* @see strspn
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
+ *
+ * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
+ * current char, the pointer advances and the char is part of the
+ * substring.
+ * @param int $max The max number of chars to read
*/
public function charsWhile($bytes, $max = null);
/**
* Unconsume one character.
*
- * @param int $howMany
- * The number of characters to move the pointer back.
+ * @param int $howMany The number of characters to move the pointer back
*/
public function unconsume($howMany = 1);
diff --git a/src/HTML5/Parser/ParseError.php b/src/HTML5/Parser/ParseError.php
index 86498a1..640e516 100644
--- a/src/HTML5/Parser/ParseError.php
+++ b/src/HTML5/Parser/ParseError.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index cec9a13..7bea1ae 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Exception;
@@ -18,7 +19,7 @@ class Scanner
private $data;
/**
- * The current integer byte position we are in $data
+ * The current integer byte position we are in $data.
*/
private $char;
@@ -35,10 +36,10 @@ class Scanner
/**
* Create a new Scanner.
*
- * @param string $data Data to parse
- * @param string $encoding The encoding to use for the data.
+ * @param string $data Data to parse
+ * @param string $encoding the encoding to use for the data
*
- * @throws Exception If the given data cannot be encoded to UTF-8.
+ * @throws Exception if the given data cannot be encoded to UTF-8
*/
public function __construct($data, $encoding = 'UTF-8')
{
@@ -75,20 +76,21 @@ class Scanner
* '</script>' string.
*
* @param string $sequence
- * @param bool $caseSensitive
+ * @param bool $caseSensitive
*
* @return bool
*/
public function sequenceMatches($sequence, $caseSensitive = true)
{
$portion = substr($this->data, $this->char, strlen($sequence));
- return $caseSensitive ? $portion === $sequence : strcasecmp($portion, $sequence) === 0;
+
+ return $caseSensitive ? $portion === $sequence : 0 === strcasecmp($portion, $sequence);
}
/**
* Get the current position.
*
- * @return int The current intiger byte position.
+ * @return int the current integer byte position
*/
public function position()
{
@@ -98,7 +100,7 @@ class Scanner
/**
* Take a peek at the next character in the data.
*
- * @return string The next character.
+ * @return string the next character
*/
public function peek()
{
@@ -114,11 +116,11 @@ class Scanner
*
* Note: This advances the pointer.
*
- * @return string The next character.
+ * @return string the next character
*/
public function next()
{
- $this->char++;
+ ++$this->char;
if ($this->char < $this->EOF) {
return $this->data[$this->char];
@@ -132,7 +134,7 @@ class Scanner
*
* Note, this does not advance the pointer.
*
- * @return string The current character.
+ * @return string the current character
*/
public function current()
{
@@ -157,13 +159,12 @@ class Scanner
* Unconsume some of the data.
* This moves the data pointer backwards.
*
- * @param int $howMany
- * The number of characters to move the pointer back.
+ * @param int $howMany The number of characters to move the pointer back
*/
public function unconsume($howMany = 1)
{
if (($this->char - $howMany) >= 0) {
- $this->char = $this->char - $howMany;
+ $this->char -= $howMany;
}
}
@@ -173,7 +174,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group that is hex characters.
+ * @return string the next group that is hex characters
*/
public function getHex()
{
@@ -186,7 +187,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group of ASCII alpha characters.
+ * @return string the next group of ASCII alpha characters
*/
public function getAsciiAlpha()
{
@@ -199,7 +200,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group of ASCII alpha characters and numbers.
+ * @return string the next group of ASCII alpha characters and numbers
*/
public function getAsciiAlphaNum()
{
@@ -212,7 +213,7 @@ class Scanner
* Note, along with getting the characters the pointer in the data will be
* moved as well.
*
- * @return string The next group of numbers.
+ * @return string the next group of numbers
*/
public function getNumeric()
{
@@ -242,11 +243,11 @@ class Scanner
/**
* Returns the current line that is being consumed.
*
- * @return int The current line number.
+ * @return int the current line number
*/
public function currentLine()
{
- if (empty($this->EOF) || $this->char == 0) {
+ if (empty($this->EOF) || 0 === $this->char) {
return 1;
}
@@ -284,12 +285,12 @@ class Scanner
*
* Newlines are column 0. The first char after a newline is column 1.
*
- * @return int The column number.
+ * @return int the column number
*/
public function columnOffset()
{
// Short circuit for the first char.
- if ($this->char == 0) {
+ if (0 === $this->char) {
return 0;
}
@@ -303,7 +304,7 @@ class Scanner
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
- if ($lastLine !== false) {
+ if (false !== $lastLine) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
// After a newline.
@@ -318,7 +319,7 @@ class Scanner
*
* This consumes characters until the EOF.
*
- * @return int The number of characters remaining.
+ * @return int the number of characters remaining
*/
public function remainingChars()
{
@@ -351,7 +352,7 @@ class Scanner
$crlfTable = array(
"\0" => "\xEF\xBF\xBD",
"\r\n" => "\n",
- "\r" => "\n"
+ "\r" => "\n",
);
return strtr($data, $crlfTable);
@@ -365,12 +366,11 @@ class Scanner
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
+ * @param string $bytes Bytes to match
+ * @param int $max Maximum number of bytes to scan
+ *
* @return mixed Index or false if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
+ * equality when checking the result, since index could be 0.
*/
private function doCharsUntil($bytes, $max = null)
{
@@ -378,7 +378,7 @@ class Scanner
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
@@ -396,12 +396,10 @@ class Scanner
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
+ * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
+ * current char, the pointer advances and the char is part of the
+ * substring.
+ * @param int $max The max number of chars to read
*
* @return string
*/
@@ -411,7 +409,7 @@ class Scanner
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index 0118468..2281990 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -2,6 +2,7 @@
/**
* Loads a string to be parsed.
*/
+
namespace Masterminds\HTML5\Parser;
/*
@@ -50,7 +51,7 @@ class StringInputStream implements InputStream
private $data;
/**
- * The current integer byte position we are in $data
+ * The current integer byte position we are in $data.
*/
private $char;
@@ -67,9 +68,9 @@ class StringInputStream implements InputStream
/**
* Create a new InputStream wrapper.
*
- * @param string $data Data to parse
- * @param string $encoding The encoding to use for the data.
- * @param string $debug A fprintf format to use to echo the data on stdout.
+ * @param string $data Data to parse
+ * @param string $encoding the encoding to use for the data
+ * @param string $debug a fprintf format to use to echo the data on stdout
*/
public function __construct($data, $encoding = 'UTF-8', $debug = '')
{
@@ -110,7 +111,7 @@ class StringInputStream implements InputStream
$crlfTable = array(
"\0" => "\xEF\xBF\xBD",
"\r\n" => "\n",
- "\r" => "\n"
+ "\r" => "\n",
);
return strtr($data, $crlfTable);
@@ -121,7 +122,7 @@ class StringInputStream implements InputStream
*/
public function currentLine()
{
- if (empty($this->EOF) || $this->char == 0) {
+ if (empty($this->EOF) || 0 === $this->char) {
return 1;
}
// Add one to $this->char because we want the number for the next
@@ -130,9 +131,7 @@ class StringInputStream implements InputStream
}
/**
- *
* @deprecated
- *
*/
public function getCurrentLine()
{
@@ -144,12 +143,12 @@ class StringInputStream implements InputStream
*
* Newlines are column 0. The first char after a newline is column 1.
*
- * @return int The column number.
+ * @return int the column number
*/
public function columnOffset()
{
// Short circuit for the first char.
- if ($this->char == 0) {
+ if (0 === $this->char) {
return 0;
}
// strrpos is weird, and the offset needs to be negative for what we
@@ -162,7 +161,7 @@ class StringInputStream implements InputStream
// However, for here we want the length up until the next byte to be
// processed, so add one to the current byte ($this->char).
- if ($lastLine !== false) {
+ if (false !== $lastLine) {
$findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
} else {
// After a newline.
@@ -173,9 +172,7 @@ class StringInputStream implements InputStream
}
/**
- *
* @deprecated
- *
*/
public function getColumnOffset()
{
@@ -185,7 +182,7 @@ class StringInputStream implements InputStream
/**
* Get the current character.
*
- * @return string The current character.
+ * @return string the current character
*/
public function current()
{
@@ -198,7 +195,7 @@ class StringInputStream implements InputStream
*/
public function next()
{
- $this->char ++;
+ ++$this->char;
}
/**
@@ -212,15 +209,11 @@ class StringInputStream implements InputStream
/**
* Is the current pointer location valid.
*
- * @return bool Is the current pointer location valid.
+ * @return bool is the current pointer location valid
*/
public function valid()
{
- if ($this->char < $this->EOF) {
- return true;
- }
-
- return false;
+ return $this->char < $this->EOF;
}
/**
@@ -232,7 +225,7 @@ class StringInputStream implements InputStream
* @note This performs bounds checking
*
* @return string Returns the remaining text. If called when the InputStream is
- * already exhausted, it returns an empty string.
+ * already exhausted, it returns an empty string.
*/
public function remainingChars()
{
@@ -254,12 +247,11 @@ class StringInputStream implements InputStream
* Matches as far as possible until we reach a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * Bytes to match.
- * @param int $max
- * Maximum number of bytes to scan.
+ * @param string $bytes Bytes to match
+ * @param int $max Maximum number of bytes to scan
+ *
* @return mixed Index or false if no match is found. You should use strong
- * equality when checking the result, since index could be 0.
+ * equality when checking the result, since index could be 0.
*/
public function charsUntil($bytes, $max = null)
{
@@ -267,7 +259,7 @@ class StringInputStream implements InputStream
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strcspn($this->data, $bytes, $this->char, $max);
} else {
$len = strcspn($this->data, $bytes, $this->char);
@@ -285,12 +277,10 @@ class StringInputStream implements InputStream
* Matches as far as possible with a certain set of bytes
* and returns the matched substring.
*
- * @param string $bytes
- * A mask of bytes to match. If ANY byte in this mask matches the
- * current char, the pointer advances and the char is part of the
- * substring.
- * @param int $max
- * The max number of chars to read.
+ * @param string $bytes A mask of bytes to match. If ANY byte in this mask matches the
+ * current char, the pointer advances and the char is part of the
+ * substring.
+ * @param int $max The max number of chars to read
*
* @return string
*/
@@ -300,7 +290,7 @@ class StringInputStream implements InputStream
return false;
}
- if ($max === 0 || $max) {
+ if (0 === $max || $max) {
$len = strspn($this->data, $bytes, $this->char, $max);
} else {
$len = strspn($this->data, $bytes, $this->char);
@@ -314,13 +304,12 @@ class StringInputStream implements InputStream
/**
* Unconsume characters.
*
- * @param int $howMany
- * The number of characters to unconsume.
+ * @param int $howMany The number of characters to unconsume
*/
public function unconsume($howMany = 1)
{
if (($this->char - $howMany) >= 0) {
- $this->char = $this->char - $howMany;
+ $this->char -= $howMany;
}
}
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index ba9de52..cfd0e43 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
use Masterminds\HTML5\Elements;
@@ -25,7 +26,6 @@ use Masterminds\HTML5\Elements;
*/
class Tokenizer
{
-
protected $scanner;
protected $events;
@@ -56,12 +56,9 @@ class Tokenizer
* it a scanner (input) and an event handler (output), and then calling
* the Tokenizer::parse() method.`
*
- * @param \Masterminds\HTML5\Parser\Scanner $scanner
- * A scanner initialized with an input stream.
- * @param \Masterminds\HTML5\Parser\EventHandler $eventHandler
- * An event handler, initialized and ready to receive
- * events.
- * @param string $mode
+ * @param Scanner $scanner A scanner initialized with an input stream
+ * @param EventHandler $eventHandler An event handler, initialized and ready to receive events
+ * @param string $mode
*/
public function __construct($scanner, $eventHandler, $mode = self::CONFORMANT_HTML)
{
@@ -103,11 +100,9 @@ class Tokenizer
* Normally, setting is done by the event handler via a special return code on
* startTag(), but it can also be set manually using this function.
*
- * @param integer $textmode
- * One of Elements::TEXT_*
- * @param string $untilTag
- * The tag that should stop RAW or RCDATA mode. Normal mode does not
- * use this indicator.
+ * @param int $textmode One of Elements::TEXT_*
+ * @param string $untilTag The tag that should stop RAW or RCDATA mode. Normal mode does not
+ * use this indicator.
*/
public function setTextMode($textmode, $untilTag = null)
{
@@ -117,13 +112,13 @@ class Tokenizer
/**
* Consume a character and make a move.
- * HTML5 8.2.4.1
+ * HTML5 8.2.4.1.
*/
protected function consumeData()
{
$tok = $this->scanner->current();
- if ($tok === '&') {
+ if ('&' === $tok) {
// Character reference
$ref = $this->decodeCharacterReference();
$this->buffer($ref);
@@ -132,7 +127,7 @@ class Tokenizer
}
// Parse tag
- if ($tok === '<') {
+ if ('<' === $tok) {
// Any buffered text data can go out now.
$this->flushBuffer();
@@ -143,7 +138,7 @@ class Tokenizer
|| $this->processingInstruction()
|| $this->tagName()
// This always returns false.
- || $this->parseError("Illegal tag opening")
+ || $this->parseError('Illegal tag opening')
|| $this->characterData();
$tok = $this->scanner->current();
@@ -153,7 +148,7 @@ class Tokenizer
$this->eof($tok);
// Parse character
- if ($tok !== false) {
+ if (false !== $tok) {
switch ($this->textMode) {
case Elements::TEXT_RAW:
$this->rawText($tok);
@@ -164,10 +159,10 @@ class Tokenizer
break;
default:
- if (!strspn($tok, "<&")) {
+ if (!strspn($tok, '<&')) {
// NULL character
- if ($tok === "\00") {
- $this->parseError("Received null character.");
+ if ("\00" === $tok) {
+ $this->parseError('Received null character.');
}
$this->text .= $tok;
@@ -189,7 +184,7 @@ class Tokenizer
protected function characterData()
{
$tok = $this->scanner->current();
- if ($tok === false) {
+ if (false === $tok) {
return false;
}
switch ($this->textMode) {
@@ -198,9 +193,10 @@ class Tokenizer
case Elements::TEXT_RCDATA:
return $this->rcdata($tok);
default:
- if (strspn($tok, "<&")) {
+ if (strspn($tok, '<&')) {
return false;
}
+
return $this->text($tok);
}
}
@@ -208,20 +204,20 @@ class Tokenizer
/**
* This buffers the current token as character data.
*
- * @param string $tok The current token.
+ * @param string $tok the current token
*
* @return bool
*/
protected function text($tok)
{
// This should never happen...
- if ($tok === false) {
+ if (false === $tok) {
return false;
}
// NULL character
- if ($tok === "\00") {
- $this->parseError("Received null character.");
+ if ("\00" === $tok) {
+ $this->parseError('Received null character.');
}
$this->buffer($tok);
@@ -233,7 +229,7 @@ class Tokenizer
/**
* Read text in RAW mode.
*
- * @param string $tok The current token.
+ * @param string $tok the current token
*
* @return bool
*/
@@ -254,7 +250,7 @@ class Tokenizer
/**
* Read text in RCDATA mode.
*
- * @param string $tok The current token.
+ * @param string $tok the current token
*
* @return bool
*/
@@ -268,8 +264,8 @@ class Tokenizer
$txt = '';
$caseSensitive = !Elements::isHtml5Element($this->untilTag);
- while ($tok !== false && ! ($tok == '<' && ($this->scanner->sequenceMatches($sequence, $caseSensitive)))) {
- if ($tok == '&') {
+ while (false !== $tok && !('<' == $tok && ($this->scanner->sequenceMatches($sequence, $caseSensitive)))) {
+ if ('&' == $tok) {
$txt .= $this->decodeCharacterReference();
$tok = $this->scanner->current();
} else {
@@ -280,8 +276,8 @@ class Tokenizer
$len = strlen($sequence);
$this->scanner->consume($len);
$len += $this->scanner->whitespace();
- if ($this->scanner->current() !== '>') {
- $this->parseError("Unclosed RCDATA end tag");
+ if ('>' !== $this->scanner->current()) {
+ $this->parseError('Unclosed RCDATA end tag');
}
$this->scanner->unconsume($len);
@@ -296,7 +292,7 @@ class Tokenizer
*/
protected function eof($tok)
{
- if ($tok === false) {
+ if (false === $tok) {
// fprintf(STDOUT, "EOF");
$this->flushBuffer();
$this->events->eof();
@@ -313,40 +309,37 @@ class Tokenizer
*/
protected function markupDeclaration($tok)
{
- if ($tok != '!') {
+ if ('!' != $tok) {
return false;
}
$tok = $this->scanner->next();
// Comment:
- if ($tok == '-' && $this->scanner->peek() == '-') {
+ if ('-' == $tok && '-' == $this->scanner->peek()) {
$this->scanner->next(); // Consume the other '-'
$this->scanner->next(); // Next char.
return $this->comment();
- }
-
- elseif ($tok == 'D' || $tok == 'd') { // Doctype
+ } elseif ('D' == $tok || 'd' == $tok) { // Doctype
return $this->doctype();
- }
-
- elseif ($tok == '[') { // CDATA section
+ } elseif ('[' == $tok) { // CDATA section
return $this->cdataSection();
}
// FINISH
- $this->parseError("Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s", $tok);
+ $this->parseError('Expected <!--, <![CDATA[, or <!DOCTYPE. Got <!%s', $tok);
$this->bogusComment('<!');
+
return true;
}
/**
* Consume an end tag.
- * 8.2.4.9
+ * 8.2.4.9.
*/
protected function endTag()
{
- if ($this->scanner->current() != '/') {
+ if ('/' != $this->scanner->current()) {
return false;
}
$tok = $this->scanner->next();
@@ -355,21 +348,22 @@ class Tokenizer
// > -> parse error
// EOF -> parse error
// -> parse error
- if (! ctype_alpha($tok)) {
+ if (!ctype_alpha($tok)) {
$this->parseError("Expected tag name, got '%s'", $tok);
- if ($tok == "\0" || $tok === false) {
+ if ("\0" == $tok || false === $tok) {
return false;
}
+
return $this->bogusComment('</');
}
$name = $this->scanner->charsUntil("\n\f \t>");
- $name = $this->mode === self::CONFORMANT_XML ? $name: strtolower($name);
+ $name = self::CONFORMANT_XML === $this->mode ? $name : strtolower($name);
// Trash whitespace.
$this->scanner->whitespace();
$tok = $this->scanner->current();
- if ($tok != '>') {
+ if ('>' != $tok) {
$this->parseError("Expected >, got '%s'", $tok);
// We just trash stuff until we get to the next tag close.
$this->scanner->charsUntil('>');
@@ -377,23 +371,24 @@ class Tokenizer
$this->events->endTag($name);
$this->scanner->next();
+
return true;
}
/**
* Consume a tag name and body.
- * 8.2.4.10
+ * 8.2.4.10.
*/
protected function tagName()
{
$tok = $this->scanner->current();
- if (! ctype_alpha($tok)) {
+ if (!ctype_alpha($tok)) {
return false;
}
// We know this is at least one char.
- $name = $this->scanner->charsWhile(":_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
- $name = $this->mode === self::CONFORMANT_XML ? $name : strtolower($name);
+ $name = $this->scanner->charsWhile(':_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz');
+ $name = self::CONFORMANT_XML === $this->mode ? $name : strtolower($name);
$attributes = array();
$selfClose = false;
@@ -403,7 +398,7 @@ class Tokenizer
do {
$this->scanner->whitespace();
$this->attribute($attributes);
- } while (! $this->isTagEnd($selfClose));
+ } while (!$this->isTagEnd($selfClose));
} catch (ParseError $e) {
$selfClose = false;
}
@@ -425,30 +420,34 @@ class Tokenizer
protected function isTagEnd(&$selfClose)
{
$tok = $this->scanner->current();
- if ($tok == '/') {
+ if ('/' == $tok) {
$this->scanner->next();
$this->scanner->whitespace();
$tok = $this->scanner->current();
- if ($tok == '>') {
+ if ('>' == $tok) {
$selfClose = true;
+
return true;
}
- if ($tok === false) {
- $this->parseError("Unexpected EOF inside of tag.");
+ if (false === $tok) {
+ $this->parseError('Unexpected EOF inside of tag.');
+
return true;
}
// Basically, we skip the / token and go on.
// See 8.2.4.43.
$this->parseError("Unexpected '%s' inside of a tag.", $tok);
+
return false;
}
- if ($tok == '>') {
+ if ('>' == $tok) {
return true;
}
- if ($tok === false) {
- $this->parseError("Unexpected EOF inside of tag.");
+ if (false === $tok) {
+ $this->parseError('Unexpected EOF inside of tag.');
+
return true;
}
@@ -467,23 +466,23 @@ class Tokenizer
protected function attribute(&$attributes)
{
$tok = $this->scanner->current();
- if ($tok == '/' || $tok == '>' || $tok === false) {
+ if ('/' == $tok || '>' == $tok || false === $tok) {
return false;
}
- if ($tok == '<') {
+ if ('<' == $tok) {
$this->parseError("Unexpected '<' inside of attributes list.");
// Push the < back onto the stack.
$this->scanner->unconsume();
// Let the caller figure out how to handle this.
- throw new ParseError("Start tag inside of attribute.");
+ throw new ParseError('Start tag inside of attribute.');
}
$name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));
- if (strlen($name) == 0) {
+ if (0 == strlen($name)) {
$tok = $this->scanner->current();
- $this->parseError("Expected an attribute name, got %s.", $tok);
+ $this->parseError('Expected an attribute name, got %s.', $tok);
// Really, only '=' can be the char here. Everything else gets absorbed
// under one rule or another.
$name = $tok;
@@ -497,18 +496,17 @@ class Tokenizer
// see issue #23: https://github.com/Masterminds/html5-php/issues/23
// and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
if (preg_match("/[\x1-\x2C\\/\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/u", $name)) {
- $this->parseError("Unexpected characters in attribute name: %s", $name);
+ $this->parseError('Unexpected characters in attribute name: %s', $name);
$isValidAttribute = false;
} // There is no limitation for 1st character in HTML5.
// But method "DOMElement::setAttribute" is throwing exception for the
// characters below so they have to be filtered.
// see issue #23: https://github.com/Masterminds/html5-php/issues/23
// and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
- else
- if (preg_match("/^[0-9.-]/u", $name)) {
- $this->parseError("Unexpected character at the begining of attribute name: %s", $name);
- $isValidAttribute = false;
- }
+ elseif (preg_match('/^[0-9.-]/u', $name)) {
+ $this->parseError('Unexpected character at the begining of attribute name: %s', $name);
+ $isValidAttribute = false;
+ }
// 8.1.2.3
$this->scanner->whitespace();
@@ -516,6 +514,7 @@ class Tokenizer
if ($isValidAttribute) {
$attributes[$name] = $val;
}
+
return true;
}
@@ -527,7 +526,7 @@ class Tokenizer
*/
protected function attributeValue()
{
- if ($this->scanner->current() != '=') {
+ if ('=' != $this->scanner->current()) {
return null;
}
$this->scanner->next();
@@ -538,21 +537,24 @@ class Tokenizer
switch ($tok) {
case "\n":
case "\f":
- case " ":
+ case ' ':
case "\t":
// Whitespace here indicates an empty value.
return null;
case '"':
case "'":
$this->scanner->next();
+
return $this->quotedAttributeValue($tok);
case '>':
// case '/': // 8.2.4.37 seems to allow foo=/ as a valid attr.
- $this->parseError("Expected attribute value, got tag end.");
+ $this->parseError('Expected attribute value, got tag end.');
+
return null;
case '=':
case '`':
- $this->parseError("Expecting quotes, got %s.", $tok);
+ $this->parseError('Expecting quotes, got %s.', $tok);
+
return $this->unquotedAttributeValue();
default:
return $this->unquotedAttributeValue();
@@ -562,11 +564,11 @@ class Tokenizer
/**
* Get an attribute value string.
*
- * @param string $quote
- * IMPORTANT: This is a series of chars! Any one of which will be considered
- * termination of an attribute's value. E.g. "\"'" will stop at either
- * ' or ".
- * @return string The attribute value.
+ * @param string $quote IMPORTANT: This is a series of chars! Any one of which will be considered
+ * termination of an attribute's value. E.g. "\"'" will stop at either
+ * ' or ".
+ *
+ * @return string the attribute value
*/
protected function quotedAttributeValue($quote)
{
@@ -574,21 +576,22 @@ class Tokenizer
$val = '';
while (true) {
- $tokens = $this->scanner->charsUntil($stoplist.'&');
- if ($tokens !== false) {
+ $tokens = $this->scanner->charsUntil($stoplist . '&');
+ if (false !== $tokens) {
$val .= $tokens;
} else {
break;
}
$tok = $this->scanner->current();
- if ($tok == '&') {
+ if ('&' == $tok) {
$val .= $this->decodeCharacterReference(true);
continue;
}
break;
}
$this->scanner->next();
+
return $val;
}
@@ -597,34 +600,34 @@ class Tokenizer
$stoplist = "\t\n\f >";
$val = '';
$tok = $this->scanner->current();
- while (strspn($tok, $stoplist) == 0 && $tok !== false) {
- if ($tok == '&') {
+ while (0 == strspn($tok, $stoplist) && false !== $tok) {
+ if ('&' == $tok) {
$val .= $this->decodeCharacterReference(true);
$tok = $this->scanner->current();
} else {
if (strspn($tok, "\"'<=`") > 0) {
- $this->parseError("Unexpected chars in unquoted attribute value %s", $tok);
+ $this->parseError('Unexpected chars in unquoted attribute value %s', $tok);
}
$val .= $tok;
$tok = $this->scanner->next();
}
}
+
return $val;
}
/**
* Consume malformed markup as if it were a comment.
- * 8.2.4.44
+ * 8.2.4.44.
*
* The spec requires that the ENTIRE tag-like thing be enclosed inside of
* the comment. So this will generate comments like:
*
* &lt;!--&lt/+foo&gt;--&gt;
*
- * @param string $leading
- * Prepend any leading characters. This essentially
- * negates the need to backtrack, but it's sort of
- * a hack.
+ * @param string $leading Prepend any leading characters. This essentially
+ * negates the need to backtrack, but it's sort of
+ * a hack.
*
* @return bool
*/
@@ -632,11 +635,11 @@ class Tokenizer
{
$comment = $leading;
$tokens = $this->scanner->charsUntil('>');
- if ($tokens !== false) {
+ if (false !== $tokens) {
$comment .= $tokens;
}
$tok = $this->scanner->current();
- if ($tok !== false) {
+ if (false !== $tok) {
$comment .= $tok;
}
@@ -660,25 +663,27 @@ class Tokenizer
$comment = '';
// <!-->. Emit an empty comment because 8.2.4.46 says to.
- if ($tok == '>') {
+ if ('>' == $tok) {
// Parse error. Emit the comment token.
$this->parseError("Expected comment data, got '>'");
$this->events->comment('');
$this->scanner->next();
+
return true;
}
// Replace NULL with the replacement char.
- if ($tok == "\0") {
+ if ("\0" == $tok) {
$tok = UTF8Utils::FFFD;
}
- while (! $this->isCommentEnd()) {
+ while (!$this->isCommentEnd()) {
$comment .= $tok;
$tok = $this->scanner->next();
}
$this->events->comment($comment);
$this->scanner->next();
+
return true;
}
@@ -692,24 +697,26 @@ class Tokenizer
$tok = $this->scanner->current();
// EOF
- if ($tok === false) {
+ if (false === $tok) {
// Hit the end.
- $this->parseError("Unexpected EOF in a comment.");
+ $this->parseError('Unexpected EOF in a comment.');
+
return true;
}
// If it doesn't start with -, not the end.
- if ($tok != '-') {
+ if ('-' != $tok) {
return false;
}
// Advance one, and test for '->'
- if ($this->scanner->next() == '-' && $this->scanner->peek() == '>') {
+ if ('-' == $this->scanner->next() && '>' == $this->scanner->peek()) {
$this->scanner->next(); // Consume the last '>'
return true;
}
// Unread '-';
$this->scanner->unconsume(1);
+
return false;
}
@@ -729,9 +736,10 @@ class Tokenizer
return false;
}
// Check that string is DOCTYPE.
- $chars = $this->scanner->charsWhile("DOCTYPEdoctype");
+ $chars = $this->scanner->charsWhile('DOCTYPEdoctype');
if (strcasecmp($chars, 'DOCTYPE')) {
$this->parseError('Expected DOCTYPE, got %s', $chars);
+
return $this->bogusComment('<!' . $chars);
}
@@ -739,14 +747,15 @@ class Tokenizer
$tok = $this->scanner->current();
// EOF: die.
- if ($tok === false) {
+ if (false === $tok) {
$this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true);
+
return $this->eof($tok);
}
// NULL char: convert.
- if ($tok === "\0") {
- $this->parseError("Unexpected null character in DOCTYPE.");
+ if ("\0" === $tok) {
+ $this->parseError('Unexpected null character in DOCTYPE.');
}
$stop = " \n\f>";
@@ -757,23 +766,26 @@ class Tokenizer
$tok = $this->scanner->current();
// If false, emit a parse error, DOCTYPE, and return.
- if ($tok === false) {
+ if (false === $tok) {
$this->parseError('Unexpected EOF in DOCTYPE declaration.');
$this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, null, true);
+
return true;
}
// Short DOCTYPE, like <!DOCTYPE html>
- if ($tok == '>') {
+ if ('>' == $tok) {
// DOCTYPE without a name.
- if (strlen($doctypeName) == 0) {
- $this->parseError("Expected a DOCTYPE name. Got nothing.");
+ if (0 == strlen($doctypeName)) {
+ $this->parseError('Expected a DOCTYPE name. Got nothing.');
$this->events->doctype($doctypeName, 0, null, true);
$this->scanner->next();
+
return true;
}
$this->events->doctype($doctypeName);
$this->scanner->next();
+
return true;
}
$this->scanner->whitespace();
@@ -782,36 +794,40 @@ class Tokenizer
$white = $this->scanner->whitespace();
// Get ID, and flag it as pub or system.
- if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
+ if (('PUBLIC' == $pub || 'SYSTEM' == $pub) && $white > 0) {
// Get the sys ID.
- $type = $pub == 'PUBLIC' ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
+ $type = 'PUBLIC' == $pub ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
$id = $this->quotedString("\0>");
- if ($id === false) {
+ if (false === $id) {
$this->events->doctype($doctypeName, $type, $pub, false);
+
return false;
}
// Premature EOF.
- if ($this->scanner->current() === false) {
- $this->parseError("Unexpected EOF in DOCTYPE");
+ if (false === $this->scanner->current()) {
+ $this->parseError('Unexpected EOF in DOCTYPE');
$this->events->doctype($doctypeName, $type, $id, true);
+
return true;
}
// Well-formed complete DOCTYPE.
$this->scanner->whitespace();
- if ($this->scanner->current() == '>') {
+ if ('>' == $this->scanner->current()) {
$this->events->doctype($doctypeName, $type, $id, false);
$this->scanner->next();
+
return true;
}
// If we get here, we have <!DOCTYPE foo PUBLIC "bar" SOME_JUNK
// Throw away the junk, parse error, quirks mode, return true.
- $this->scanner->charsUntil(">");
- $this->parseError("Malformed DOCTYPE.");
+ $this->scanner->charsUntil('>');
+ $this->parseError('Malformed DOCTYPE.');
$this->events->doctype($doctypeName, $type, $id, true);
$this->scanner->next();
+
return true;
}
@@ -819,35 +835,37 @@ class Tokenizer
// Consume to > and trash.
$this->scanner->charsUntil('>');
- $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub);
+ $this->parseError('Expected PUBLIC or SYSTEM. Got %s.', $pub);
$this->events->doctype($doctypeName, 0, null, true);
$this->scanner->next();
+
return true;
}
/**
* Utility for reading a quoted string.
*
- * @param string $stopchars
- * Characters (in addition to a close-quote) that should stop the string.
- * E.g. sometimes '>' is higher precedence than '"' or "'".
+ * @param string $stopchars Characters (in addition to a close-quote) that should stop the string.
+ * E.g. sometimes '>' is higher precedence than '"' or "'".
*
* @return mixed String if one is found (quotations omitted)
*/
protected function quotedString($stopchars)
{
$tok = $this->scanner->current();
- if ($tok == '"' || $tok == "'") {
+ if ('"' == $tok || "'" == $tok) {
$this->scanner->next();
$ret = $this->scanner->charsUntil($tok . $stopchars);
if ($this->scanner->current() == $tok) {
$this->scanner->next();
} else {
// Parse error because no close quote.
- $this->parseError("Expected %s, got %s", $tok, $this->scanner->current());
+ $this->parseError('Expected %s, got %s', $tok, $this->scanner->current());
}
+
return $ret;
}
+
return false;
}
@@ -858,39 +876,43 @@ class Tokenizer
*/
protected function cdataSection()
{
- if ($this->scanner->current() != '[') {
+ if ('[' != $this->scanner->current()) {
return false;
}
$cdata = '';
$this->scanner->next();
$chars = $this->scanner->charsWhile('CDAT');
- if ($chars != 'CDATA' || $this->scanner->current() != '[') {
+ if ('CDATA' != $chars || '[' != $this->scanner->current()) {
$this->parseError('Expected [CDATA[, got %s', $chars);
+
return $this->bogusComment('<![' . $chars);
}
$tok = $this->scanner->next();
do {
- if ($tok === false) {
+ if (false === $tok) {
$this->parseError('Unexpected EOF inside CDATA.');
$this->bogusComment('<![CDATA[' . $cdata);
+
return true;
}
$cdata .= $tok;
$tok = $this->scanner->next();
- } while (! $this->scanner->sequenceMatches(']]>'));
+ } while (!$this->scanner->sequenceMatches(']]>'));
// Consume ]]>
$this->scanner->consume(3);
$this->events->cdata($cdata);
+
return true;
}
// ================================================================
// Non-HTML5
// ================================================================
+
/**
* Handle a processing instruction.
*
@@ -903,7 +925,7 @@ class Tokenizer
*/
protected function processingInstruction()
{
- if ($this->scanner->current() != '?') {
+ if ('?' != $this->scanner->current()) {
return false;
}
@@ -912,21 +934,23 @@ class Tokenizer
$white = $this->scanner->whitespace();
// If not a PI, send to bogusComment.
- if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == false) {
+ if (0 == strlen($procName) || 0 == $white || false == $this->scanner->current()) {
$this->parseError("Expected processing instruction name, got $tok");
$this->bogusComment('<?' . $tok . $procName);
+
return true;
}
$data = '';
// As long as it's not the case that the next two chars are ? and >.
- while (! ($this->scanner->current() == '?' && $this->scanner->peek() == '>')) {
+ while (!('?' == $this->scanner->current() && '>' == $this->scanner->peek())) {
$data .= $this->scanner->current();
$tok = $this->scanner->next();
- if ($tok === false) {
- $this->parseError("Unexpected EOF in processing instruction.");
+ if (false === $tok) {
+ $this->parseError('Unexpected EOF in processing instruction.');
$this->events->processingInstruction($procName, $data);
+
return true;
}
}
@@ -934,6 +958,7 @@ class Tokenizer
$this->scanner->next(); // >
$this->scanner->next(); // Next token.
$this->events->processingInstruction($procName, $data);
+
return true;
}
@@ -955,7 +980,7 @@ class Tokenizer
// Optimization for reading larger blocks faster.
$first = substr($sequence, 0, 1);
- while ($this->scanner->current() !== false) {
+ while (false !== $this->scanner->current()) {
$buffer .= $this->scanner->charsUntil($first);
// Stop as soon as we hit the stopping condition.
@@ -967,7 +992,8 @@ class Tokenizer
}
// If we get here, we hit the EOF.
- $this->parseError("Unexpected EOF during text read.");
+ $this->parseError('Unexpected EOF during text read.');
+
return $buffer;
}
@@ -985,7 +1011,7 @@ class Tokenizer
* '</script>' string.
*
* @param string $sequence
- * @param bool $caseSensitive
+ * @param bool $caseSensitive
*
* @return bool
*/
@@ -1005,7 +1031,7 @@ class Tokenizer
*/
protected function flushBuffer()
{
- if ($this->text === '') {
+ if ('' === $this->text) {
return;
}
$this->events->text($this->text);
@@ -1055,9 +1081,8 @@ class Tokenizer
*
* If $inAttribute is set to true, a bare & will be returned as-is.
*
- * @param bool $inAttribute
- * Set to true if the text is inside of an attribute value.
- * false otherwise.
+ * @param bool $inAttribute Set to true if the text is inside of an attribute value.
+ * false otherwise.
*
* @return string
*/
@@ -1067,36 +1092,37 @@ class Tokenizer
$tok = $this->scanner->next();
$start = $this->scanner->position();
- if ($tok == false) {
+ if (false === $tok) {
return '&';
}
// These indicate not an entity. We return just
// the &.
- if (strspn($tok, static::WHITE . "&<") == 1) {
+ if (1 === strspn($tok, static::WHITE . '&<')) {
// $this->scanner->next();
return '&';
}
// Numeric entity
- if ($tok == '#') {
+ if ('#' === $tok) {
$tok = $this->scanner->next();
// Hexidecimal encoding.
// X[0-9a-fA-F]+;
// x[0-9a-fA-F]+;
- if ($tok == 'x' || $tok == 'X') {
+ if ('x' === $tok || 'X' === $tok) {
$tok = $this->scanner->next(); // Consume x
// Convert from hex code to char.
$hex = $this->scanner->getHex();
if (empty($hex)) {
- $this->parseError("Expected &#xHEX;, got &#x%s", $tok);
+ $this->parseError('Expected &#xHEX;, got &#x%s', $tok);
// We unconsume because we don't know what parser rules might
// be in effect for the remaining chars. For example. '&#>'
// might result in a specific parsing rule inside of tag
// contexts, while not inside of pcdata context.
$this->scanner->unconsume(2);
+
return '&';
}
$entity = CharacterReference::lookupHex($hex);
@@ -1105,17 +1131,17 @@ class Tokenizer
else {
// Convert from decimal to char.
$numeric = $this->scanner->getNumeric();
- if ($numeric === false) {
- $this->parseError("Expected &#DIGITS;, got &#%s", $tok);
+ if (false === $numeric) {
+ $this->parseError('Expected &#DIGITS;, got &#%s', $tok);
$this->scanner->unconsume(2);
+
return '&';
}
$entity = CharacterReference::lookupDecimal($numeric);
}
- } elseif ($tok === '=' && $inAttribute) {
+ } elseif ('=' === $tok && $inAttribute) {
return '&';
} else { // String entity.
-
// Attempt to consume a string up to a ';'.
// [a-zA-Z0-9]+;
$cname = $this->scanner->getAsciiAlphaNum();
@@ -1124,11 +1150,12 @@ class Tokenizer
// When no entity is found provide the name of the unmatched string
// and continue on as the & is not part of an entity. The & will
// be converted to &amp; elsewhere.
- if ($entity == null) {
- if (!$inAttribute || strlen($cname) === 0) {
+ if (null === $entity) {
+ if (!$inAttribute || '' === $cname) {
$this->parseError("No match in entity table for '%s'", $cname);
}
$this->scanner->unconsume($this->scanner->position() - $start);
+
return '&';
}
}
@@ -1137,8 +1164,9 @@ class Tokenizer
$tok = $this->scanner->current();
// We have an entity. We're done here.
- if ($tok == ';') {
+ if (';' === $tok) {
$this->scanner->next();
+
return $entity;
}
@@ -1146,10 +1174,12 @@ class Tokenizer
// entire string. Otherwise, failure to match is an error.
if ($inAttribute) {
$this->scanner->unconsume($this->scanner->position() - $start);
+
return '&';
}
- $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok);
+ $this->parseError('Expected &ENTITY;, got &ENTITY%s (no trailing ;) ', $tok);
+
return '&' . $entity;
}
}
diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php
index d092872..9b94185 100644
--- a/src/HTML5/Parser/TreeBuildingRules.php
+++ b/src/HTML5/Parser/TreeBuildingRules.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Parser;
/**
@@ -14,7 +15,6 @@ namespace Masterminds\HTML5\Parser;
*/
class TreeBuildingRules
{
-
protected static $tags = array(
'li' => 1,
'dd' => 1,
@@ -29,21 +29,10 @@ class TreeBuildingRules
'tbody' => 1,
'table' => 1,
'optgroup' => 1,
- 'option' => 1
+ 'option' => 1,
);
/**
- * Build a new rules engine.
- *
- * @param \DOMDocument $doc
- * The DOM document to use for evaluation and modification.
- */
- public function __construct($doc)
- {
- $this->doc = $doc;
- }
-
- /**
* Returns true if the given tagname has special processing rules.
*/
public function hasRules($tagname)
@@ -56,7 +45,7 @@ class TreeBuildingRules
*
* This may modify the existing DOM.
*
- * @return \DOMElement The new Current DOM element.
+ * @return \DOMElement the new Current DOM element
*/
public function evaluate($new, $current)
{
@@ -71,7 +60,7 @@ class TreeBuildingRules
return $this->handleRT($new, $current);
case 'optgroup':
return $this->closeIfCurrentMatches($new, $current, array(
- 'optgroup'
+ 'optgroup',
));
case 'option':
return $this->closeIfCurrentMatches($new, $current, array(
@@ -79,13 +68,13 @@ class TreeBuildingRules
));
case 'tr':
return $this->closeIfCurrentMatches($new, $current, array(
- 'tr'
+ 'tr',
));
case 'td':
case 'th':
return $this->closeIfCurrentMatches($new, $current, array(
'th',
- 'td'
+ 'td',
));
case 'tbody':
case 'thead':
@@ -95,7 +84,7 @@ class TreeBuildingRules
return $this->closeIfCurrentMatches($new, $current, array(
'thead',
'tfoot',
- 'tbody'
+ 'tbody',
));
}
@@ -105,7 +94,7 @@ class TreeBuildingRules
protected function handleLI($ele, $current)
{
return $this->closeIfCurrentMatches($ele, $current, array(
- 'li'
+ 'li',
));
}
@@ -113,7 +102,7 @@ class TreeBuildingRules
{
return $this->closeIfCurrentMatches($ele, $current, array(
'dt',
- 'dd'
+ 'dd',
));
}
@@ -121,13 +110,13 @@ class TreeBuildingRules
{
return $this->closeIfCurrentMatches($ele, $current, array(
'rt',
- 'rp'
+ 'rp',
));
}
protected function closeIfCurrentMatches($ele, $current, $match)
{
- if (in_array($current->tagName, $match)) {
+ if (in_array($current->tagName, $match, true)) {
$current->parentNode->appendChild($ele);
} else {
$current->appendChild($ele);
diff --git a/src/HTML5/Parser/UTF8Utils.php b/src/HTML5/Parser/UTF8Utils.php
index 451c155..77c2dfb 100644
--- a/src/HTML5/Parser/UTF8Utils.php
+++ b/src/HTML5/Parser/UTF8Utils.php
@@ -1,5 +1,7 @@
<?php
+
namespace Masterminds\HTML5\Parser;
+
/*
*
* Portions based on code from html5lib files with the following copyright:
@@ -30,11 +32,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
use Masterminds\HTML5\Exception;
/**
- * UTF-8 Utilities
+ * UTF-8 Utilities.
*/
class UTF8Utils
{
-
/**
* The Unicode replacement character..
*/
@@ -76,10 +77,8 @@ class UTF8Utils
* This has not yet been tested with character sets other than UTF-8.
* It should work with ISO-8859-1/-13 and standard Latin Win charsets.
*
- * @param string $data
- * The data to convert.
- * @param string $encoding
- * A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
+ * @param string $data The data to convert
+ * @param string $encoding A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
*
* @return string
*/
@@ -108,7 +107,7 @@ class UTF8Utils
$data = mb_convert_encoding($data, 'UTF-8', $encoding);
mb_substitute_character($save);
} // @todo Get iconv running in at least some environments if that is possible.
- elseif (function_exists('iconv') && $encoding != 'auto') {
+ elseif (function_exists('iconv') && 'auto' !== $encoding) {
// fprintf(STDOUT, "iconv found\n");
// iconv has the following behaviors:
// - Overlong representations are ignored.
@@ -122,7 +121,7 @@ class UTF8Utils
/*
* One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present.
*/
- if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
+ if ("\xEF\xBB\xBF" === substr($data, 0, 3)) {
$data = substr($data, 3);
}
@@ -132,9 +131,9 @@ class UTF8Utils
/**
* Checks for Unicode code points that are not valid in a document.
*
- * @param string $data A string to analyze.
+ * @param string $data a string to analyze
*
- * @return array An array of (string) error messages produced by the scanning.
+ * @return array an array of (string) error messages produced by the scanning
*/
public static function checkForIllegalCodepoints($data)
{
@@ -144,7 +143,7 @@ class UTF8Utils
/*
* All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such characters is a parse error.
*/
- for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i ++) {
+ for ($i = 0, $count = substr_count($data, "\0"); $i < $count; ++$i) {
$errors[] = 'null-character';
}
@@ -166,7 +165,7 @@ class UTF8Utils
|
[\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
)/x', $data, $matches);
- for ($i = 0; $i < $count; $i ++) {
+ for ($i = 0; $i < $count; ++$i) {
$errors[] = 'invalid-codepoint';
}
diff --git a/src/HTML5/Serializer/HTML5Entities.php b/src/HTML5/Serializer/HTML5Entities.php
index 4f90f84..e9421a1 100644
--- a/src/HTML5/Serializer/HTML5Entities.php
+++ b/src/HTML5/Serializer/HTML5Entities.php
@@ -3,11 +3,12 @@
* @file
* This contains HTML5 entities to use with serializing.
*
- * The list here is mildly different from the list at \Masterminds\HTML5\Entities because
+ * The list here is mildly different from the list at Entities because
* that list was generated from the w3c. It contains some entities that are
* not entirely proper such as &am; which maps to &. This list is meant to be
* a fallback for PHP versions prior to PHP 5.4 when dealing with encoding.
*/
+
namespace Masterminds\HTML5\Serializer;
/**
@@ -17,7 +18,6 @@ namespace Masterminds\HTML5\Serializer;
*/
class HTML5Entities
{
-
public static $map = array(
' ' => '&Tab;',
"\n" => '&NewLine;',
@@ -1528,6 +1528,6 @@ class HTML5Entities
'𝕨' => '&wopf;',
'𝕩' => '&xopf;',
'𝕪' => '&yopf;',
- '𝕫' => '&zopf;'
+ '𝕫' => '&zopf;',
);
}
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php
index a22683c..2711d1b 100644
--- a/src/HTML5/Serializer/OutputRules.php
+++ b/src/HTML5/Serializer/OutputRules.php
@@ -6,6 +6,7 @@
* These output rules are likely to generate output similar to the document that
* was parsed. It is not intended to output exactly the document that was parsed.
*/
+
namespace Masterminds\HTML5\Serializer;
use Masterminds\HTML5\Elements;
@@ -13,10 +14,10 @@ use Masterminds\HTML5\Elements;
/**
* Generate the output html5 based on element rules.
*/
-class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
+class OutputRules implements RulesInterface
{
/**
- * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
+ * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
*/
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
@@ -31,7 +32,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
/**
- * Holds the HTML5 element names that causes a namespace switch
+ * Holds the HTML5 element names that cause a namespace switch.
*
* @var array
*/
@@ -50,8 +51,9 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
const IM_IN_MATHML = 3;
/**
- * Used as cache to detect if is available ENT_HTML5
- * @var boolean
+ * Used as a cache to detect whether ENT_HTML5 is available.
+ *
+ * @var bool
*/
private $hasHTML5 = false;
@@ -169,12 +171,13 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// If HHVM, see https://github.com/facebook/hhvm/issues/2727
$this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
}
+
public function addRule(array $rule)
{
$this->nonBooleanAttributes[] = $rule;
}
- public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
+ public function setTraverser(Traverser $traverser)
{
$this->traverser = $traverser;
@@ -211,10 +214,10 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// If we are in SVG or MathML there is special handling.
// Using if/elseif instead of switch because it's faster in PHP.
- if ($name == 'svg') {
+ if ('svg' == $name) {
$this->outputMode = static::IM_IN_SVG;
$name = Elements::normalizeSvgElement($name);
- } elseif ($name == 'math') {
+ } elseif ('math' == $name) {
$this->outputMode = static::IM_IN_MATHML;
}
@@ -234,13 +237,13 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
}
// Close out the SVG or MathML special handling.
- if ($name == 'svg' || $name == 'math') {
+ if ('svg' == $name || 'math' == $name) {
$this->outputMode = static::IM_IN_HTML;
}
}
// If not unary, add a closing tag.
- if (! Elements::isA($name, Elements::VOID_TAG)) {
+ if (!Elements::isA($name, Elements::VOID_TAG)) {
$this->closeTag($ele);
}
}
@@ -248,13 +251,13 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
/**
* Write a text node.
*
- * @param \DOMText $ele
- * The text node to write.
+ * @param \DOMText $ele The text node to write
*/
public function text($ele)
{
if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
$this->wr($ele->data);
+
return;
}
@@ -283,20 +286,20 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
->wr($ele->data)
->wr('?>');
}
+
/**
- * Write the namespace attributes
+ * Write the namespace attributes.
*
*
- * @param \DOMNode $ele
- * The element being written.
+ * @param \DOMNode $ele The element being written
*/
protected function namespaceAttrs($ele)
{
- if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument){
+ if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
$this->xpath = new \DOMXPath($ele->ownerDocument);
}
- foreach( $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele ) as $nsNode ) {
+ foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) {
$this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
}
@@ -309,18 +312,15 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
* Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
* qualified name (8.3).
*
- * @param \DOMNode $ele
- * The element being written.
+ * @param \DOMNode $ele The element being written
*/
protected function openTag($ele)
{
$this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);
-
$this->attrs($ele);
$this->namespaceAttrs($ele);
-
if ($this->outputMode == static::IM_IN_HTML) {
$this->wr('>');
} // If we are not in html mode we are in SVG, MathML, or XML embedded content.
@@ -337,7 +337,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
protected function attrs($ele)
{
// FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
- if (! $ele->hasAttributes()) {
+ if (!$ele->hasAttributes()) {
return $this;
}
@@ -345,7 +345,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// value-less attributes.
$map = $ele->attributes;
$len = $map->length;
- for ($i = 0; $i < $len; ++ $i) {
+ for ($i = 0; $i < $len; ++$i) {
$node = $map->item($i);
$val = $this->enc($node->value, true);
@@ -365,45 +365,42 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
$this->wr(' ')->wr($name);
- if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) {
+ if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) {
$this->wr('="')->wr($val)->wr('"');
}
}
}
-
protected function nonBooleanAttribute(\DOMAttr $attr)
{
$ele = $attr->ownerElement;
- foreach($this->nonBooleanAttributes as $rule){
-
- if(isset($rule['nodeNamespace']) && $rule['nodeNamespace']!==$ele->namespaceURI){
+ foreach ($this->nonBooleanAttributes as $rule) {
+ if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
continue;
}
- if(isset($rule['attNamespace']) && $rule['attNamespace']!==$attr->namespaceURI){
+ if (isset($rule['attNamespace']) && $rule['attNamespace'] !== $attr->namespaceURI) {
continue;
}
- if(isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName']!==$ele->localName){
+ if (isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName'] !== $ele->localName) {
continue;
}
- if(isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)){
+ if (isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)) {
continue;
}
- if(isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName']!==$attr->localName){
+ if (isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName'] !== $attr->localName) {
continue;
}
- if(isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)){
+ if (isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)) {
continue;
}
- if(isset($rule['xpath'])){
-
+ if (isset($rule['xpath'])) {
$xp = $this->getXPath($attr);
- if(isset($rule['prefixes'])){
- foreach($rule['prefixes'] as $nsPrefix => $ns){
+ if (isset($rule['prefixes'])) {
+ foreach ($rule['prefixes'] as $nsPrefix => $ns) {
$xp->registerNamespace($nsPrefix, $ns);
}
}
- if(!$xp->evaluate($rule['xpath'], $attr)){
+ if (!$xp->evaluate($rule['xpath'], $attr)) {
continue;
}
}
@@ -414,10 +411,12 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
return false;
}
- private function getXPath(\DOMNode $node){
- if(!$this->xpath){
+ private function getXPath(\DOMNode $node)
+ {
+ if (!$this->xpath) {
$this->xpath = new \DOMXPath($node->ownerDocument);
}
+
return $this->xpath;
}
@@ -427,8 +426,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
* Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
* qualified name (8.3).
*
- * @param \DOMNode $ele
- * The element being written.
+ * @param \DOMNode $ele The element being written
*/
protected function closeTag($ele)
{
@@ -440,25 +438,26 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
/**
* Write to the output.
*
- * @param string $text
- * The string to put into the output.
+ * @param string $text The string to put into the output
*
- * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
+ * @return Traverser $this so it can be used in chaining
*/
protected function wr($text)
{
fwrite($this->out, $text);
+
return $this;
}
/**
* Write a new line character.
*
- * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
+ * @return Traverser $this so it can be used in chaining
*/
protected function nl()
{
fwrite($this->out, PHP_EOL);
+
return $this;
}
@@ -484,18 +483,15 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*
* @todo Use the Entities class in php 5.3 to have html5 entities.
*
- * @param string $text
- * text to encode.
- * @param boolean $attribute
- * True if we are encoding an attrubute, false otherwise
+ * @param string $text text to encode
+ * @param bool $attribute True if we are encoding an attribute, false otherwise
*
- * @return string The encoded text.
+ * @return string the encoded text
*/
protected function enc($text, $attribute = false)
{
-
// Escape the text rather than convert to named character references.
- if (! $this->encode) {
+ if (!$this->encode) {
return $this->escape($text, $attribute);
}
@@ -507,7 +503,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
} // If a version earlier than 5.4 html5 entities are not entirely handled.
// This manually handles them.
else {
- return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
+ return strtr($text, HTML5Entities::$map);
}
}
@@ -525,14 +521,11 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
*
* @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
*
- * @param string $text
- * text to escape.
- * @param boolean $attribute
- * True if we are escaping an attrubute, false otherwise
+ * @param string $text text to escape
+ * @param bool $attribute True if we are escaping an attribute, false otherwise
*/
protected function escape($text, $attribute = false)
{
-
// Not using htmlspecialchars because, while it does escaping, it doesn't
// match the requirements of section 8.5. For example, it doesn't handle
// non-breaking spaces.
@@ -540,14 +533,14 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
$replace = array(
'"' => '&quot;',
'&' => '&amp;',
- "\xc2\xa0" => '&nbsp;'
+ "\xc2\xa0" => '&nbsp;',
);
} else {
$replace = array(
'<' => '&lt;',
'>' => '&gt;',
'&' => '&amp;',
- "\xc2\xa0" => '&nbsp;'
+ "\xc2\xa0" => '&nbsp;',
);
}
diff --git a/src/HTML5/Serializer/RulesInterface.php b/src/HTML5/Serializer/RulesInterface.php
index 6ef5e5e..6d3f60e 100644
--- a/src/HTML5/Serializer/RulesInterface.php
+++ b/src/HTML5/Serializer/RulesInterface.php
@@ -3,28 +3,24 @@
* @file
* The interface definition for Rules to generate output.
*/
+
namespace Masterminds\HTML5\Serializer;
/**
- * To create a new rule set for writing output the RulesInterface needs to be
- * implemented.
- * The resulting class can be specified in the options with the
- * key of rules.
+ * To create a new rule set for writing output the RulesInterface needs to be implemented.
+ * The resulting class can be specified in the options with the key of rules.
*
- * For an example implementation see \Masterminds\HTML5\Serializer\OutputRules.
+ * For an example implementation see Serializer\OutputRules.
*/
interface RulesInterface
{
-
/**
* The class constructor.
*
* Note, before the rules can be used a traverser must be registered.
*
- * @param mixed $output
- * The output stream to write output to.
- * @param array $options
- * An array of options.
+ * @param mixed $output The output stream to write output to
+ * @param array $options An array of options
*/
public function __construct($output, $options = array());
@@ -33,11 +29,11 @@ interface RulesInterface
*
* Note, only one traverser can be used by the rules.
*
- * @param \Masterminds\HTML5\Serializer\Traverser $traverser
- * The traverser used in the rules.
- * @return \Masterminds\HTML5\Serializer\RulesInterface $this for the current object.
+ * @param Traverser $traverser The traverser used in the rules
+ *
+ * @return RulesInterface $this for the current object
*/
- public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser);
+ public function setTraverser(Traverser $traverser);
/**
* Write a document element (\DOMDocument).
@@ -92,7 +88,7 @@ interface RulesInterface
/**
* Write a processor instruction.
*
- * To learn about processor instructions see \Masterminds\HTML5\InstructionProcessor
+ * To learn about processor instructions see InstructionProcessor
*
* Instead of returning the result write it to the output stream ($output)
* that was passed into the constructor.
diff --git a/src/HTML5/Serializer/Traverser.php b/src/HTML5/Serializer/Traverser.php
index 399570d..a33943f 100644
--- a/src/HTML5/Serializer/Traverser.php
+++ b/src/HTML5/Serializer/Traverser.php
@@ -1,4 +1,5 @@
<?php
+
namespace Masterminds\HTML5\Serializer;
/**
@@ -12,14 +13,13 @@ namespace Masterminds\HTML5\Serializer;
*/
class Traverser
{
-
/**
* Namespaces that should be treated as "local" to HTML5.
*/
protected static $local_ns = array(
'http://www.w3.org/1999/xhtml' => 'html',
'http://www.w3.org/1998/Math/MathML' => 'math',
- 'http://www.w3.org/2000/svg' => 'svg'
+ 'http://www.w3.org/2000/svg' => 'svg',
);
protected $dom;
@@ -35,16 +35,13 @@ class Traverser
/**
* Create a traverser.
*
- * @param DOMNode|DOMNodeList $dom
- * The document or node to traverse.
- * @param resource $out
- * A stream that allows writing. The traverser will output into this
- * stream.
- * @param array $options
- * An array or options for the traverser as key/value pairs. These include:
- * - encode_entities: A bool to specify if full encding should happen for all named
- * charachter references. Defaults to false which escapes &'<>".
- * - output_rules: The path to the class handling the output rules.
+ * @param \DOMNode|\DOMNodeList $dom The document or node to traverse
+ * @param resource $out A stream that allows writing. The traverser will output into this
+ * stream.
+ * @param array $options An array of options for the traverser as key/value pairs. These include:
+ * - encode_entities: A bool to specify if full encoding should happen for all named
+ * character references. Defaults to false which escapes &'<>".
+ * - output_rules: The path to the class handling the output rules.
*/
public function __construct($dom, $out, RulesInterface $rules, $options = array())
{
@@ -59,8 +56,7 @@ class Traverser
/**
* Tell the traverser to walk the DOM.
*
- * @return resource $out
- * Returns the output stream.
+ * @return resource $out Returns the output stream
*/
public function walk()
{
@@ -87,8 +83,7 @@ class Traverser
/**
* Process a node in the DOM.
*
- * @param mixed $node
- * A node implementing \DOMNode.
+ * @param mixed $node A node implementing \DOMNode
*/
public function node($node)
{
@@ -119,8 +114,7 @@ class Traverser
/**
* Walk through all the nodes on a node list.
*
- * @param \DOMNodeList $nl
- * A list of child elements to walk through.
+ * @param \DOMNodeList $nl A list of child elements to walk through
*/
public function children($nl)
{
@@ -132,10 +126,9 @@ class Traverser
/**
* Is an element local?
*
- * @param mixed $ele
- * An element that implement \DOMNode.
+ * @param mixed $ele An element that implements \DOMNode
*
- * @return bool True if local and false otherwise.
+ * @return bool true if local and false otherwise
*/
public function isLocalElement($ele)
{