summaryrefslogtreecommitdiff
path: root/vendor/masterminds/html5/src/HTML5.php
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2022-12-13 20:00:46 +0300
committerAndrew Dolgov <[email protected]>2022-12-13 20:00:46 +0300
commit0226e0ca0dc70f9a0310b3eef045ee1c1e0ca3ac (patch)
tree254c0e880693c2636107308b5931951cfa501311 /vendor/masterminds/html5/src/HTML5.php
split into a separate repo
Diffstat (limited to 'vendor/masterminds/html5/src/HTML5.php')
-rw-r--r--vendor/masterminds/html5/src/HTML5.php246
1 files changed, 246 insertions, 0 deletions
diff --git a/vendor/masterminds/html5/src/HTML5.php b/vendor/masterminds/html5/src/HTML5.php
new file mode 100644
index 0000000..c857145
--- /dev/null
+++ b/vendor/masterminds/html5/src/HTML5.php
@@ -0,0 +1,246 @@
+<?php
+
+namespace Masterminds;
+
+use Masterminds\HTML5\Parser\DOMTreeBuilder;
+use Masterminds\HTML5\Parser\Scanner;
+use Masterminds\HTML5\Parser\Tokenizer;
+use Masterminds\HTML5\Serializer\OutputRules;
+use Masterminds\HTML5\Serializer\Traverser;
+
+/**
+ * Facade offering convenience methods for parsing and serializing HTML5.
+ *
+ * The method names are roughly modelled on the native \DOMDocument class.
+ */
+class HTML5
+{
+    /**
+     * Global options for the parser and serializer.
+     *
+     * @var array
+     */
+    private $defaultOptions = array(
+        // Whether the serializer should aggressively encode all characters as entities.
+        'encode_entities' => false,
+
+        // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
+        'disable_html_ns' => false,
+    );
+
+    /**
+     * Errors collected from the tree builder during the most recent parse.
+     *
+     * @var array
+     */
+    protected $errors = array();
+
+    /**
+     * @param array $defaultOptions Overrides merged on top of the built-in default options.
+     */
+    public function __construct(array $defaultOptions = array())
+    {
+        $this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
+    }
+
+    /**
+     * Get the current default options.
+     *
+     * @return array
+     */
+    public function getOptions()
+    {
+        return $this->defaultOptions;
+    }
+
+    /**
+     * Load and parse an HTML file.
+     *
+     * This will apply the HTML5 parser, which is tolerant of many
+     * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
+     * 3. Note that in these cases, not all of the old data will be
+     * preserved. For example, XHTML's XML declaration will be removed.
+     *
+     * The rules governing parsing are set out in the HTML 5 spec.
+     *
+     * Read failures are not checked here: whatever stream_get_contents() or
+     * file_get_contents() returns is handed to parse() unchanged.
+     *
+     * @param string|resource $file    The path to the file to parse. If this is a resource, it is
+     *                                 assumed to be an open stream whose pointer is set to the first
+     *                                 byte of input.
+     * @param array           $options Configuration options when parsing the HTML.
+     *
+     * @return \DOMDocument A DOM document. This object type is defined by the libxml
+     *                      library, and should have been included with your version of PHP.
+     */
+    public function load($file, array $options = array())
+    {
+        // An already-open stream is consumed from its current position.
+        if (is_resource($file)) {
+            return $this->parse(stream_get_contents($file), $options);
+        }
+
+        return $this->parse(file_get_contents($file), $options);
+    }
+
+    /**
+     * Parse an HTML document from a string.
+     *
+     * Take a string of HTML 5 (or earlier) and parse it into a
+     * DOMDocument.
+     *
+     * @param string $string  An HTML5 document as a string.
+     * @param array  $options Configuration options when parsing the HTML.
+     *
+     * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
+     *                      almost all distributions of PHP.
+     */
+    public function loadHTML($string, array $options = array())
+    {
+        return $this->parse($string, $options);
+    }
+
+    /**
+     * Convenience function to load an HTML file.
+     *
+     * This is here to provide backwards compatibility with the
+     * PHP DOM implementation. It simply calls load().
+     *
+     * @param string|resource $file    The path to the file to parse. If this is a resource, it is
+     *                                 assumed to be an open stream whose pointer is set to the first
+     *                                 byte of input.
+     * @param array           $options Configuration options when parsing the HTML.
+     *
+     * @return \DOMDocument A DOM document. This object type is defined by the libxml
+     *                      library, and should have been included with your version of PHP.
+     */
+    public function loadHTMLFile($file, array $options = array())
+    {
+        return $this->load($file, $options);
+    }
+
+    /**
+     * Parse an HTML fragment from a string.
+     *
+     * @param string $string  The HTML5 fragment as a string.
+     * @param array  $options Configuration options when parsing the HTML.
+     *
+     * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
+     *                              almost all distributions of PHP.
+     */
+    public function loadHTMLFragment($string, array $options = array())
+    {
+        return $this->parseFragment($string, $options);
+    }
+
+    /**
+     * Return all errors encountered during the most recent parsing phase.
+     *
+     * @return array
+     */
+    public function getErrors()
+    {
+        return $this->errors;
+    }
+
+    /**
+     * Return true if some errors were encountered during the most recent parsing phase.
+     *
+     * @return bool
+     */
+    public function hasErrors()
+    {
+        return count($this->errors) > 0;
+    }
+
+    /**
+     * Parse an input string into a full document.
+     *
+     * @param string $input   The markup to parse.
+     * @param array  $options Per-call options, merged over the default options.
+     *
+     * @return \DOMDocument
+     */
+    public function parse($input, array $options = array())
+    {
+        return $this->runParser($input, $options, false)->document();
+    }
+
+    /**
+     * Parse an input string as a fragment rather than a full document.
+     *
+     * Lower-level counterpart of loadHTMLFragment(); it takes the markup as a
+     * string, not as a file path or resource.
+     *
+     * @param string $input   The input data to parse in the form of a string.
+     * @param array  $options Per-call options, merged over the default options.
+     *
+     * @return \DOMDocumentFragment
+     */
+    public function parseFragment($input, array $options = array())
+    {
+        return $this->runParser($input, $options, true)->fragment();
+    }
+
+    /**
+     * Shared tokenizer/tree-builder pipeline behind parse() and parseFragment().
+     *
+     * The stored error list is cleared before any work is done, so that
+     * getErrors()/hasErrors() never report results of an earlier call if the
+     * tokenizer throws part-way through.
+     *
+     * @param string $input      The markup to parse.
+     * @param array  $options    Per-call options, merged over the default options. The
+     *                           'encoding' entry selects the scanner encoding (UTF-8 when
+     *                           empty) and a non-empty 'xmlNamespaces' entry switches the
+     *                           tokenizer to Tokenizer::CONFORMANT_XML.
+     * @param bool   $isFragment Passed as the first DOMTreeBuilder constructor argument:
+     *                           true for fragment parsing, false for full documents.
+     *
+     * @return DOMTreeBuilder The tree builder after the tokenizer has run.
+     */
+    private function runParser($input, array $options, $isFragment)
+    {
+        $this->errors = array();
+
+        $options = array_merge($this->defaultOptions, $options);
+        $encoding = !empty($options['encoding']) ? $options['encoding'] : 'UTF-8';
+        $conformance = !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML;
+
+        $events = new DOMTreeBuilder($isFragment, $options);
+        $scanner = new Scanner($input, $encoding);
+        $parser = new Tokenizer($scanner, $events, $conformance);
+
+        $parser->parse();
+        $this->errors = $events->getErrors();
+
+        return $events;
+    }
+
+    /**
+     * Save a DOM into a given file as HTML5.
+     *
+     * A stream passed in by the caller is written to but left open; a stream
+     * opened here from a filename is always closed again, including when
+     * serialization fails.
+     *
+     * @param mixed           $dom     The DOM to be serialized.
+     * @param string|resource $file    The filename to be written or resource to write to.
+     * @param array           $options Configuration options when serializing the DOM. These include:
+     *                                 - encode_entities: Text written to the output is escaped by default and not all
+     *                                 entities are encoded. If this is set to true all entities will be encoded.
+     *                                 Defaults to false.
+     *
+     * @throws \RuntimeException If $file is a filename that cannot be opened for writing.
+     */
+    public function save($dom, $file, $options = array())
+    {
+        // Only streams opened by this method are closed by this method.
+        $close = !is_resource($file);
+        $stream = $close ? fopen($file, 'wb') : $file;
+        if (false === $stream) {
+            // Fail before the serializer starts writing to a non-stream.
+            throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $file));
+        }
+
+        $options = array_merge($this->defaultOptions, $options);
+        $rules = new OutputRules($stream, $options);
+        $trav = new Traverser($dom, $stream, $rules, $options);
+
+        // Emulates try/finally without requiring PHP 5.5: remember any failure,
+        // run the cleanup unconditionally, then rethrow the same object.
+        $failure = null;
+        try {
+            $trav->walk();
+        } catch (\Exception $e) {
+            $failure = $e;
+        } catch (\Throwable $e) {
+            // Engine errors on PHP 7+; this clause never matches on PHP 5.
+            $failure = $e;
+        }
+
+        /*
+         * release the traverser to avoid cyclic references and allow PHP to free memory without waiting for gc_collect_cycles
+         */
+        $rules->unsetTraverser();
+        if ($close) {
+            fclose($stream);
+        }
+
+        if (null !== $failure) {
+            throw $failure;
+        }
+    }
+
+    /**
+     * Convert a DOM into an HTML5 string.
+     *
+     * @param mixed $dom     The DOM to be serialized.
+     * @param array $options Configuration options when serializing the DOM. These include:
+     *                       - encode_entities: Text written to the output is escaped by default and not all
+     *                       entities are encoded. If this is set to true all entities will be encoded.
+     *                       Defaults to false.
+     *
+     * @return string An HTML5 document generated from the DOM.
+     */
+    public function saveHTML($dom, $options = array())
+    {
+        $stream = fopen('php://temp', 'wb');
+        // save() merges the default options itself, so $options is passed through as given.
+        $this->save($dom, $stream, $options);
+
+        // Offset 0 reads the buffer back from its beginning.
+        $html = stream_get_contents($stream, -1, 0);
+
+        fclose($stream);
+
+        return $html;
+    }
+}