summaryrefslogtreecommitdiff
path: root/src/HTML5.php
diff options
context:
space:
mode:
authorAsmir Mustafic <[email protected]>2014-06-04 08:59:18 +0200
committerAsmir Mustafic <[email protected]>2014-06-04 08:59:18 +0200
commite56291d00e9ccf1d2bc2f0274fcbcdc6b7af0516 (patch)
tree79cb27b772240284175fc433af67fc8bdaf62e68 /src/HTML5.php
parent06027f5e4929b6e0c200bd8337ab73baa915beda (diff)
Refactored HTML5 class (no more static methods) and explicit error
handling
Diffstat (limited to 'src/HTML5.php')
-rw-r--r--src/HTML5.php206
1 files changed, 104 insertions, 102 deletions
diff --git a/src/HTML5.php b/src/HTML5.php
index 7295fb4..2ec6fc9 100644
--- a/src/HTML5.php
+++ b/src/HTML5.php
@@ -1,81 +1,89 @@
<?php
-/**
- * The main HTML5 front end.
- */
-use HTML5\Parser\StringInputStream;
+
use HTML5\Parser\FileInputStream;
+use HTML5\Parser\StringInputStream;
+use HTML5\Parser\DOMTreeBuilder;
use HTML5\Parser\Scanner;
use HTML5\Parser\Tokenizer;
-use HTML5\Parser\DOMTreeBuilder;
use HTML5\Serializer\OutputRules;
use HTML5\Serializer\Traverser;
/**
* This class offers convenience methods for parsing and serializing HTML5.
- * It is roughly designed to mirror the \DOMDocument class that is
+ * It is roughly designed to mirror the \DOMDocument class that is
* provided with most versions of PHP.
*
* EXPERIMENTAL. This may change or be completely replaced.
*/
-class HTML5 {
-
+class HTML5
+{
/**
* Global options for the parser and serializer.
* @var array
*/
- public static $options = array(
-
+ private $options = array(
// If the serializer should encode all entities.
- 'encode_entities' => FALSE,
+ 'encode_entities' => FALSE
);
+ private $errors = array();
+
+ public function __construct(array $options = array()) {
+ $this->options = array_merge($this->options, $options);
+ }
+ /**
+ * Get the options in effect for this instance.
+ *
+ * @return array
+ * The built-in defaults merged with any options passed to the constructor.
+ */
+ public function getOptions() {
+ return $this->options;
+ }
/**
* Load and parse an HTML file.
*
- * This will apply the HTML5 parser, which is tolerant of many
- * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
- * 3. Note that in these cases, not all of the old data will be
+ * This will apply the HTML5 parser, which is tolerant of many
+ * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
+ * 3. Note that in these cases, not all of the old data will be
* preserved. For example, XHTML's XML declaration will be removed.
*
* The rules governing parsing are set out in the HTML 5 spec.
*
* @param string $file
- * The path to the file to parse. If this is a resource, it is
- * assumed to be an open stream whose pointer is set to the first
+ * The path to the file to parse. If this is a resource, it is
+ * assumed to be an open stream whose pointer is set to the first
* byte of input.
* @return \DOMDocument
- * A DOM document. These object type is defined by the libxml
+ * A DOM document. This object type is defined by the libxml
* library, and should have been included with your version of PHP.
*/
- public static function load($file) {
-
+ public function load($file) {
// Handle the case where file is a resource.
if (is_resource($file)) {
// FIXME: We need a StreamInputStream class.
- return static::loadHTML(stream_get_contents($file));
+ return $this->loadHTML(stream_get_contents($file));
}
$input = new FileInputStream($file);
- return static::parse($input);
+ return $this->parse($input);
}
-
/**
* Parse a HTML Document from a string.
- *
- * Take a string of HTML 5 (or earlier) and parse it into a
+ *
+ * Take a string of HTML 5 (or earlier) and parse it into a
* DOMDocument.
*
* @param string $string
* A html5 document as a string.
* @return \DOMDocument
- * A DOM document. DOM is part of libxml, which is included with
+ * A DOM document. DOM is part of libxml, which is included with
* almost all distribtions of PHP.
*/
- public static function loadHTML($string) {
+ public function loadHTML($string) {
$input = new StringInputStream($string);
- return static::parse($input);
+ return $this->parse($input);
}
-
/**
* Convenience function to load an HTML file.
*
@@ -83,18 +91,17 @@ class HTML5 {
* PHP DOM implementation. It simply calls load().
*
* @param string $file
- * The path to the file to parse. If this is a resource, it is
- * assumed to be an open stream whose pointer is set to the first
+ * The path to the file to parse. If this is a resource, it is
+ * assumed to be an open stream whose pointer is set to the first
* byte of input.
*
* @return \DOMDocument
- * A DOM document. These object type is defined by the libxml
+ * A DOM document. This object type is defined by the libxml
* library, and should have been included with your version of PHP.
*/
- public static function loadHTMLFile($file, $options = NULL) {
- return static::load($file, $options);
+ public function loadHTMLFile($string) {
+ return $this->load($string);
}
-
/**
* Parse a HTML fragment from a string.
*
@@ -105,12 +112,63 @@ class HTML5 {
* A DOM fragment. The DOM is part of libxml, which is included with
* almost all distributions of PHP.
*/
- public static function loadHTMLFragment($string) {
+ public function loadHTMLFragment($string) {
$input = new StringInputStream($string);
- return static::parseFragment($input);
+ return $this->parseFragment($input);
+ }
+ /**
+ * Return all errors encountered during the parsing phase.
+ * @return array
+ */
+ public function getErrors() {
+ return $this->errors;
+ }
+ /**
+ * Return true if any errors were encountered during the parsing phase.
+ * @return bool
+ */
+ public function hasErrors() {
+ return count($this->errors)>0;
}
/**
+ * Parse an input stream.
+ *
+ * Lower-level loading function. This requires an input stream instead
+ * of a string, file, or resource.
+ */
+ public function parse(\HTML5\Parser\InputStream $input) {
+ $this->errors = array();
+ $events = new DOMTreeBuilder();
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $events);
+
+ $parser->parse();
+
+ $document = $events->document();
+
+ if($document){
+ $this->errors = $document->errors;
+ }
+
+ return $document;
+ }
+ /**
+ * Parse an input stream where the stream is a fragment.
+ *
+ * Lower-level loading function. This requires an input stream instead
+ * of a string, file, or resource.
+ */
+ public function parseFragment(\HTML5\Parser\InputStream $input) {
+ $events = new DOMTreeBuilder(TRUE);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $events);
+
+ $parser->parse();
+
+ return $events->fragment();
+ }
+ /**
* Save a DOM into a given file as HTML5.
*
* @param mixed $dom
@@ -120,19 +178,19 @@ class HTML5 {
* @param array $options
* Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
- * entities are encoded. If this is set to TRUE all entities will be encoded.
- * Defaults to FALSE.
+ * entities are encoded. If this is set to TRUE all entities will be encoded.
+ * Defaults to FALSE.
*/
- public static function save($dom, $file, $options = array()) {
- $options = $options + static::options();
+ public function save($dom, $file, $options = array()) {
$close = TRUE;
if (is_resource($file)) {
$stream = $file;
$close = FALSE;
- }
+ }
else {
$stream = fopen($file, 'w');
}
+ $options = array_merge($this->getOptions(), $options);
$rules = new OutputRules($stream, $options);
$trav = new Traverser($dom, $stream, $rules, $options);
@@ -142,7 +200,6 @@ class HTML5 {
fclose($stream);
}
}
-
/**
* Convert a DOM into an HTML5 string.
*
@@ -151,70 +208,15 @@ class HTML5 {
* @param array $options
* Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
- * entities are encoded. If this is set to TRUE all entities will be encoded.
- * Defaults to FALSE.
+ * entities are encoded. If this is set to TRUE all entities will be encoded.
+ * Defaults to FALSE.
*
* @return string
* A HTML5 documented generated from the DOM.
*/
- public static function saveHTML($dom, $options = array()) {
+ public function saveHTML($dom, $options = array()) {
$stream = fopen('php://temp', 'w');
- static::save($dom, $stream, $options);
- return stream_get_contents($stream, -1, 0);
- }
-
- /**
- * Parse an input stream.
- *
- * Lower-level loading function. This requires an input stream instead
- * of a string, file, or resource.
- */
- public static function parse(\HTML5\Parser\InputStream $input) {
- $events = new DOMTreeBuilder();
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $events);
-
- $parser->parse();
-
- return $events->document();
- }
-
- /**
- * Parse an input stream where the stream is a fragment.
- *
- * Lower-level loading function. This requires an input stream instead
- * of a string, file, or resource.
- */
- public static function parseFragment(\HTML5\Parser\InputStream $input) {
- $events = new DOMTreeBuilder(TRUE);
- $scanner = new Scanner($input);
- $parser = new Tokenizer($scanner, $events);
-
- $parser->parse();
-
- return $events->fragment();
+ $this->save($dom, $stream, array_merge($this->getOptions(), $options));
+ return stream_get_contents($stream, - 1, 0);
}
-
- /**
- * Get the default options.
- *
- * @return array
- * The default options.
- */
- public static function options() {
- return static::$options;
- }
-
- /**
- * Set a default option.
- *
- * @param string $name
- * The option name.
- * @param mixed $value
- * The option value.
- */
- public static function setOption($name, $value) {
- static::$options[$name] = $value;
- }
-
}