From e56291d00e9ccf1d2bc2f0274fcbcdc6b7af0516 Mon Sep 17 00:00:00 2001 From: Asmir Mustafic Date: Wed, 4 Jun 2014 08:59:18 +0200 Subject: Refactored HTML5 class (no more static methods) and explicit error handling --- src/HTML5.php | 206 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 104 insertions(+), 102 deletions(-) (limited to 'src/HTML5.php') diff --git a/src/HTML5.php b/src/HTML5.php index 7295fb4..2ec6fc9 100644 --- a/src/HTML5.php +++ b/src/HTML5.php @@ -1,81 +1,89 @@ FALSE, + 'encode_entities' => FALSE ); + private $errors = array(); + + public function __construct(array $options = array()) { + $this->options = array_merge($this->options, $options); + } + /** + * Get the default options. + * + * @return array + * The default options. + */ + public function getOptions() { + return $this->options; + } /** * Load and parse an HTML file. * - * This will apply the HTML5 parser, which is tolerant of many - * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML - * 3. Note that in these cases, not all of the old data will be + * This will apply the HTML5 parser, which is tolerant of many + * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML + * 3. Note that in these cases, not all of the old data will be * preserved. For example, XHTML's XML declaration will be removed. * * The rules governing parsing are set out in the HTML 5 spec. * * @param string $file - * The path to the file to parse. If this is a resource, it is - * assumed to be an open stream whose pointer is set to the first + * The path to the file to parse. If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first * byte of input. * @return \DOMDocument - * A DOM document. These object type is defined by the libxml + * A DOM document. These object type is defined by the libxml * library, and should have been included with your version of PHP. */ - public static function load($file) { - + public function load($file) { // Handle the case where file is a resource. if (is_resource($file)) { // FIXME: We need a StreamInputStream class. - return static::loadHTML(stream_get_contents($file)); + return $this->loadHTML(stream_get_contents($file)); } $input = new FileInputStream($file); - return static::parse($input); + return $this->parse($input); } - /** * Parse a HTML Document from a string. - * - * Take a string of HTML 5 (or earlier) and parse it into a + * + * Take a string of HTML 5 (or earlier) and parse it into a * DOMDocument. * * @param string $string * A html5 document as a string. * @return \DOMDocument - * A DOM document. DOM is part of libxml, which is included with + * A DOM document. DOM is part of libxml, which is included with * almost all distribtions of PHP. */ - public static function loadHTML($string) { + public function loadHTML($string) { $input = new StringInputStream($string); - return static::parse($input); + return $this->parse($input); } - /** * Convenience function to load an HTML file. * @@ -83,18 +91,17 @@ class HTML5 { * PHP DOM implementation. It simply calls load(). * * @param string $file - * The path to the file to parse. If this is a resource, it is - * assumed to be an open stream whose pointer is set to the first + * The path to the file to parse. If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first * byte of input. * * @return \DOMDocument - * A DOM document. These object type is defined by the libxml + * A DOM document. These object type is defined by the libxml * library, and should have been included with your version of PHP. */ - public static function loadHTMLFile($file, $options = NULL) { - return static::load($file, $options); + public function loadHTMLFile($string) { + return $this->load($string); } - /** * Parse a HTML fragment from a string. * @@ -105,11 +112,62 @@ class HTML5 { * A DOM fragment. The DOM is part of libxml, which is included with * almost all distributions of PHP. */ - public static function loadHTMLFragment($string) { + public function loadHTMLFragment($string) { $input = new StringInputStream($string); - return static::parseFragment($input); + return $this->parseFragment($input); + } + /** + * Return all errors encountered into parsing phase + * @return array + */ + public function getErrors() { + return $this->errors; + } + /** + * Return true it some errors were encountered into parsing phase + * @return bool + */ + public function hasErrors() { + return count($this->errors)>0; } + /** + * Parse an input stream. + * + * Lower-level loading function. This requires an input stream instead + * of a string, file, or resource. + */ + public function parse(\HTML5\Parser\InputStream $input) { + $this->errors = array(); + $events = new DOMTreeBuilder(); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events); + + $parser->parse(); + + $document = $events->document(); + + if($document){ + $this->errors = $document->errors; + } + + return $document; + } + /** + * Parse an input stream where the stream is a fragment. + * + * Lower-level loading function. This requires an input stream instead + * of a string, file, or resource. + */ + public function parseFragment(\HTML5\Parser\InputStream $input) { + $events = new DOMTreeBuilder(TRUE); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events); + + $parser->parse(); + + return $events->fragment(); + } /** * Save a DOM into a given file as HTML5. * @@ -120,19 +178,19 @@ class HTML5 { * @param array $options * Configuration options when serializing the DOM. These include: * - encode_entities: Text written to the output is escaped by default and not all - * entities are encoded. If this is set to TRUE all entities will be encoded. - * Defaults to FALSE. + * entities are encoded. If this is set to TRUE all entities will be encoded. + * Defaults to FALSE. */ - public static function save($dom, $file, $options = array()) { - $options = $options + static::options(); + public function save($dom, $file, $options = array()) { $close = TRUE; if (is_resource($file)) { $stream = $file; $close = FALSE; - } + } else { $stream = fopen($file, 'w'); } + $options = array_merge($this->getOptions(), $options); $rules = new OutputRules($stream, $options); $trav = new Traverser($dom, $stream, $rules, $options); @@ -142,7 +200,6 @@ class HTML5 { fclose($stream); } } - /** * Convert a DOM into an HTML5 string. * @@ -151,70 +208,15 @@ class HTML5 { * @param array $options * Configuration options when serializing the DOM. These include: * - encode_entities: Text written to the output is escaped by default and not all - * entities are encoded. If this is set to TRUE all entities will be encoded. - * Defaults to FALSE. + * entities are encoded. If this is set to TRUE all entities will be encoded. + * Defaults to FALSE. * * @return string * A HTML5 documented generated from the DOM. */ - public static function saveHTML($dom, $options = array()) { + public function saveHTML($dom, $options = array()) { $stream = fopen('php://temp', 'w'); - static::save($dom, $stream, $options); - return stream_get_contents($stream, -1, 0); - } - - /** - * Parse an input stream. - * - * Lower-level loading function. This requires an input stream instead - * of a string, file, or resource. - */ - public static function parse(\HTML5\Parser\InputStream $input) { - $events = new DOMTreeBuilder(); - $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $events); - - $parser->parse(); - - return $events->document(); - } - - /** - * Parse an input stream where the stream is a fragment. - * - * Lower-level loading function. This requires an input stream instead - * of a string, file, or resource. - */ - public static function parseFragment(\HTML5\Parser\InputStream $input) { - $events = new DOMTreeBuilder(TRUE); - $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $events); - - $parser->parse(); - - return $events->fragment(); + $this->save($dom, $stream, array_merge($this->getOptions(), $options)); + return stream_get_contents($stream, - 1, 0); } - - /** - * Get the default options. - * - * @return array - * The default options. - */ - public static function options() { - return static::$options; - } - - /** - * Set a default option. - * - * @param string $name - * The option name. - * @param mixed $value - * The option value. - */ - public static function setOption($name, $value) { - static::$options[$name] = $value; - } - } -- cgit v1.2.3