diff options
author | Matt Farina <[email protected]> | 2013-05-15 11:24:22 -0400 |
---|---|---|
committer | Matt Farina <[email protected]> | 2013-05-15 11:24:22 -0400 |
commit | 0cdf4ae1aaf6aefdc7218dbb49e543e8603035a8 (patch) | |
tree | c10b651f07469b06242ac5ac8863250a957e9fd9 /src/HTML5.php | |
parent | 5be96c81985e8a40cfcaeff9c5d7ee055b46ac08 (diff) | |
parent | 69d1932bac21ded3d10f5df1a94d892efa2c1b89 (diff) |
Merge branch 'master' of github.com:technosophos/HTML5-PHP
Diffstat (limited to 'src/HTML5.php')
-rw-r--r-- | src/HTML5.php | 86 |
1 files changed, 84 insertions, 2 deletions
diff --git a/src/HTML5.php b/src/HTML5.php index 31db307..23948b3 100644 --- a/src/HTML5.php +++ b/src/HTML5.php @@ -1,14 +1,82 @@ <?php +use HTML5\Parser\StringInputStream; +use HTML5\Parser\FileInputStream; +use HTML5\Parser\Scanner; +use HTML5\Parser\Tokenizer; +use HTML5\Parser\DOMTreeBuilder; +use HTML5\Serializer\Serializer; + /** * The main HTML5 front end. * * This class offers convenience methods for parsing and serializing HTML5. + * It is roughly designed to mirror the \DOMDocument class that is + * provided with most versions of PHP. * * EXPERIMENTAL. This may change or be completely replaced. */ -class HTML5 extends \HTML5\Parser { - // Inherit parse() and parseFragment(). +class HTML5 { + + /** + * Load and parse an HTML file. + * + * This will apply the HTML5 parser, which is tolerant of many + * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML + * 3. Note that in these cases, not all of the old data will be + * preserved. For example, XHTML's XML declaration will be removed. + * + * The rules governing parsing are set out in the HTML 5 spec. + * + * @param string $file + * The path to the file to parse. If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first + * byte of input. + * @param array $options + * An array of options. + * @return \DOMDocument + * A DOM document. This object type is defined by the libxml + * library, and should have been included with your version of PHP. + */ + public function load($file, $options = NULL) { + + // Handle the case where file is a resource. + if (is_resource($file)) { + // FIXME: We need a StreamInputStream class. + return $this->loadHTML(stream_get_contents($file)); + } + + $input = new FileInputStream($file); + return $this->parse($input); + } + + /** + * Parse an HTML string. + * + * Take a string of HTML 5 (or earlier) and parse it into a + * DOMDocument. + * + * + * @param array $options + * An array of options. 
+ * @return \DOMDocument + * A DOM document. DOM is part of libxml, which is included with + * almost all distributions of PHP. + */ + public function loadHTML($string, $options = NULL) { + $input = new StringInputStream($string); + return $this->parse($input); + } + + /** + * Convenience function to load an HTML file. + * + * This is here to provide backwards compatibility with the + * PHP DOM implementation. It simply calls load(). + */ + public function loadHTMLFile($file, $options = NULL) { + return $this->load($file, $options); + } /** * Save a DOM into a given file as HTML5. @@ -25,4 +93,18 @@ class HTML5 extends \HTML5\Parser { $serializer = new \HTML5\Serializer\Serializer($dom); return $serializer->saveHTML(); } + + /** + * Parse an input stream. + */ + protected function parse($input) { + $events = new DOMTreeBuilder(); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events); + + $parser->parse(); + + return $events->document(); + } + } |