From ac286005c0eef03c85a4d75a6b36cc30d2c5990e Mon Sep 17 00:00:00 2001 From: Matt Butcher Date: Wed, 15 May 2013 09:33:25 -0500 Subject: Updated main HTML5 class. It now uses the real library, and matches the DOMDocument interface. --- src/HTML5.php | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 2 deletions(-) (limited to 'src/HTML5.php') diff --git a/src/HTML5.php b/src/HTML5.php index 31db307..23948b3 100644 --- a/src/HTML5.php +++ b/src/HTML5.php @@ -1,14 +1,82 @@ loadHTML(stream_get_contents($file)); + } + + $input = new FileInputStream($file); + return $this->parse($input); + } + + /** + * Parse an HTML string. + * + * Take a string of HTML 5 (or earlier) and parse it into a + * DOMDocument. + * + * + * @param array $options + * An array of options. + * @return \DOMDocument + * A DOM document. DOM is part of libxml, which is included with + * almost all distributions of PHP. + */ + public function loadHTML($string, $options = NULL) { + $input = new StringInputStream($string); + return $this->parse($input); + } + + /** + * Convenience function to load an HTML file. + * + * This is here to provide backwards compatibility with the + * PHP DOM implementation. It simply calls load(). + */ + public function loadHTMLFile($file, $options = NULL) { + return $this->load($file, $options); + } /** * Save a DOM into a given file as HTML5. @@ -25,4 +93,18 @@ class HTML5 extends \HTML5\Parser { $serializer = new \HTML5\Serializer\Serializer($dom); return $serializer->saveHTML(); } + + /** + * Parse an input stream. + */ + protected function parse($input) { + $events = new DOMTreeBuilder(); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events); + + $parser->parse(); + + return $events->document(); + } + } -- cgit v1.2.3