From 3e1d8b8d5512082a6dca175de15574f627722c3c Mon Sep 17 00:00:00 2001 From: Matt Butcher Date: Fri, 28 Jun 2013 11:40:28 -0500 Subject: Documentation updates for Beta 1! --- src/HTML5/Parser/CharacterReference.php | 3 +++ src/HTML5/Parser/DOMTreeBuilder.php | 5 +++++ src/HTML5/Parser/EventHandler.php | 12 ++++++++++++ src/HTML5/Parser/FileInputStream.php | 11 ++++++++++- src/HTML5/Parser/InputStream.php | 6 ++++++ src/HTML5/Parser/ParseError.php | 2 +- src/HTML5/Parser/README.md | 2 +- src/HTML5/Parser/Scanner.php | 6 ++++++ src/HTML5/Parser/StringInputStream.php | 3 +++ 9 files changed, 47 insertions(+), 3 deletions(-) (limited to 'src/HTML5/Parser') diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php index ea6a527..17a9285 100644 --- a/src/HTML5/Parser/CharacterReference.php +++ b/src/HTML5/Parser/CharacterReference.php @@ -5,6 +5,9 @@ use \HTML5\Entities; /** * Manage entity references. + * + * This is a simple resolver for HTML5 character reference entities. + * See \HTML5\Entities for the list of supported entities. */ class CharacterReference { diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index f9d1e7a..f0caeb4 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -9,6 +9,11 @@ use HTML5\Elements; * attempts (but does not guarantee) to up-convert older HTML documents * to HTML5. It does this by applying HTML5's rules, but it will not * change the architecture of the document itself. + * + * Many of the error correction and quirks features suggested in the specification + * are implemented herein; however, not all of them are. Since we do not + * assume a graphical user agent, no presentation-specific logic is conducted + * during tree building. 
*/ class DOMTreeBuilder implements EventHandler { diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php index ebb30b2..4034938 100644 --- a/src/HTML5/Parser/EventHandler.php +++ b/src/HTML5/Parser/EventHandler.php @@ -4,6 +4,18 @@ namespace HTML5\Parser; /** * Standard events for HTML5. * + * This is roughly analogous to a SAX2 or expat-style interface. + * However, it is tuned specifically for HTML5, according to section 8 + * of the HTML5 specification. + * + * An event handler receives parser events. For a concrete + * implementation, see DOMTreeBuilder. + * + * Quirks support in the parser is limited to close-in syntax (malformed + * tags or attributes). Higher order syntax and semantic issues with a + * document (e.g. mismatched tags, illegal nesting, etc.) are the + * responsibility of the event handler implementation. + * * See HTML5 spec section 8.2.4 */ interface EventHandler { diff --git a/src/HTML5/Parser/FileInputStream.php b/src/HTML5/Parser/FileInputStream.php index ae3b4ef..c1bb128 100644 --- a/src/HTML5/Parser/FileInputStream.php +++ b/src/HTML5/Parser/FileInputStream.php @@ -8,6 +8,15 @@ namespace HTML5\Parser; */ class FileInputStream extends StringInputStream implements InputStream { + /* + * So right now we read files into strings and then process the + * string. We chose to do this largely for the sake of expediency of + * development, and also because we could optimize toward processing + * arbitrarily large chunks of the input. But in the future, we'd + * really like to rewrite this class to efficiently handle lower level + * stream reads (and thus efficiently handle large documents). + */ + /** * Load a file input stream. 
* @@ -23,4 +32,4 @@ class FileInputStream extends StringInputStream implements InputStream { } -} \ No newline at end of file +} diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php index bb2ca1c..713031d 100644 --- a/src/HTML5/Parser/InputStream.php +++ b/src/HTML5/Parser/InputStream.php @@ -3,6 +3,12 @@ namespace HTML5\Parser; /** * Interface for stream readers. + * + * The parser only reads from streams. Various input sources can write + * an adapter to this InputStream. + * + * Currently provided InputStream implementations include + * FileInputStream and StringInputStream. */ interface InputStream extends \Iterator { diff --git a/src/HTML5/Parser/ParseError.php b/src/HTML5/Parser/ParseError.php index 8fc646e..4d5f117 100644 --- a/src/HTML5/Parser/ParseError.php +++ b/src/HTML5/Parser/ParseError.php @@ -2,7 +2,7 @@ namespace HTML5\Parser; /** - * When the parser has an error. + * Emit when the parser has an error. */ class ParseError extends \Exception { } diff --git a/src/HTML5/Parser/README.md b/src/HTML5/Parser/README.md index 2f5a84a..9f92957 100644 --- a/src/HTML5/Parser/README.md +++ b/src/HTML5/Parser/README.md @@ -12,7 +12,7 @@ of the HTML5 specification, though we do not assume a networking layer. || [ Tree Builder ] // Organizes units into a tree of objects || - [DOM Document] // The final state of the parsed document. + [ DOM Document ] // The final state of the parsed document. ## InputStream diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php index 1ab9b8b..b359f16 100644 --- a/src/HTML5/Parser/Scanner.php +++ b/src/HTML5/Parser/Scanner.php @@ -168,9 +168,15 @@ class Scanner { return $this->is->currentLine(); } + /** + * Read chars until something in the mask is encountered. + */ public function charsUntil($mask) { return $this->is->charsUntil($mask); } + /** + * Read chars as long as the mask matches. 
+ */ public function charsWhile($mask) { return $this->is->charsWhile($mask); } diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php index 9aa0b73..0d2a7f3 100644 --- a/src/HTML5/Parser/StringInputStream.php +++ b/src/HTML5/Parser/StringInputStream.php @@ -295,6 +295,9 @@ class StringInputStream implements InputStream { } } + /** + * Look ahead without moving cursor. + */ public function peek() { if (($this->char + 1) <= $this->EOF) { return $this->data[$this->char + 1]; -- cgit v1.2.3