summaryrefslogtreecommitdiff
path: root/src/HTML5
diff options
context:
space:
mode:
authorMatt Butcher <[email protected]>2013-06-28 11:40:28 -0500
committerMatt Butcher <[email protected]>2013-06-28 11:40:28 -0500
commit3e1d8b8d5512082a6dca175de15574f627722c3c (patch)
tree4388413fcd377906440b2df769c81ab8e8e29949 /src/HTML5
parent5921b94e0a6b5c1abb3ecf6a25e2292bca68a58b (diff)
Documentation updates for Beta 1!
Diffstat (limited to 'src/HTML5')
-rw-r--r--src/HTML5/Elements.php13
-rw-r--r--src/HTML5/Parser/CharacterReference.php3
-rw-r--r--src/HTML5/Parser/DOMTreeBuilder.php5
-rw-r--r--src/HTML5/Parser/EventHandler.php12
-rw-r--r--src/HTML5/Parser/FileInputStream.php11
-rw-r--r--src/HTML5/Parser/InputStream.php6
-rw-r--r--src/HTML5/Parser/ParseError.php2
-rw-r--r--src/HTML5/Parser/README.md2
-rw-r--r--src/HTML5/Parser/Scanner.php6
-rw-r--r--src/HTML5/Parser/StringInputStream.php3
10 files changed, 59 insertions, 4 deletions
diff --git a/src/HTML5/Elements.php b/src/HTML5/Elements.php
index c5ba50b..3c1e57f 100644
--- a/src/HTML5/Elements.php
+++ b/src/HTML5/Elements.php
@@ -4,22 +4,31 @@ namespace HTML5;
/**
* Provide general element functions.
*
- * @todo consider using a bitmask table lookup. There is enought overlap in
+ * This class provides general information about HTML5 elements,
+ * including syntactic and semantic issues. Parsers and serializers can
+ * use this class as a reference point for information about the rules
+ * of various HTML5 elements.
+ *
+ * @todo consider using a bitmask table lookup. There is enough overlap in
* naming that this could significantly shrink the size and maybe make it
* faster. See the Go teams implementation at https://code.google.com/p/go/source/browse/html/atom.
*/
class Elements {
+ /** Indicates an element is described in the specification. */
const KNOWN_ELEMENT = 1;
// From section 8.1.2: "script", "style"
// From 8.2.5.4.7 ("in body" insertion mode): "noembed", "noscript"
// From 8.4 "style", "xmp", "iframe", "noembed", "noframes"
+ /** Indicates the contained text should be processed as raw text. */
const TEXT_RAW = 2;
// From section 8.1.2: "textarea", "title"
+ /** Indicates the contained text should be processed as RCDATA. */
const TEXT_RCDATA = 4;
+ /** Indicates the tag cannot have content. */
const VOID_TAG = 8;
// "address", "article", "aside", "blockquote", "center", "details", "dialog", "dir", "div", "dl",
@@ -29,6 +38,8 @@ class Elements {
// "pre", "listing"
// "form"
// "plaintext"
+ /** Indicates that if a previous event is for a P tag, that element
+ * should be considered closed. */
const AUTOCLOSE_P = 16;
const TEXT_PLAINTEXT = 32;
diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php
index ea6a527..17a9285 100644
--- a/src/HTML5/Parser/CharacterReference.php
+++ b/src/HTML5/Parser/CharacterReference.php
@@ -5,6 +5,9 @@ use \HTML5\Entities;
/**
* Manage entity references.
+ *
+ * This is a simple resolver for HTML5 character reference entities.
+ * See \HTML5\Entities for the list of supported entities.
*/
class CharacterReference {
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index f9d1e7a..f0caeb4 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -9,6 +9,11 @@ use HTML5\Elements;
* attempts (but does not guarantee) to up-convert older HTML documents
* to HTML5. It does this by applying HTML5's rules, but it will not
* change the architecture of the document itself.
+ *
+ * Many of the error correction and quirks features suggested in the specification
+ * are implemented herein; however, not all of them are. Since we do not
+ * assume a graphical user agent, no presentation-specific logic is conducted
+ * during tree building.
*/
class DOMTreeBuilder implements EventHandler {
diff --git a/src/HTML5/Parser/EventHandler.php b/src/HTML5/Parser/EventHandler.php
index ebb30b2..4034938 100644
--- a/src/HTML5/Parser/EventHandler.php
+++ b/src/HTML5/Parser/EventHandler.php
@@ -4,6 +4,18 @@ namespace HTML5\Parser;
/**
* Standard events for HTML5.
*
+ * This is roughly analogous to a SAX2 or expat-style interface.
+ * However, it is tuned specifically for HTML5, according to section 8
+ * of the HTML5 specification.
+ *
+ * An event handler receives parser events. For a concrete
+ * implementation, see DOMTreeBuilder.
+ *
+ * Quirks support in the parser is limited to close-in syntax (malformed
+ * tags or attributes). Higher order syntax and semantic issues with a
+ * document (e.g. mismatched tags, illegal nesting, etc.) are the
+ * responsibility of the event handler implementation.
+ *
* See HTML5 spec section 8.2.4
*/
interface EventHandler {
diff --git a/src/HTML5/Parser/FileInputStream.php b/src/HTML5/Parser/FileInputStream.php
index ae3b4ef..c1bb128 100644
--- a/src/HTML5/Parser/FileInputStream.php
+++ b/src/HTML5/Parser/FileInputStream.php
@@ -8,6 +8,15 @@ namespace HTML5\Parser;
*/
class FileInputStream extends StringInputStream implements InputStream {
+ /*
+ * So right now we read files into strings and then process the
+ * string. We chose to do this largely for the sake of expediency of
+ * development, and also because we could optimize toward processing
+ * arbitrarily large chunks of the input. But in the future, we'd
+ * really like to rewrite this class to efficiently handle lower level
+ * stream reads (and thus efficiently handle large documents).
+ */
+
/**
* Load a file input stream.
*
@@ -23,4 +32,4 @@ class FileInputStream extends StringInputStream implements InputStream {
}
-} \ No newline at end of file
+}
diff --git a/src/HTML5/Parser/InputStream.php b/src/HTML5/Parser/InputStream.php
index bb2ca1c..713031d 100644
--- a/src/HTML5/Parser/InputStream.php
+++ b/src/HTML5/Parser/InputStream.php
@@ -3,6 +3,12 @@ namespace HTML5\Parser;
/**
* Interface for stream readers.
+ *
+ * The parser only reads from streams. Various input sources can write
+ * an adapter to this InputStream.
+ *
+ * Currently provided InputStream implementations include
+ * FileInputStream and StringInputStream.
*/
interface InputStream extends \Iterator {
diff --git a/src/HTML5/Parser/ParseError.php b/src/HTML5/Parser/ParseError.php
index 8fc646e..4d5f117 100644
--- a/src/HTML5/Parser/ParseError.php
+++ b/src/HTML5/Parser/ParseError.php
@@ -2,7 +2,7 @@
namespace HTML5\Parser;
/**
- * When the parser has an error.
+ * Emitted when the parser has an error.
*/
class ParseError extends \Exception {
}
diff --git a/src/HTML5/Parser/README.md b/src/HTML5/Parser/README.md
index 2f5a84a..9f92957 100644
--- a/src/HTML5/Parser/README.md
+++ b/src/HTML5/Parser/README.md
@@ -12,7 +12,7 @@ of the HTML5 specification, though we do not assume a networking layer.
||
[ Tree Builder ] // Organizes units into a tree of objects
||
- [DOM Document] // The final state of the parsed document.
+ [ DOM Document ] // The final state of the parsed document.
## InputStream
diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php
index 1ab9b8b..b359f16 100644
--- a/src/HTML5/Parser/Scanner.php
+++ b/src/HTML5/Parser/Scanner.php
@@ -168,9 +168,15 @@ class Scanner {
return $this->is->currentLine();
}
+ /**
+ * Read chars until something in the mask is encountered.
+ */
public function charsUntil($mask) {
return $this->is->charsUntil($mask);
}
+ /**
+ * Read chars as long as the mask matches.
+ */
public function charsWhile($mask) {
return $this->is->charsWhile($mask);
}
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index 9aa0b73..0d2a7f3 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -295,6 +295,9 @@ class StringInputStream implements InputStream {
}
}
+ /**
+ * Look ahead without moving cursor.
+ */
public function peek() {
if (($this->char + 1) <= $this->EOF) {
return $this->data[$this->char + 1];