summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTML5.php 39
-rw-r--r-- src/HTML5/Serializer/README.md 4
-rw-r--r-- src/HTML5/Serializer/Serializer.php 82
-rw-r--r-- test/HTML5/Html5Test.php 129
-rw-r--r-- test/HTML5/Serializer/SerializerTest.php 190
5 files changed, 152 insertions, 292 deletions
diff --git a/src/HTML5.php b/src/HTML5.php
index b31d89c..48b6148 100644
--- a/src/HTML5.php
+++ b/src/HTML5.php
@@ -5,7 +5,8 @@ use HTML5\Parser\FileInputStream;
use HTML5\Parser\Scanner;
use HTML5\Parser\Tokenizer;
use HTML5\Parser\DOMTreeBuilder;
-use HTML5\Serializer\Serializer;
+use HTML5\Serializer\OutputRules;
+use HTML5\Serializer\Traverser;
/**
* The main HTML5 front end.
@@ -105,20 +106,28 @@ class HTML5 {
* The filename to be written.
* @param array $options
* Configuration options when serializing the DOM. These include:
- * - output_rules: The class with the serializer writing rules. Defaults to
- * \HTML5\Serializer\OutputRules. The standard rules are representative of the
- * original document. This can be replaced by alternatives that can
- * minify or make other alterations.
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to TRUE all entities will be encoded.
* Defaults to FALSE.
*/
public static function save($dom, $file, $options = array()) {
- // Passing all the default options is intentional. This way a custom
- // rule set can have default options passed in if needed.
$options = $options + self::options();
- $serializer = new \HTML5\Serializer\Serializer($dom, $options);
- return $serializer->save($file);
+ $close = TRUE;
+ if (is_resource($file)) {
+ $stream = $file;
+ $close = FALSE;
+ }
+ else {
+ $stream = fopen($file, 'w');
+ }
+ $rules = new OutputRules($stream, $options);
+ $trav = new Traverser($dom, $stream, $rules, $options);
+
+ $trav->walk();
+
+ if ($close) {
+ fclose($stream);
+ }
}
/**
@@ -128,10 +137,6 @@ class HTML5 {
* The DOM to be serialized.
* @param array $options
* Configuration options when serializing the DOM. These include:
- * - output_rules: The class with the serializer writing rules. Defaults to
- * \HTML5\Serializer\OutputRules. The standard rules are representative of the
- * original document. This can be replaced by alternatives that can
- * minify or make other alterations.
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to TRUE all entities will be encoded.
* Defaults to FALSE.
@@ -140,11 +145,9 @@ class HTML5 {
* A HTML5 documented generated from the DOM.
*/
public static function saveHTML($dom, $options = array()) {
- // Passing all the default options is intentional. This way a custom
- // rule set can have default options passed in if needed.
- $options = $options + self::options();
- $serializer = new \HTML5\Serializer\Serializer($dom, $options);
- return $serializer->saveHTML();
+ $stream = fopen('php://temp', 'w');
+ static::save($dom, $stream, $options);
+ return stream_get_contents($stream, -1, 0);
}
/**
diff --git a/src/HTML5/Serializer/README.md b/src/HTML5/Serializer/README.md
index 17e3f75..849a47f 100644
--- a/src/HTML5/Serializer/README.md
+++ b/src/HTML5/Serializer/README.md
@@ -4,7 +4,7 @@ The serializer roughly follows sections _8.1 Writing HTML documents_ and section
_8.3 Serializing HTML fragments_ by converting DOMDocument, DOMDocumentFragment,
and DOMNodeList into HTML5.
- [ Serializer ] // Interface for saving.
+ [ HTML5 ] // Interface for saving.
||
[ Traverser ] // Walk the DOM
||
@@ -13,7 +13,7 @@ and DOMNodeList into HTML5.
[ HTML5 ] // HTML5 document or fragment in text.
-## The Serializer
+## HTML5 Class
Provides the top level interface for saving.
diff --git a/src/HTML5/Serializer/Serializer.php b/src/HTML5/Serializer/Serializer.php
deleted file mode 100644
index 7d1e7b2..0000000
--- a/src/HTML5/Serializer/Serializer.php
+++ /dev/null
@@ -1,82 +0,0 @@
-<?php
-/**
- * A simple serializer that walks the DOM tree and outputs HTML5.
- */
-namespace HTML5\Serializer;
-
-use \HTML5\Serializer\OutputRules;
-
-/**
- * Transform a DOM into an HTML5 document.
- *
- * This provides a serializer that roughly follows the save and load API
- * in the native PHP DOM implementation.
- *
- * For reference, see DOMDocument::save, DOMDocument::saveXML,
- * DOMDocument::saveHTML and DOMDocument::saveHTMLFile.
- */
-class Serializer {
- protected $dom;
- protected $options = array();
-
- /**
- * Create a serializer.
- *
- * This takes a DOM-like data structure. It SHOULD treat the
- * DOMNode as an interface, but this does not do type checking.
- *
- * @param DOMNode $dom
- * A DOMNode-like object. Typically, a DOMDocument should be passed.
- * @param array $options
- * Options that can be passed into the serializer. These include:
- * - format: a bool value to specify if formatting (e.g. add indentation)
- * should be used on the output. Defaults to TRUE.
- * - encode: Text written to the output is escaped by default and not all
- * entities are encoded. If this is set to TRUE all entities will be encoded.
- * Defaults to FALSE.
- */
- public function __construct($dom, $options = array()) {
- $this->dom = $dom;
- $this->options = $options;
- }
-
- /**
- * Save to a file.
- *
- * @param mixed $filename
- * A file handle resource or the
- * full name to the file. This will overwrite the contents of
- * any file that it finds.
- */
- public function save($filename) {
- $close = TRUE;
- if (is_resource($filename)) {
- $file = $filename;
- $close = FALSE;
- }
- else {
- $file = fopen($filename, 'w');
- }
- $rules = new OutputRules($file, $this->options);
- $trav = new Traverser($this->dom, $file, $rules, $this->options);
-
- $trav->walk();
-
- if ($close) {
- fclose($file);
- }
- }
-
- /**
- * Return the DOM as an HTML5 string.
- */
- public function saveHTML() {
- // We buffer into a temp-file backed memory map. This may or may not be
- // faster than writing directly to a string, but it makes the interface
- // consistant and will keep memory consumption lower (2MB max for the file
- // buffer).
- $stream = fopen('php://temp', 'w');
- $this->save($stream);
- return stream_get_contents($stream, -1, 0);
- }
-}
diff --git a/test/HTML5/Html5Test.php b/test/HTML5/Html5Test.php
index 6511ae5..13f5b19 100644
--- a/test/HTML5/Html5Test.php
+++ b/test/HTML5/Html5Test.php
@@ -5,6 +5,23 @@ require_once 'TestCase.php';
class Html5Test extends TestCase {
+ /**
+ * Parse and serialize a string.
+ */
+ protected function cycle($html) {
+ $dom = \HTML5::loadHTML('<!DOCTYPE html><html><body>' . $html . '</body></html>');
+ $out = \HTML5::saveHTML($dom);
+
+ return $out;
+ }
+
+ protected function cycleFragment($fragment) {
+ $dom = \HTML5::loadHTMLFragment($fragment);
+ $out = \HTML5::saveHTML($dom);
+
+ return $out;
+ }
+
public function testLoad() {
$dom = \HTML5::load(__DIR__ . '/Html5Test.html');
$this->assertInstanceOf('\DOMDocument', $dom);
@@ -182,4 +199,116 @@ class Html5Test extends TestCase {
$this->assertRegExp('|<f:name>Big rectangle thing</f:name>|',$markup);
}
+ public function testElements() {
+ // Should have content.
+ $res = $this->cycle('<div>FOO</div>');
+ $this->assertRegExp('|<div>FOO</div>|', $res);
+
+ // Should be empty
+ $res = $this->cycle('<span></span>');
+ $this->assertRegExp('|<span></span>|', $res);
+
+ // Should have content.
+ $res = $this->cycleFragment('<div>FOO</div>');
+ $this->assertRegExp('|<div>FOO</div>|', $res);
+
+ // Should be empty
+ $res = $this->cycleFragment('<span></span>');
+ $this->assertRegExp('|<span></span>|', $res);
+
+ // Should have no closing tag.
+ $res = $this->cycle('<hr>');
+ $this->assertRegExp('|<hr></body>|', $res);
+
+ }
+
+ public function testAttributes() {
+ $res = $this->cycle('<div attr="val">FOO</div>');
+ $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
+
+ // XXX: Note that spec does NOT require attrs in the same order.
+ $res = $this->cycle('<div attr="val" class="even">FOO</div>');
+ $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
+
+ $res = $this->cycle('<div xmlns:foo="http://example.com">FOO</div>');
+ $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
+
+ $res = $this->cycleFragment('<div attr="val">FOO</div>');
+ $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
+
+ // XXX: Note that spec does NOT require attrs in the same order.
+ $res = $this->cycleFragment('<div attr="val" class="even">FOO</div>');
+ $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
+
+ $res = $this->cycleFragment('<div xmlns:foo="http://example.com">FOO</div>');
+ $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
+ }
+
+ public function testPCData() {
+ $res = $this->cycle('<a>This is a test.</a>');
+ $this->assertRegExp('|This is a test.|', $res);
+
+ $res = $this->cycleFragment('<a>This is a test.</a>');
+ $this->assertRegExp('|This is a test.|', $res);
+
+ $res = $this->cycle('This
+ is
+ a
+ test.');
+
+ // Check that newlines are there, but don't count spaces.
+ $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
+
+ $res = $this->cycleFragment('This
+ is
+ a
+ test.');
+
+ // Check that newlines are there, but don't count spaces.
+ $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
+
+ $res = $this->cycle('<a>This <em>is</em> a test.</a>');
+ $this->assertRegExp('|This <em>is</em> a test.|', $res);
+
+ $res = $this->cycleFragment('<a>This <em>is</em> a test.</a>');
+ $this->assertRegExp('|This <em>is</em> a test.|', $res);
+ }
+
+ public function testUnescaped() {
+ $res = $this->cycle('<script>2 < 1</script>');
+ $this->assertRegExp('|2 < 1|', $res);
+
+ $res = $this->cycle('<style>div>div>div</style>');
+ $this->assertRegExp('|div&gt;div&gt;div|', $res);
+
+ $res = $this->cycleFragment('<script>2 < 1</script>');
+ $this->assertRegExp('|2 < 1|', $res);
+
+ $res = $this->cycleFragment('<style>div>div>div</style>');
+ $this->assertRegExp('|div&gt;div&gt;div|', $res);
+ }
+
+ public function testEntities() {
+ $res = $this->cycle('<a>Apples &amp; bananas.</a>');
+ $this->assertRegExp('|Apples &amp; bananas.|', $res);
+
+ $res = $this->cycleFragment('<a>Apples &amp; bananas.</a>');
+ $this->assertRegExp('|Apples &amp; bananas.|', $res);
+ }
+
+ public function testComment() {
+ $res = $this->cycle('a<!-- This is a test. -->b');
+ $this->assertRegExp('|<!-- This is a test. -->|', $res);
+
+ $res = $this->cycleFragment('a<!-- This is a test. -->b');
+ $this->assertRegExp('|<!-- This is a test. -->|', $res);
+ }
+
+ public function testCDATA() {
+ $res = $this->cycle('a<![CDATA[ This <is> a test. ]]>b');
+ $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
+
+ $res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b');
+ $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
+ }
}
diff --git a/test/HTML5/Serializer/SerializerTest.php b/test/HTML5/Serializer/SerializerTest.php
deleted file mode 100644
index 7cfe44f..0000000
--- a/test/HTML5/Serializer/SerializerTest.php
+++ /dev/null
@@ -1,190 +0,0 @@
-<?php
-// TODO: Add XML namespace examples.
-
-namespace HTML5\Tests;
-
-use \HTML5\Serializer\Serializer;
-
-require_once __DIR__ . '/../TestCase.php';
-
-/**
- * Test the Serializer.
- *
- * These tests are all dependent upon the parser. So if the parser
- * fails, the results of the serializer tests may not be conclusive.
- */
-class SerializerTest extends \HTML5\Tests\TestCase {
-
- /**
- * Parse and serialize a string.
- */
- protected function cycle($html) {
- $dom = \HTML5::loadHTML('<!DOCTYPE html><html><body>' . $html . '</body></html>');
- $options = \HTML5::options();
- $ser = new Serializer($dom, $options);
- $out = $ser->saveHTML();
-
- return $out;
- }
-
- protected function cycleFragment($fragment) {
- $dom = \HTML5::loadHTMLFragment($fragment);
- $options = \HTML5::options();
- $ser = new Serializer($dom, $options);
- $out = $ser->saveHTML();
-
- return $out;
- }
-
- public function testSaveHTML() {
- $html = '<!DOCTYPE html><html><body>test</body></html>';
-
- $dom = \HTML5::loadHTML($html);
- $this->assertTrue($dom instanceof \DOMDocument, "Canary");
-
- $ser = new Serializer($dom, \HTML5::options());
- $out = $ser->saveHTML();
-
- $this->assertTrue(count($out) >= count($html), 'Byte counts');
- $this->assertRegExp('/<!DOCTYPE html>/', $out, 'Has DOCTYPE.');
- $this->assertRegExp('/<body>test<\/body>/', $out, 'Has body text.');
-
- }
-
- public function testSave() {
- $html = '<!DOCTYPE html><html><body>test</body></html>';
-
- $dom = \HTML5::loadHTML($html);
- $this->assertTrue($dom instanceof \DOMDocument, "Canary");
-
- // Test saving to a stream.
- $ser = new Serializer($dom, \HTML5::options());
- $out = fopen("php://temp", "w");
- $ser->save($out);
-
- rewind($out);
- $res = stream_get_contents($out);
- $this->assertTrue(count($res) >= count($html));
-
- // Test saving to a file on the file system.
- $tmpfname = tempnam(sys_get_temp_dir(), "html5-php");
- $ser = new Serializer($dom, \HTML5::options());
- $ser->save($tmpfname);
- $content = file_get_contents($tmpfname);
- $this->assertRegExp('|<body>test</body>|', $content);
- unlink($tmpfname);
- }
-
- public function testElements() {
- // Should have content.
- $res = $this->cycle('<div>FOO</div>');
- $this->assertRegExp('|<div>FOO</div>|', $res);
-
- // Should be empty
- $res = $this->cycle('<span></span>');
- $this->assertRegExp('|<span></span>|', $res);
-
- // Should have content.
- $res = $this->cycleFragment('<div>FOO</div>');
- $this->assertRegExp('|<div>FOO</div>|', $res);
-
- // Should be empty
- $res = $this->cycleFragment('<span></span>');
- $this->assertRegExp('|<span></span>|', $res);
-
- // Should have no closing tag.
- $res = $this->cycle('<hr>');
- $this->assertRegExp('|<hr></body>|', $res);
-
- }
-
- public function testAttributes() {
- $res = $this->cycle('<div attr="val">FOO</div>');
- $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
-
- // XXX: Note that spec does NOT require attrs in the same order.
- $res = $this->cycle('<div attr="val" class="even">FOO</div>');
- $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
-
- $res = $this->cycle('<div xmlns:foo="http://example.com">FOO</div>');
- $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
-
- $res = $this->cycleFragment('<div attr="val">FOO</div>');
- $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
-
- // XXX: Note that spec does NOT require attrs in the same order.
- $res = $this->cycleFragment('<div attr="val" class="even">FOO</div>');
- $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
-
- $res = $this->cycleFragment('<div xmlns:foo="http://example.com">FOO</div>');
- $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
- }
-
- public function testPCData() {
- $res = $this->cycle('<a>This is a test.</a>');
- $this->assertRegExp('|This is a test.|', $res);
-
- $res = $this->cycleFragment('<a>This is a test.</a>');
- $this->assertRegExp('|This is a test.|', $res);
-
- $res = $this->cycle('This
- is
- a
- test.');
-
- // Check that newlines are there, but don't count spaces.
- $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
-
- $res = $this->cycleFragment('This
- is
- a
- test.');
-
- // Check that newlines are there, but don't count spaces.
- $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
-
- $res = $this->cycle('<a>This <em>is</em> a test.</a>');
- $this->assertRegExp('|This <em>is</em> a test.|', $res);
-
- $res = $this->cycleFragment('<a>This <em>is</em> a test.</a>');
- $this->assertRegExp('|This <em>is</em> a test.|', $res);
- }
-
- public function testUnescaped() {
- $res = $this->cycle('<script>2 < 1</script>');
- $this->assertRegExp('|2 < 1|', $res);
-
- $res = $this->cycle('<style>div>div>div</style>');
- $this->assertRegExp('|div&gt;div&gt;div|', $res);
-
- $res = $this->cycleFragment('<script>2 < 1</script>');
- $this->assertRegExp('|2 < 1|', $res);
-
- $res = $this->cycleFragment('<style>div>div>div</style>');
- $this->assertRegExp('|div&gt;div&gt;div|', $res);
- }
-
- public function testEntities() {
- $res = $this->cycle('<a>Apples &amp; bananas.</a>');
- $this->assertRegExp('|Apples &amp; bananas.|', $res);
-
- $res = $this->cycleFragment('<a>Apples &amp; bananas.</a>');
- $this->assertRegExp('|Apples &amp; bananas.|', $res);
- }
-
- public function testComment() {
- $res = $this->cycle('a<!-- This is a test. -->b');
- $this->assertRegExp('|<!-- This is a test. -->|', $res);
-
- $res = $this->cycleFragment('a<!-- This is a test. -->b');
- $this->assertRegExp('|<!-- This is a test. -->|', $res);
- }
-
- public function testCDATA() {
- $res = $this->cycle('a<![CDATA[ This <is> a test. ]]>b');
- $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
-
- $res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b');
- $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
- }
-}