diff options
-rw-r--r-- | src/HTML5.php | 39 |
-rw-r--r-- | src/HTML5/Serializer/README.md | 4 |
-rw-r--r-- | src/HTML5/Serializer/Serializer.php | 82 |
-rw-r--r-- | test/HTML5/Html5Test.php | 129 |
-rw-r--r-- | test/HTML5/Serializer/SerializerTest.php | 190 |
5 files changed, 152 insertions, 292 deletions
diff --git a/src/HTML5.php b/src/HTML5.php index b31d89c..48b6148 100644 --- a/src/HTML5.php +++ b/src/HTML5.php @@ -5,7 +5,8 @@ use HTML5\Parser\FileInputStream; use HTML5\Parser\Scanner; use HTML5\Parser\Tokenizer; use HTML5\Parser\DOMTreeBuilder; -use HTML5\Serializer\Serializer; +use HTML5\Serializer\OutputRules; +use HTML5\Serializer\Traverser; /** * The main HTML5 front end. @@ -105,20 +106,28 @@ class HTML5 { * The filename to be written. * @param array $options * Configuration options when serializing the DOM. These include: - * - output_rules: The class with the serializer writing rules. Defaults to - * \HTML5\Serializer\OutputRules. The standard rules are representative of the - * original document. This can be replaced by alternatives that can - * minify or make other alterations. * - encode_entities: Text written to the output is escaped by default and not all * entities are encoded. If this is set to TRUE all entities will be encoded. * Defaults to FALSE. */ public static function save($dom, $file, $options = array()) { - // Passing all the default options is intentional. This way a custom - // rule set can have default options passed in if needed. $options = $options + self::options(); - $serializer = new \HTML5\Serializer\Serializer($dom, $options); - return $serializer->save($file); + $close = TRUE; + if (is_resource($file)) { + $stream = $file; + $close = FALSE; + } + else { + $stream = fopen($file, 'w'); + } + $rules = new OutputRules($stream, $options); + $trav = new Traverser($dom, $stream, $rules, $options); + + $trav->walk(); + + if ($close) { + fclose($stream); + } } /** @@ -128,10 +137,6 @@ class HTML5 { * The DOM to be serialized. * @param array $options * Configuration options when serializing the DOM. These include: - * - output_rules: The class with the serializer writing rules. Defaults to - * \HTML5\Serializer\OutputRules. The standard rules are representative of the - * original document. 
This can be replaced by alternatives that can - * minify or make other alterations. * - encode_entities: Text written to the output is escaped by default and not all * entities are encoded. If this is set to TRUE all entities will be encoded. * Defaults to FALSE. @@ -140,11 +145,9 @@ class HTML5 { * A HTML5 documented generated from the DOM. */ public static function saveHTML($dom, $options = array()) { - // Passing all the default options is intentional. This way a custom - // rule set can have default options passed in if needed. - $options = $options + self::options(); - $serializer = new \HTML5\Serializer\Serializer($dom, $options); - return $serializer->saveHTML(); + $stream = fopen('php://temp', 'w'); + static::save($dom, $stream, $options); + return stream_get_contents($stream, -1, 0); } /** diff --git a/src/HTML5/Serializer/README.md b/src/HTML5/Serializer/README.md index 17e3f75..849a47f 100644 --- a/src/HTML5/Serializer/README.md +++ b/src/HTML5/Serializer/README.md @@ -4,7 +4,7 @@ The serializer roughly follows sections _8.1 Writing HTML documents_ and section _8.3 Serializing HTML fragments_ by converting DOMDocument, DOMDocumentFragment, and DOMNodeList into HTML5. - [ Serializer ] // Interface for saving. + [ HTML5 ] // Interface for saving. || [ Traverser ] // Walk the DOM || @@ -13,7 +13,7 @@ and DOMNodeList into HTML5. [ HTML5 ] // HTML5 document or fragment in text. -## The Serializer +## HTML5 Class Provides the top level interface for saving. diff --git a/src/HTML5/Serializer/Serializer.php b/src/HTML5/Serializer/Serializer.php deleted file mode 100644 index 7d1e7b2..0000000 --- a/src/HTML5/Serializer/Serializer.php +++ /dev/null @@ -1,82 +0,0 @@ -<?php -/** - * A simple serializer that walks the DOM tree and outputs HTML5. - */ -namespace HTML5\Serializer; - -use \HTML5\Serializer\OutputRules; - -/** - * Transform a DOM into an HTML5 document. 
- * - * This provides a serializer that roughly follows the save and load API - * in the native PHP DOM implementation. - * - * For reference, see DOMDocument::save, DOMDocument::saveXML, - * DOMDocument::saveHTML and DOMDocument::saveHTMLFile. - */ -class Serializer { - protected $dom; - protected $options = array(); - - /** - * Create a serializer. - * - * This takes a DOM-like data structure. It SHOULD treat the - * DOMNode as an interface, but this does not do type checking. - * - * @param DOMNode $dom - * A DOMNode-like object. Typically, a DOMDocument should be passed. - * @param array $options - * Options that can be passed into the serializer. These include: - * - format: a bool value to specify if formatting (e.g. add indentation) - * should be used on the output. Defaults to TRUE. - * - encode: Text written to the output is escaped by default and not all - * entities are encoded. If this is set to TRUE all entities will be encoded. - * Defaults to FALSE. - */ - public function __construct($dom, $options = array()) { - $this->dom = $dom; - $this->options = $options; - } - - /** - * Save to a file. - * - * @param mixed $filename - * A file handle resource or the - * full name to the file. This will overwrite the contents of - * any file that it finds. - */ - public function save($filename) { - $close = TRUE; - if (is_resource($filename)) { - $file = $filename; - $close = FALSE; - } - else { - $file = fopen($filename, 'w'); - } - $rules = new OutputRules($file, $this->options); - $trav = new Traverser($this->dom, $file, $rules, $this->options); - - $trav->walk(); - - if ($close) { - fclose($file); - } - } - - /** - * Return the DOM as an HTML5 string. - */ - public function saveHTML() { - // We buffer into a temp-file backed memory map. This may or may not be - // faster than writing directly to a string, but it makes the interface - // consistant and will keep memory consumption lower (2MB max for the file - // buffer). 
- $stream = fopen('php://temp', 'w'); - $this->save($stream); - return stream_get_contents($stream, -1, 0); - } -} diff --git a/test/HTML5/Html5Test.php b/test/HTML5/Html5Test.php index 6511ae5..13f5b19 100644 --- a/test/HTML5/Html5Test.php +++ b/test/HTML5/Html5Test.php @@ -5,6 +5,23 @@ require_once 'TestCase.php'; class Html5Test extends TestCase { + /** + * Parse and serialize a string. + */ + protected function cycle($html) { + $dom = \HTML5::loadHTML('<!DOCTYPE html><html><body>' . $html . '</body></html>'); + $out = \HTML5::saveHTML($dom); + + return $out; + } + + protected function cycleFragment($fragment) { + $dom = \HTML5::loadHTMLFragment($fragment); + $out = \HTML5::saveHTML($dom); + + return $out; + } + public function testLoad() { $dom = \HTML5::load(__DIR__ . '/Html5Test.html'); $this->assertInstanceOf('\DOMDocument', $dom); @@ -182,4 +199,116 @@ class Html5Test extends TestCase { $this->assertRegExp('|<f:name>Big rectangle thing</f:name>|',$markup); } + public function testElements() { + // Should have content. + $res = $this->cycle('<div>FOO</div>'); + $this->assertRegExp('|<div>FOO</div>|', $res); + + // Should be empty + $res = $this->cycle('<span></span>'); + $this->assertRegExp('|<span></span>|', $res); + + // Should have content. + $res = $this->cycleFragment('<div>FOO</div>'); + $this->assertRegExp('|<div>FOO</div>|', $res); + + // Should be empty + $res = $this->cycleFragment('<span></span>'); + $this->assertRegExp('|<span></span>|', $res); + + // Should have no closing tag. + $res = $this->cycle('<hr>'); + $this->assertRegExp('|<hr></body>|', $res); + + } + + public function testAttributes() { + $res = $this->cycle('<div attr="val">FOO</div>'); + $this->assertRegExp('|<div attr="val">FOO</div>|', $res); + + // XXX: Note that spec does NOT require attrs in the same order. 
+ $res = $this->cycle('<div attr="val" class="even">FOO</div>'); + $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res); + + $res = $this->cycle('<div xmlns:foo="http://example.com">FOO</div>'); + $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res); + + $res = $this->cycleFragment('<div attr="val">FOO</div>'); + $this->assertRegExp('|<div attr="val">FOO</div>|', $res); + + // XXX: Note that spec does NOT require attrs in the same order. + $res = $this->cycleFragment('<div attr="val" class="even">FOO</div>'); + $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res); + + $res = $this->cycleFragment('<div xmlns:foo="http://example.com">FOO</div>'); + $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res); + } + + public function testPCData() { + $res = $this->cycle('<a>This is a test.</a>'); + $this->assertRegExp('|This is a test.|', $res); + + $res = $this->cycleFragment('<a>This is a test.</a>'); + $this->assertRegExp('|This is a test.|', $res); + + $res = $this->cycle('This + is + a + test.'); + + // Check that newlines are there, but don't count spaces. + $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res); + + $res = $this->cycleFragment('This + is + a + test.'); + + // Check that newlines are there, but don't count spaces. 
+ $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res); + + $res = $this->cycle('<a>This <em>is</em> a test.</a>'); + $this->assertRegExp('|This <em>is</em> a test.|', $res); + + $res = $this->cycleFragment('<a>This <em>is</em> a test.</a>'); + $this->assertRegExp('|This <em>is</em> a test.|', $res); + } + + public function testUnescaped() { + $res = $this->cycle('<script>2 < 1</script>'); + $this->assertRegExp('|2 < 1|', $res); + + $res = $this->cycle('<style>div>div>div</style>'); + $this->assertRegExp('|div>div>div|', $res); + + $res = $this->cycleFragment('<script>2 < 1</script>'); + $this->assertRegExp('|2 < 1|', $res); + + $res = $this->cycleFragment('<style>div>div>div</style>'); + $this->assertRegExp('|div>div>div|', $res); + } + + public function testEntities() { + $res = $this->cycle('<a>Apples & bananas.</a>'); + $this->assertRegExp('|Apples & bananas.|', $res); + + $res = $this->cycleFragment('<a>Apples & bananas.</a>'); + $this->assertRegExp('|Apples & bananas.|', $res); + } + + public function testComment() { + $res = $this->cycle('a<!-- This is a test. -->b'); + $this->assertRegExp('|<!-- This is a test. -->|', $res); + + $res = $this->cycleFragment('a<!-- This is a test. -->b'); + $this->assertRegExp('|<!-- This is a test. -->|', $res); + } + + public function testCDATA() { + $res = $this->cycle('a<![CDATA[ This <is> a test. ]]>b'); + $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res); + + $res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b'); + $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res); + } } diff --git a/test/HTML5/Serializer/SerializerTest.php b/test/HTML5/Serializer/SerializerTest.php deleted file mode 100644 index 7cfe44f..0000000 --- a/test/HTML5/Serializer/SerializerTest.php +++ /dev/null @@ -1,190 +0,0 @@ -<?php -// TODO: Add XML namespace examples. - -namespace HTML5\Tests; - -use \HTML5\Serializer\Serializer; - -require_once __DIR__ . 
'/../TestCase.php'; - -/** - * Test the Serializer. - * - * These tests are all dependent upon the parser. So if the parser - * fails, the results of the serializer tests may not be conclusive. - */ -class SerializerTest extends \HTML5\Tests\TestCase { - - /** - * Parse and serialize a string. - */ - protected function cycle($html) { - $dom = \HTML5::loadHTML('<!DOCTYPE html><html><body>' . $html . '</body></html>'); - $options = \HTML5::options(); - $ser = new Serializer($dom, $options); - $out = $ser->saveHTML(); - - return $out; - } - - protected function cycleFragment($fragment) { - $dom = \HTML5::loadHTMLFragment($fragment); - $options = \HTML5::options(); - $ser = new Serializer($dom, $options); - $out = $ser->saveHTML(); - - return $out; - } - - public function testSaveHTML() { - $html = '<!DOCTYPE html><html><body>test</body></html>'; - - $dom = \HTML5::loadHTML($html); - $this->assertTrue($dom instanceof \DOMDocument, "Canary"); - - $ser = new Serializer($dom, \HTML5::options()); - $out = $ser->saveHTML(); - - $this->assertTrue(count($out) >= count($html), 'Byte counts'); - $this->assertRegExp('/<!DOCTYPE html>/', $out, 'Has DOCTYPE.'); - $this->assertRegExp('/<body>test<\/body>/', $out, 'Has body text.'); - - } - - public function testSave() { - $html = '<!DOCTYPE html><html><body>test</body></html>'; - - $dom = \HTML5::loadHTML($html); - $this->assertTrue($dom instanceof \DOMDocument, "Canary"); - - // Test saving to a stream. - $ser = new Serializer($dom, \HTML5::options()); - $out = fopen("php://temp", "w"); - $ser->save($out); - - rewind($out); - $res = stream_get_contents($out); - $this->assertTrue(count($res) >= count($html)); - - // Test saving to a file on the file system. 
- $tmpfname = tempnam(sys_get_temp_dir(), "html5-php"); - $ser = new Serializer($dom, \HTML5::options()); - $ser->save($tmpfname); - $content = file_get_contents($tmpfname); - $this->assertRegExp('|<body>test</body>|', $content); - unlink($tmpfname); - } - - public function testElements() { - // Should have content. - $res = $this->cycle('<div>FOO</div>'); - $this->assertRegExp('|<div>FOO</div>|', $res); - - // Should be empty - $res = $this->cycle('<span></span>'); - $this->assertRegExp('|<span></span>|', $res); - - // Should have content. - $res = $this->cycleFragment('<div>FOO</div>'); - $this->assertRegExp('|<div>FOO</div>|', $res); - - // Should be empty - $res = $this->cycleFragment('<span></span>'); - $this->assertRegExp('|<span></span>|', $res); - - // Should have no closing tag. - $res = $this->cycle('<hr>'); - $this->assertRegExp('|<hr></body>|', $res); - - } - - public function testAttributes() { - $res = $this->cycle('<div attr="val">FOO</div>'); - $this->assertRegExp('|<div attr="val">FOO</div>|', $res); - - // XXX: Note that spec does NOT require attrs in the same order. - $res = $this->cycle('<div attr="val" class="even">FOO</div>'); - $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res); - - $res = $this->cycle('<div xmlns:foo="http://example.com">FOO</div>'); - $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res); - - $res = $this->cycleFragment('<div attr="val">FOO</div>'); - $this->assertRegExp('|<div attr="val">FOO</div>|', $res); - - // XXX: Note that spec does NOT require attrs in the same order. 
- $res = $this->cycleFragment('<div attr="val" class="even">FOO</div>'); - $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res); - - $res = $this->cycleFragment('<div xmlns:foo="http://example.com">FOO</div>'); - $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res); - } - - public function testPCData() { - $res = $this->cycle('<a>This is a test.</a>'); - $this->assertRegExp('|This is a test.|', $res); - - $res = $this->cycleFragment('<a>This is a test.</a>'); - $this->assertRegExp('|This is a test.|', $res); - - $res = $this->cycle('This - is - a - test.'); - - // Check that newlines are there, but don't count spaces. - $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res); - - $res = $this->cycleFragment('This - is - a - test.'); - - // Check that newlines are there, but don't count spaces. - $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res); - - $res = $this->cycle('<a>This <em>is</em> a test.</a>'); - $this->assertRegExp('|This <em>is</em> a test.|', $res); - - $res = $this->cycleFragment('<a>This <em>is</em> a test.</a>'); - $this->assertRegExp('|This <em>is</em> a test.|', $res); - } - - public function testUnescaped() { - $res = $this->cycle('<script>2 < 1</script>'); - $this->assertRegExp('|2 < 1|', $res); - - $res = $this->cycle('<style>div>div>div</style>'); - $this->assertRegExp('|div>div>div|', $res); - - $res = $this->cycleFragment('<script>2 < 1</script>'); - $this->assertRegExp('|2 < 1|', $res); - - $res = $this->cycleFragment('<style>div>div>div</style>'); - $this->assertRegExp('|div>div>div|', $res); - } - - public function testEntities() { - $res = $this->cycle('<a>Apples & bananas.</a>'); - $this->assertRegExp('|Apples & bananas.|', $res); - - $res = $this->cycleFragment('<a>Apples & bananas.</a>'); - $this->assertRegExp('|Apples & bananas.|', $res); - } - - public function testComment() { - $res = $this->cycle('a<!-- This is a test. -->b'); - $this->assertRegExp('|<!-- This is a test. 
-->|', $res); - - $res = $this->cycleFragment('a<!-- This is a test. -->b'); - $this->assertRegExp('|<!-- This is a test. -->|', $res); - } - - public function testCDATA() { - $res = $this->cycle('a<![CDATA[ This <is> a test. ]]>b'); - $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res); - - $res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b'); - $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res); - } -} |