diff options
author | Matt Butcher <[email protected]> | 2014-02-11 09:56:01 -0700 |
---|---|---|
committer | Matt Butcher <[email protected]> | 2014-02-11 09:56:01 -0700 |
commit | 44e8e23626bf619844baf9983e931d2f58606377 (patch) | |
tree | 62625f63a954ab5a3f9bb4184039a68496e3896a | |
parent | 77ad931cd824feb33eebae08cf3a5a47bce1e337 (diff) | |
parent | 3b691837c6d7a0969137048fbda274463d6b1d7c (diff) |
Merge branch 'master' of github.com:Masterminds/html5-php
-rw-r--r-- | .travis.yml | 11 | ||||
-rw-r--r-- | CREDITS | 5 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | RELEASE.md | 9 | ||||
-rw-r--r-- | composer.json | 5 | ||||
-rw-r--r-- | phpunit.xml.dist | 29 | ||||
-rw-r--r-- | src/HTML5.php | 25 | ||||
-rw-r--r-- | src/HTML5/Elements.php | 11 | ||||
-rw-r--r-- | src/HTML5/Parser/DOMTreeBuilder.php | 16 | ||||
-rw-r--r-- | src/HTML5/Parser/StringInputStream.php | 3 | ||||
-rw-r--r-- | src/HTML5/Parser/Tokenizer.php | 6 | ||||
-rw-r--r-- | src/HTML5/Parser/TreeBuildingRules.php | 11 | ||||
-rw-r--r-- | src/HTML5/Serializer/OutputRules.php | 2 | ||||
-rw-r--r-- | test/HTML5/Html5Test.php | 15 | ||||
-rw-r--r-- | test/HTML5/Parser/DOMTreeBuilderTest.php | 50 | ||||
-rw-r--r-- | test/HTML5/Parser/TokenizerTest.php | 8 | ||||
-rw-r--r-- | test/HTML5/Parser/TreeBuildingRulesTest.php | 11 | ||||
-rw-r--r-- | test/HTML5/Serializer/OutputRulesTest.php | 11 | ||||
-rw-r--r-- | test/HTML5/Serializer/TraverserTest.php | 37 |
19 files changed, 222 insertions, 45 deletions
diff --git a/.travis.yml b/.travis.yml index e01eb12..6b33aae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,12 @@ notifications: irc: "irc.freenode.net#masterminds" before_script: - - curl -s http://getcomposer.org/installer | php - - php composer.phar install + - composer self-update + - composer install --dev -script: phpunit test/HTML5
\ No newline at end of file +script: + - mkdir -p build/logs + - phpunit -c phpunit.xml.dist + +after_script: + - php vendor/bin/coveralls -v
\ No newline at end of file @@ -1,6 +1,7 @@ Matt Butcher [technosophos] <[email protected]> (lead) Matt Farina [mattfarina] <[email protected]> (lead) -Kukhar Vasily [ngreduce] <[email protected]> (contributor) -Geoffrey Sneddon [gsnedders] <[email protected]> (contributor) +Asmir Mustafic [goetas] <[email protected]> (contributor) Edward Z. Yang [ezyang] <[email protected]> (contributor) +Geoffrey Sneddon [gsnedders] <[email protected]> (contributor) +Kukhar Vasily [ngreduce] <[email protected]> (contributor) Rune Christensen [MrElectronic] <[email protected]> (contributor) @@ -11,7 +11,7 @@ But after some initial refactoring work, we began a new parser. - DOM tree builder - Interoperability with QueryPath [[in progress](https://github.com/technosophos/querypath/issues/114)] -[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) +[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) [![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5) [![Coverage Status](https://coveralls.io/repos/Masterminds/html5-php/badge.png?branch=master)](https://coveralls.io/r/Masterminds/html5-php?branch=master) ## Installation @@ -1,4 +1,11 @@ # Release Notes -1.0.1 (unreleased) +1.0.1 (2013-11-07) - CDATA encoding is improved. (Non-standard; Issue #19) +- Some parser rules were not returning the new current element. (Issue #20) +- Added, to the README, details on code test coverage and to packagist version. +- Fixed processor instructions. +- Improved test coverage and documentation coverage. + +1.0.0 (2013-10-02) +- Initial release. 
diff --git a/composer.json b/composer.json index d574580..6b88b8a 100644 --- a/composer.json +++ b/composer.json @@ -19,6 +19,11 @@ "ext-libxml" : "*", "php" : ">=5.3.0" }, + "require-dev": { + "satooshi/php-coveralls": "0.6.*", + "phpunit/phpunit" : "*", + "phpdocumentor/phpdocumentor": "2.1.*" + }, "autoload": { "psr-0": {"HTML5": "src"} } diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..c145f8b --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8"?> +<phpunit colors="true"> + <testsuites> + <testsuite name="PHPUnit"> + <directory>test/HTML5/</directory> + </testsuite> + </testsuites> + <filter> + <blacklist> + <file>src/HTML5/Parser/InputStream.php</file> + <file>src/HTML5/Serializer/RulesInterface.php</file> + <file>src/HTML5/Entities.php</file> + <file>src/HTML5/Serializer/HTML5Entities.php</file> + </blacklist> + </filter> + <logging> + <log + type="coverage-html" + target="build/coverage" + charset="UTF-8" + yui="true" + highlight="true" + lowUpperBound="35" + highLowerBound="70" + showUncoveredFiles="true" + /> + <log type="coverage-clover" target="build/logs/clover.xml"/> + </logging> +</phpunit> diff --git a/src/HTML5.php b/src/HTML5.php index ce0927f..7295fb4 100644 --- a/src/HTML5.php +++ b/src/HTML5.php @@ -1,5 +1,7 @@ <?php - +/** + * The main HTML5 front end. + */ use HTML5\Parser\StringInputStream; use HTML5\Parser\FileInputStream; use HTML5\Parser\Scanner; @@ -9,8 +11,6 @@ use HTML5\Serializer\OutputRules; use HTML5\Serializer\Traverser; /** - * The main HTML5 front end. - * * This class offers convenience methods for parsing and serializing HTML5. * It is roughly designed to mirror the \DOMDocument class that is * provided with most versions of PHP. @@ -19,6 +19,10 @@ use HTML5\Serializer\Traverser; */ class HTML5 { + /** + * Global options for the parser and serializer. + * @var array + */ public static $options = array( // If the serializer should encode all entities. 
@@ -77,6 +81,15 @@ class HTML5 { * * This is here to provide backwards compatibility with the * PHP DOM implementation. It simply calls load(). + * + * @param string $file + * The path to the file to parse. If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first + * byte of input. + * + * @return \DOMDocument + * A DOM document. This object type is defined by the libxml + * library, and should have been included with your version of PHP. */ public static function loadHTMLFile($file, $options = NULL) { return static::load($file, $options); @@ -166,6 +179,12 @@ class HTML5 { return $events->document(); } + /** + * Parse an input stream where the stream is a fragment. + * + * Lower-level loading function. This requires an input stream instead + * of a string, file, or resource. + */ public static function parseFragment(\HTML5\Parser\InputStream $input) { $events = new DOMTreeBuilder(TRUE); $scanner = new Scanner($input); diff --git a/src/HTML5/Elements.php b/src/HTML5/Elements.php index 0dabc3a..69d3882 100644 --- a/src/HTML5/Elements.php +++ b/src/HTML5/Elements.php @@ -1,9 +1,10 @@ <?php +/** + * Provide general element functions. + */ namespace HTML5; /** - * Provide general element functions. - * * This class provides general information about HTML5 elements, * including syntactic and semantic issues. Parsers and serializers can * use this class as a reference point for information about the rules @@ -532,6 +533,12 @@ class Elements { /** * Get the element mask for the given element name. + * + * @param string $name + * The name of the element. + * + * @return int + * The element mask. 
*/ public static function element($name) { if (isset(static::$html5[$name])) { diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index 094104e..13ae3bc 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -289,6 +289,11 @@ class DOMTreeBuilder implements EventHandler { return; } + // Special case handling for SVG. + if ($this->insertMode == static::IM_IN_SVG) { + $lname = Elements::normalizeSvgElement($lname); + } + // XXX: Not sure whether we need this anymore. // if ($name != $lname) { // return $this->quirksTreeResolver($lname); @@ -301,8 +306,8 @@ class DOMTreeBuilder implements EventHandler { } //$this->current = $this->current->parentNode; - if (!$this->autoclose($name)) { - $this->parseError('Could not find closing tag for ' . $name); + if (!$this->autoclose($lname)) { + $this->parseError('Could not find closing tag for ' . $lname); } //switch ($this->insertMode) { @@ -318,11 +323,6 @@ class DOMTreeBuilder implements EventHandler { $this->insertMode = static::IM_IN_BODY; break; } - - // 8.2.5.4.7 - if ($name == 'sarcasm') { - $this->text("Take a deep breath."); - } } public function comment($cdata) { @@ -372,7 +372,7 @@ class DOMTreeBuilder implements EventHandler { // Important: The processor may modify the current DOM tree however // it sees fit. if (isset($this->processor)) { - $res = $processor->process($this->current, $name, $data); + $res = $this->processor->process($this->current, $name, $data); if (!empty($res)) { $this->current = $res; } diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php index 0d2a7f3..ca5fee0 100644 --- a/src/HTML5/Parser/StringInputStream.php +++ b/src/HTML5/Parser/StringInputStream.php @@ -1,4 +1,7 @@ <?php +/** + * Loads a string to be parsed. 
+ */ namespace HTML5\Parser; /* diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 9866246..df77b5d 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -295,7 +295,7 @@ class Tokenizer { return $this->bogusComment('</'); } - $name = $this->scanner->charsUntil("\n\f \t>"); + $name = strtolower($this->scanner->charsUntil("\n\f \t>")); // Trash whitespace. $this->scanner->whitespace(); @@ -475,7 +475,7 @@ class Tokenizer { * The attribute value. */ protected function quotedAttributeValue($quote) { - $stoplist = "\t\n\f" . $quote; + $stoplist = "\f" . $quote; $val = ''; $tok = $this->scanner->current(); while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) { @@ -846,7 +846,7 @@ class Tokenizer { $buffer .= $this->scanner->charsUntil($first); // Stop as soon as we hit the stopping condition. - if ($this->sequenceMatches($sequence)) { + if ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))) { return $buffer; } $buffer .= $this->scanner->current(); diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php index 0f1b3a5..b87c6b5 100644 --- a/src/HTML5/Parser/TreeBuildingRules.php +++ b/src/HTML5/Parser/TreeBuildingRules.php @@ -71,20 +71,19 @@ class TreeBuildingRules { case 'rp': return $this->handleRT($new, $current); case 'optgroup': - $this->closeIfCurrentMatches($new, $current, array('optgroup')); + return $this->closeIfCurrentMatches($new, $current, array('optgroup')); case 'option': - $this->closeIfCurrentMatches($new, $current, array('option', 'optgroup')); + return $this->closeIfCurrentMatches($new, $current, array('option', 'optgroup')); case 'tr': - $this->closeIfCurrentMatches($new, $current, array('tr')); + return $this->closeIfCurrentMatches($new, $current, array('tr')); case 'td': case 'th': - $this->closeIfCurrentMatches($new, $current, array('th', 'td')); + return $this->closeIfCurrentMatches($new, $current, array('th', 
'td')); case 'tbody': case 'thead': case 'tfoot': case 'table': // Spec isn't explicit about this, but it's necessary. - $this->closeIfCurrentMatches($new, $current, array('thead', 'tfoot', 'tbody')); - + return $this->closeIfCurrentMatches($new, $current, array('thead', 'tfoot', 'tbody')); } return $current; diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php index bb0cb45..bc57346 100644 --- a/src/HTML5/Serializer/OutputRules.php +++ b/src/HTML5/Serializer/OutputRules.php @@ -120,7 +120,7 @@ class OutputRules implements \HTML5\Serializer\RulesInterface { } public function processorInstruction($ele) { - $this->wr('<?')->wr($ele->target)->wr($ele->data)->wr(' ?>'); + $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr('?>'); } /** diff --git a/test/HTML5/Html5Test.php b/test/HTML5/Html5Test.php index 13f5b19..dcb51cd 100644 --- a/test/HTML5/Html5Test.php +++ b/test/HTML5/Html5Test.php @@ -26,6 +26,15 @@ class Html5Test extends TestCase { $dom = \HTML5::load(__DIR__ . '/Html5Test.html'); $this->assertInstanceOf('\DOMDocument', $dom); $this->assertEmpty($dom->errors); + + $file = fopen(__DIR__ . '/Html5Test.html', 'r'); + $dom = \HTML5::load($file); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($dom->errors); + + $dom = \HTML5::loadHTMLFile(__DIR__ . '/Html5Test.html'); + $this->assertInstanceOf('\DOMDocument', $dom); + $this->assertEmpty($dom->errors); } public function testLoadHTML() { @@ -125,7 +134,7 @@ class Html5Test extends TestCase { </body> </html>'); - $this->assertEmpty($dom->errors); + $this->assertEmpty($dom->errors, print_r($dom->errors, TRUE)); // Test a mixed case attribute. 
$list = $dom->getElementsByTagName('svg'); @@ -192,11 +201,13 @@ class Html5Test extends TestCase { <f:name>Big rectangle thing</f:name> <f:width>40</f:width> <f:length>80</f:length> - </f:rug>"); + </f:rug> + <sarcasm>um, yeah</sarcasm>"); $this->assertEmpty($dom->errors); $markup = \HTML5::saveHTML($dom); $this->assertRegExp('|<f:name>Big rectangle thing</f:name>|',$markup); + $this->assertRegExp('|<sarcasm>um, yeah</sarcasm>|',$markup); } public function testElements() { diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index adfc2c9..6eeafe8 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -301,6 +301,14 @@ class DOMTreeBuilderTest extends \HTML5\Tests\TestCase { $this->assertEquals('textPath', $textPath->tagName); } + public function testNoScript() { + $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>'; + $doc = $this->parse($html); + $this->assertEmpty($doc->errors); + $noscript = $doc->getElementsByTagName('noscript')->item(0); + $this->assertEquals('noscript', $noscript->tagName); + } + /** * Regression for issue #13 */ @@ -314,4 +322,46 @@ class DOMTreeBuilderTest extends \HTML5\Tests\TestCase { $this->assertEquals('span', $span->tagName); $this->assertEquals('Test', $span->textContent); } + + public function testInstructionProcessor() { + $string = '<!DOCTYPE html><html><?foo bar ?></html>'; + + $treeBuilder = new DOMTreeBuilder(); + $is = new InstructionProcessorMock(); + $treeBuilder->setInstructionProcessor($is); + + $input = new StringInputStream($string); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $treeBuilder); + + $parser->parse(); + $dom = $treeBuilder->document(); + $div = $dom->getElementsByTagName('div')->item(0); + + $this->assertEquals(1, $is->count); + $this->assertEquals('foo', $is->name); + $this->assertEquals('bar ', $is->data); + $this->assertEquals('div', $div->tagName); + 
$this->assertEquals('foo', $div->textContent); + } +} + +class InstructionProcessorMock implements \HTML5\InstructionProcessor { + + public $name = NULL; + public $data = NULL; + public $count = 0; + + public function process(\DOMElement $element, $name, $data) { + $this->name = $name; + $this->data = $data; + $this->count++; + + $div = $element->ownerDocument->createElement("div"); + $div->nodeValue = 'foo'; + + $element->appendChild($div); + + return $div; + } } diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index 0ac987f..231827c 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -142,7 +142,7 @@ class TokenizerTest extends \HTML5\Tests\TestCase { '</test >' => 'test', '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => - 'thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend', + 'thisisthetagthatdoesntenditjustgoesonandonmyfriend', // See 8.2.4.10, which requires this and does not say error. '</a<b>' => 'a<b', ); @@ -427,6 +427,12 @@ class TokenizerTest extends \HTML5\Tests\TestCase { $this->assertEventEquals('text', $expects, $events->get(2)); } + // Testing case sensitivity + $events = $this->parse('<TITLE>a test</TITLE>'); + $this->assertEventEquals('startTag', 'title', $events->get(0)); + $this->assertEventEquals('text', 'a test', $events->get(1)); + $this->assertEventEquals('endTag', 'title', $events->get(2)); + } public function testText() { diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php index fe02893..a247cea 100644 --- a/test/HTML5/Parser/TreeBuildingRulesTest.php +++ b/test/HTML5/Parser/TreeBuildingRulesTest.php @@ -62,4 +62,15 @@ class TreeBuildingRulesTest extends \HTML5\Tests\TestCase { $this->assertEquals('dd', $list->lastChild->tagName); } + public function testTable() { + $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz'); + $doc = $this->parse($html); + + $list = 
$doc->getElementById('a'); + + $this->assertEquals(3, $list->childNodes->length); + $this->assertEquals('th', $list->firstChild->tagName); + $this->assertEquals('td', $list->lastChild->tagName); + } + } diff --git a/test/HTML5/Serializer/OutputRulesTest.php b/test/HTML5/Serializer/OutputRulesTest.php index a451055..61e2d50 100644 --- a/test/HTML5/Serializer/OutputRulesTest.php +++ b/test/HTML5/Serializer/OutputRulesTest.php @@ -330,4 +330,15 @@ class OutputRulesTest extends \HTML5\Tests\TestCase { $this->assertRegExp('|<csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">|', $content); } + function testProcessorInstruction() { + $dom = \HTML5::loadHTMLFragment('<?foo bar ?>'); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, \HTML5::options()); + $t = new Traverser($dom, $stream, $r, \HTML5::options()); + + $r->processorInstruction($dom->firstChild); + $content = stream_get_contents($stream, -1, 0); + $this->assertRegExp('|<\?foo bar \?>|', $content); + } } diff --git a/test/HTML5/Serializer/TraverserTest.php b/test/HTML5/Serializer/TraverserTest.php index 6fa5c1e..5816bfc 100644 --- a/test/HTML5/Serializer/TraverserTest.php +++ b/test/HTML5/Serializer/TraverserTest.php @@ -59,20 +59,33 @@ class TraverserTest extends \HTML5\Tests\TestCase { $this->assertInstanceOf('\HTML5\Serializer\Traverser', $t); } - function testFragment() { - $html = '<span class="bar">foo</span><span></span><div>bar</div>'; - $input = new \HTML5\Parser\StringInputStream($html); - $dom = \HTML5::parseFragment($input); + function testFragment() { + $html = '<span class="bar">foo</span><span></span><div>bar</div>'; + $input = new \HTML5\Parser\StringInputStream($html); + $dom = \HTML5::parseFragment($input); - //fprintf(STDOUT, print_r($dom, TRUE)); + $this->assertInstanceOf('\DOMDocumentFragment', $dom); - $this->assertInstanceOf('\DOMDocumentFragment', $dom); + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, 
\HTML5::options()); + $t = new Traverser($dom, $stream, $r, \HTML5::options()); + + $out = $t->walk(); + $this->assertEquals($html, stream_get_contents($stream, -1, 0)); + } + + function testProcessorInstruction() { + $html = '<?foo bar ?>'; + $input = new \HTML5\Parser\StringInputStream($html); + $dom = \HTML5::parseFragment($input); - $stream = fopen('php://temp', 'w'); - $r = new OutputRules($stream, \HTML5::options()); - $t = new Traverser($dom, $stream, $r, \HTML5::options()); + $this->assertInstanceOf('\DOMDocumentFragment', $dom); - $out = $t->walk(); - $this->assertEquals($html, stream_get_contents($stream, -1, 0)); - } + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, \HTML5::options()); + $t = new Traverser($dom, $stream, $r, \HTML5::options()); + + $out = $t->walk(); + $this->assertEquals($html, stream_get_contents($stream, -1, 0)); + } } |