summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matt Butcher <[email protected]> 2014-02-11 09:56:01 -0700
committer Matt Butcher <[email protected]> 2014-02-11 09:56:01 -0700
commit 44e8e23626bf619844baf9983e931d2f58606377 (patch)
tree 62625f63a954ab5a3f9bb4184039a68496e3896a
parent 77ad931cd824feb33eebae08cf3a5a47bce1e337 (diff)
parent 3b691837c6d7a0969137048fbda274463d6b1d7c (diff)
Merge branch 'master' of github.com:Masterminds/html5-php
-rw-r--r-- .travis.yml 11
-rw-r--r-- CREDITS 5
-rw-r--r-- README.md 2
-rw-r--r-- RELEASE.md 9
-rw-r--r-- composer.json 5
-rw-r--r-- phpunit.xml.dist 29
-rw-r--r-- src/HTML5.php 25
-rw-r--r-- src/HTML5/Elements.php 11
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php 16
-rw-r--r-- src/HTML5/Parser/StringInputStream.php 3
-rw-r--r-- src/HTML5/Parser/Tokenizer.php 6
-rw-r--r-- src/HTML5/Parser/TreeBuildingRules.php 11
-rw-r--r-- src/HTML5/Serializer/OutputRules.php 2
-rw-r--r-- test/HTML5/Html5Test.php 15
-rw-r--r-- test/HTML5/Parser/DOMTreeBuilderTest.php 50
-rw-r--r-- test/HTML5/Parser/TokenizerTest.php 8
-rw-r--r-- test/HTML5/Parser/TreeBuildingRulesTest.php 11
-rw-r--r-- test/HTML5/Serializer/OutputRulesTest.php 11
-rw-r--r-- test/HTML5/Serializer/TraverserTest.php 37
19 files changed, 222 insertions, 45 deletions
diff --git a/.travis.yml b/.travis.yml
index e01eb12..6b33aae 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,12 @@ notifications:
irc: "irc.freenode.net#masterminds"
before_script:
- - curl -s http://getcomposer.org/installer | php
- - php composer.phar install
+ - composer self-update
+ - composer install --dev
-script: phpunit test/HTML5 \ No newline at end of file
+script:
+ - mkdir -p build/logs
+ - phpunit -c phpunit.xml.dist
+
+after_script:
+ - php vendor/bin/coveralls -v \ No newline at end of file
diff --git a/CREDITS b/CREDITS
index 193bc34..1f805e6 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1,6 +1,7 @@
Matt Butcher [technosophos] <[email protected]> (lead)
Matt Farina [mattfarina] <[email protected]> (lead)
-Kukhar Vasily [ngreduce] <[email protected]> (contributor)
-Geoffrey Sneddon [gsnedders] <[email protected]> (contributor)
+Asmir Mustafic [goetas] <[email protected]> (contributor)
Edward Z. Yang [ezyang] <[email protected]> (contributor)
+Geoffrey Sneddon [gsnedders] <[email protected]> (contributor)
+Kukhar Vasily [ngreduce] <[email protected]> (contributor)
Rune Christensen [MrElectronic] <[email protected]> (contributor)
diff --git a/README.md b/README.md
index 0daae87..8344c1c 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ But after some initial refactoring work, we began a new parser.
- DOM tree builder
- Interoperability with QueryPath [[in progress](https://github.com/technosophos/querypath/issues/114)]
-[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php)
+[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) [![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5) [![Coverage Status](https://coveralls.io/repos/Masterminds/html5-php/badge.png?branch=master)](https://coveralls.io/r/Masterminds/html5-php?branch=master)
## Installation
diff --git a/RELEASE.md b/RELEASE.md
index 89267c7..7f12702 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,4 +1,11 @@
# Release Notes
-1.0.1 (unreleased)
+1.0.1 (2013-11-07)
- CDATA encoding is improved. (Non-standard; Issue #19)
+- Some parser rules were not returning the new current element. (Issue #20)
+- Added details on code test coverage and the Packagist version to the README.
+- Fixed processor instructions.
+- Improved test coverage and documentation coverage.
+
+1.0.0 (2013-10-02)
+- Initial release.
diff --git a/composer.json b/composer.json
index d574580..6b88b8a 100644
--- a/composer.json
+++ b/composer.json
@@ -19,6 +19,11 @@
"ext-libxml" : "*",
"php" : ">=5.3.0"
},
+ "require-dev": {
+ "satooshi/php-coveralls": "0.6.*",
+ "phpunit/phpunit" : "*",
+ "phpdocumentor/phpdocumentor": "2.1.*"
+ },
"autoload": {
"psr-0": {"HTML5": "src"}
}
diff --git a/phpunit.xml.dist b/phpunit.xml.dist
new file mode 100644
index 0000000..c145f8b
--- /dev/null
+++ b/phpunit.xml.dist
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<phpunit colors="true">
+ <testsuites>
+ <testsuite name="PHPUnit">
+ <directory>test/HTML5/</directory>
+ </testsuite>
+ </testsuites>
+ <filter>
+ <blacklist>
+ <file>src/HTML5/Parser/InputStream.php</file>
+ <file>src/HTML5/Serializer/RulesInterface.php</file>
+ <file>src/HTML5/Entities.php</file>
+ <file>src/HTML5/Serializer/HTML5Entities.php</file>
+ </blacklist>
+ </filter>
+ <logging>
+ <log
+ type="coverage-html"
+ target="build/coverage"
+ charset="UTF-8"
+ yui="true"
+ highlight="true"
+ lowUpperBound="35"
+ highLowerBound="70"
+ showUncoveredFiles="true"
+ />
+ <log type="coverage-clover" target="build/logs/clover.xml"/>
+ </logging>
+</phpunit>
diff --git a/src/HTML5.php b/src/HTML5.php
index ce0927f..7295fb4 100644
--- a/src/HTML5.php
+++ b/src/HTML5.php
@@ -1,5 +1,7 @@
<?php
-
+/**
+ * The main HTML5 front end.
+ */
use HTML5\Parser\StringInputStream;
use HTML5\Parser\FileInputStream;
use HTML5\Parser\Scanner;
@@ -9,8 +11,6 @@ use HTML5\Serializer\OutputRules;
use HTML5\Serializer\Traverser;
/**
- * The main HTML5 front end.
- *
* This class offers convenience methods for parsing and serializing HTML5.
* It is roughly designed to mirror the \DOMDocument class that is
* provided with most versions of PHP.
@@ -19,6 +19,10 @@ use HTML5\Serializer\Traverser;
*/
class HTML5 {
+ /**
+ * Global options for the parser and serializer.
+ * @var array
+ */
public static $options = array(
// If the serializer should encode all entities.
@@ -77,6 +81,15 @@ class HTML5 {
*
* This is here to provide backwards compatibility with the
* PHP DOM implementation. It simply calls load().
+ *
+ * @param string $file
+ * The path to the file to parse. If this is a resource, it is
+ * assumed to be an open stream whose pointer is set to the first
+ * byte of input.
+ *
+ * @return \DOMDocument
+ * A DOM document. This object type is defined by the libxml
+ * library, and should have been included with your version of PHP.
*/
public static function loadHTMLFile($file, $options = NULL) {
return static::load($file, $options);
@@ -166,6 +179,12 @@ class HTML5 {
return $events->document();
}
+ /**
+ * Parse an input stream where the stream is a fragment.
+ *
+ * Lower-level loading function. This requires an input stream instead
+ * of a string, file, or resource.
+ */
public static function parseFragment(\HTML5\Parser\InputStream $input) {
$events = new DOMTreeBuilder(TRUE);
$scanner = new Scanner($input);
diff --git a/src/HTML5/Elements.php b/src/HTML5/Elements.php
index 0dabc3a..69d3882 100644
--- a/src/HTML5/Elements.php
+++ b/src/HTML5/Elements.php
@@ -1,9 +1,10 @@
<?php
+/**
+ * Provide general element functions.
+ */
namespace HTML5;
/**
- * Provide general element functions.
- *
* This class provides general information about HTML5 elements,
* including syntactic and semantic issues. Parsers and serializers can
* use this class as a reference point for information about the rules
@@ -532,6 +533,12 @@ class Elements {
/**
* Get the element mask for the given element name.
+ *
+ * @param string $name
+ * The name of the element.
+ *
+ * @return int
+ * The element mask.
*/
public static function element($name) {
if (isset(static::$html5[$name])) {
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 094104e..13ae3bc 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -289,6 +289,11 @@ class DOMTreeBuilder implements EventHandler {
return;
}
+ // Special case handling for SVG.
+ if ($this->insertMode == static::IM_IN_SVG) {
+ $lname = Elements::normalizeSvgElement($lname);
+ }
+
// XXX: Not sure whether we need this anymore.
// if ($name != $lname) {
// return $this->quirksTreeResolver($lname);
@@ -301,8 +306,8 @@ class DOMTreeBuilder implements EventHandler {
}
//$this->current = $this->current->parentNode;
- if (!$this->autoclose($name)) {
- $this->parseError('Could not find closing tag for ' . $name);
+ if (!$this->autoclose($lname)) {
+ $this->parseError('Could not find closing tag for ' . $lname);
}
//switch ($this->insertMode) {
@@ -318,11 +323,6 @@ class DOMTreeBuilder implements EventHandler {
$this->insertMode = static::IM_IN_BODY;
break;
}
-
- // 8.2.5.4.7
- if ($name == 'sarcasm') {
- $this->text("Take a deep breath.");
- }
}
public function comment($cdata) {
@@ -372,7 +372,7 @@ class DOMTreeBuilder implements EventHandler {
// Important: The processor may modify the current DOM tree however
// it sees fit.
if (isset($this->processor)) {
- $res = $processor->process($this->current, $name, $data);
+ $res = $this->processor->process($this->current, $name, $data);
if (!empty($res)) {
$this->current = $res;
}
diff --git a/src/HTML5/Parser/StringInputStream.php b/src/HTML5/Parser/StringInputStream.php
index 0d2a7f3..ca5fee0 100644
--- a/src/HTML5/Parser/StringInputStream.php
+++ b/src/HTML5/Parser/StringInputStream.php
@@ -1,4 +1,7 @@
<?php
+/**
+ * Loads a string to be parsed.
+ */
namespace HTML5\Parser;
/*
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 9866246..df77b5d 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -295,7 +295,7 @@ class Tokenizer {
return $this->bogusComment('</');
}
- $name = $this->scanner->charsUntil("\n\f \t>");
+ $name = strtolower($this->scanner->charsUntil("\n\f \t>"));
// Trash whitespace.
$this->scanner->whitespace();
@@ -475,7 +475,7 @@ class Tokenizer {
* The attribute value.
*/
protected function quotedAttributeValue($quote) {
- $stoplist = "\t\n\f" . $quote;
+ $stoplist = "\f" . $quote;
$val = '';
$tok = $this->scanner->current();
while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
@@ -846,7 +846,7 @@ class Tokenizer {
$buffer .= $this->scanner->charsUntil($first);
// Stop as soon as we hit the stopping condition.
- if ($this->sequenceMatches($sequence)) {
+ if ($this->sequenceMatches($sequence) || $this->sequenceMatches(strtoupper($sequence))) {
return $buffer;
}
$buffer .= $this->scanner->current();
diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php
index 0f1b3a5..b87c6b5 100644
--- a/src/HTML5/Parser/TreeBuildingRules.php
+++ b/src/HTML5/Parser/TreeBuildingRules.php
@@ -71,20 +71,19 @@ class TreeBuildingRules {
case 'rp':
return $this->handleRT($new, $current);
case 'optgroup':
- $this->closeIfCurrentMatches($new, $current, array('optgroup'));
+ return $this->closeIfCurrentMatches($new, $current, array('optgroup'));
case 'option':
- $this->closeIfCurrentMatches($new, $current, array('option', 'optgroup'));
+ return $this->closeIfCurrentMatches($new, $current, array('option', 'optgroup'));
case 'tr':
- $this->closeIfCurrentMatches($new, $current, array('tr'));
+ return $this->closeIfCurrentMatches($new, $current, array('tr'));
case 'td':
case 'th':
- $this->closeIfCurrentMatches($new, $current, array('th', 'td'));
+ return $this->closeIfCurrentMatches($new, $current, array('th', 'td'));
case 'tbody':
case 'thead':
case 'tfoot':
case 'table': // Spec isn't explicit about this, but it's necessary.
- $this->closeIfCurrentMatches($new, $current, array('thead', 'tfoot', 'tbody'));
-
+ return $this->closeIfCurrentMatches($new, $current, array('thead', 'tfoot', 'tbody'));
}
return $current;
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php
index bb0cb45..bc57346 100644
--- a/src/HTML5/Serializer/OutputRules.php
+++ b/src/HTML5/Serializer/OutputRules.php
@@ -120,7 +120,7 @@ class OutputRules implements \HTML5\Serializer\RulesInterface {
}
public function processorInstruction($ele) {
- $this->wr('<?')->wr($ele->target)->wr($ele->data)->wr(' ?>');
+ $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr('?>');
}
/**
diff --git a/test/HTML5/Html5Test.php b/test/HTML5/Html5Test.php
index 13f5b19..dcb51cd 100644
--- a/test/HTML5/Html5Test.php
+++ b/test/HTML5/Html5Test.php
@@ -26,6 +26,15 @@ class Html5Test extends TestCase {
$dom = \HTML5::load(__DIR__ . '/Html5Test.html');
$this->assertInstanceOf('\DOMDocument', $dom);
$this->assertEmpty($dom->errors);
+
+ $file = fopen(__DIR__ . '/Html5Test.html', 'r');
+ $dom = \HTML5::load($file);
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($dom->errors);
+
+ $dom = \HTML5::loadHTMLFile(__DIR__ . '/Html5Test.html');
+ $this->assertInstanceOf('\DOMDocument', $dom);
+ $this->assertEmpty($dom->errors);
}
public function testLoadHTML() {
@@ -125,7 +134,7 @@ class Html5Test extends TestCase {
</body>
</html>');
- $this->assertEmpty($dom->errors);
+ $this->assertEmpty($dom->errors, print_r($dom->errors, TRUE));
// Test a mixed case attribute.
$list = $dom->getElementsByTagName('svg');
@@ -192,11 +201,13 @@ class Html5Test extends TestCase {
<f:name>Big rectangle thing</f:name>
<f:width>40</f:width>
<f:length>80</f:length>
- </f:rug>");
+ </f:rug>
+ <sarcasm>um, yeah</sarcasm>");
$this->assertEmpty($dom->errors);
$markup = \HTML5::saveHTML($dom);
$this->assertRegExp('|<f:name>Big rectangle thing</f:name>|',$markup);
+ $this->assertRegExp('|<sarcasm>um, yeah</sarcasm>|',$markup);
}
public function testElements() {
diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php
index adfc2c9..6eeafe8 100644
--- a/test/HTML5/Parser/DOMTreeBuilderTest.php
+++ b/test/HTML5/Parser/DOMTreeBuilderTest.php
@@ -301,6 +301,14 @@ class DOMTreeBuilderTest extends \HTML5\Tests\TestCase {
$this->assertEquals('textPath', $textPath->tagName);
}
+ public function testNoScript() {
+ $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
+ $doc = $this->parse($html);
+ $this->assertEmpty($doc->errors);
+ $noscript = $doc->getElementsByTagName('noscript')->item(0);
+ $this->assertEquals('noscript', $noscript->tagName);
+ }
+
/**
* Regression for issue #13
*/
@@ -314,4 +322,46 @@ class DOMTreeBuilderTest extends \HTML5\Tests\TestCase {
$this->assertEquals('span', $span->tagName);
$this->assertEquals('Test', $span->textContent);
}
+
+ public function testInstructionProcessor() {
+ $string = '<!DOCTYPE html><html><?foo bar ?></html>';
+
+ $treeBuilder = new DOMTreeBuilder();
+ $is = new InstructionProcessorMock();
+ $treeBuilder->setInstructionProcessor($is);
+
+ $input = new StringInputStream($string);
+ $scanner = new Scanner($input);
+ $parser = new Tokenizer($scanner, $treeBuilder);
+
+ $parser->parse();
+ $dom = $treeBuilder->document();
+ $div = $dom->getElementsByTagName('div')->item(0);
+
+ $this->assertEquals(1, $is->count);
+ $this->assertEquals('foo', $is->name);
+ $this->assertEquals('bar ', $is->data);
+ $this->assertEquals('div', $div->tagName);
+ $this->assertEquals('foo', $div->textContent);
+ }
+}
+
+class InstructionProcessorMock implements \HTML5\InstructionProcessor {
+
+ public $name = NULL;
+ public $data = NULL;
+ public $count = 0;
+
+ public function process(\DOMElement $element, $name, $data) {
+ $this->name = $name;
+ $this->data = $data;
+ $this->count++;
+
+ $div = $element->ownerDocument->createElement("div");
+ $div->nodeValue = 'foo';
+
+ $element->appendChild($div);
+
+ return $div;
+ }
}
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 0ac987f..231827c 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -142,7 +142,7 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
'</test
>' => 'test',
'</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' =>
- 'thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend',
+ 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
// See 8.2.4.10, which requires this and does not say error.
'</a<b>' => 'a<b',
);
@@ -427,6 +427,12 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
$this->assertEventEquals('text', $expects, $events->get(2));
}
+ // Testing case sensitivity
+ $events = $this->parse('<TITLE>a test</TITLE>');
+ $this->assertEventEquals('startTag', 'title', $events->get(0));
+ $this->assertEventEquals('text', 'a test', $events->get(1));
+ $this->assertEventEquals('endTag', 'title', $events->get(2));
+
}
public function testText() {
diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php
index fe02893..a247cea 100644
--- a/test/HTML5/Parser/TreeBuildingRulesTest.php
+++ b/test/HTML5/Parser/TreeBuildingRulesTest.php
@@ -62,4 +62,15 @@ class TreeBuildingRulesTest extends \HTML5\Tests\TestCase {
$this->assertEquals('dd', $list->lastChild->tagName);
}
+ public function testTable() {
+ $html = sprintf(self::HTML_STUB, '<table><thead id="a"><th>foo<td>bar<td>baz');
+ $doc = $this->parse($html);
+
+ $list = $doc->getElementById('a');
+
+ $this->assertEquals(3, $list->childNodes->length);
+ $this->assertEquals('th', $list->firstChild->tagName);
+ $this->assertEquals('td', $list->lastChild->tagName);
+ }
+
}
diff --git a/test/HTML5/Serializer/OutputRulesTest.php b/test/HTML5/Serializer/OutputRulesTest.php
index a451055..61e2d50 100644
--- a/test/HTML5/Serializer/OutputRulesTest.php
+++ b/test/HTML5/Serializer/OutputRulesTest.php
@@ -330,4 +330,15 @@ class OutputRulesTest extends \HTML5\Tests\TestCase {
$this->assertRegExp('|<csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">|', $content);
}
+ function testProcessorInstruction() {
+ $dom = \HTML5::loadHTMLFragment('<?foo bar ?>');
+
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, \HTML5::options());
+ $t = new Traverser($dom, $stream, $r, \HTML5::options());
+
+ $r->processorInstruction($dom->firstChild);
+ $content = stream_get_contents($stream, -1, 0);
+ $this->assertRegExp('|<\?foo bar \?>|', $content);
+ }
}
diff --git a/test/HTML5/Serializer/TraverserTest.php b/test/HTML5/Serializer/TraverserTest.php
index 6fa5c1e..5816bfc 100644
--- a/test/HTML5/Serializer/TraverserTest.php
+++ b/test/HTML5/Serializer/TraverserTest.php
@@ -59,20 +59,33 @@ class TraverserTest extends \HTML5\Tests\TestCase {
$this->assertInstanceOf('\HTML5\Serializer\Traverser', $t);
}
- function testFragment() {
- $html = '<span class="bar">foo</span><span></span><div>bar</div>';
- $input = new \HTML5\Parser\StringInputStream($html);
- $dom = \HTML5::parseFragment($input);
+ function testFragment() {
+ $html = '<span class="bar">foo</span><span></span><div>bar</div>';
+ $input = new \HTML5\Parser\StringInputStream($html);
+ $dom = \HTML5::parseFragment($input);
- //fprintf(STDOUT, print_r($dom, TRUE));
+ $this->assertInstanceOf('\DOMDocumentFragment', $dom);
- $this->assertInstanceOf('\DOMDocumentFragment', $dom);
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, \HTML5::options());
+ $t = new Traverser($dom, $stream, $r, \HTML5::options());
+
+ $out = $t->walk();
+ $this->assertEquals($html, stream_get_contents($stream, -1, 0));
+ }
+
+ function testProcessorInstruction() {
+ $html = '<?foo bar ?>';
+ $input = new \HTML5\Parser\StringInputStream($html);
+ $dom = \HTML5::parseFragment($input);
- $stream = fopen('php://temp', 'w');
- $r = new OutputRules($stream, \HTML5::options());
- $t = new Traverser($dom, $stream, $r, \HTML5::options());
+ $this->assertInstanceOf('\DOMDocumentFragment', $dom);
- $out = $t->walk();
- $this->assertEquals($html, stream_get_contents($stream, -1, 0));
- }
+ $stream = fopen('php://temp', 'w');
+ $r = new OutputRules($stream, \HTML5::options());
+ $t = new Traverser($dom, $stream, $r, \HTML5::options());
+
+ $out = $t->walk();
+ $this->assertEquals($html, stream_get_contents($stream, -1, 0));
+ }
}