summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/HTML5.php 10
-rw-r--r-- src/HTML5/Parser/DOMTreeBuilder.php 23
-rw-r--r-- src/HTML5/Serializer/OutputRules.php 29
3 files changed, 42 insertions, 20 deletions
diff --git a/src/HTML5.php b/src/HTML5.php
index 16187df..0b2d368 100644
--- a/src/HTML5.php
+++ b/src/HTML5.php
@@ -163,14 +163,9 @@ class HTML5
$parser = new Tokenizer($scanner, $events);
$parser->parse();
+ $this->errors = $events->getErrors();
- $document = $events->document();
-
- if ($document) {
- $this->errors = $document->errors;
- }
-
- return $document;
+ return $events->document();
}
/**
@@ -186,6 +181,7 @@ class HTML5
$parser = new Tokenizer($scanner, $events);
$parser->parse();
+ $this->errors = $events->getErrors();
return $events->fragment();
}
diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php
index 731d1d8..b9e587f 100644
--- a/src/HTML5/Parser/DOMTreeBuilder.php
+++ b/src/HTML5/Parser/DOMTreeBuilder.php
@@ -145,6 +145,8 @@ class DOMTreeBuilder implements EventHandler
*/
protected $quirks = true;
+ protected $errors = array();
+
public function __construct($isFragment = false, array $options = array())
{
$this->options = $options;
@@ -156,7 +158,7 @@ class DOMTreeBuilder implements EventHandler
$dt = $impl->createDocumentType('html');
// $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
$this->doc = $impl->createDocument(null, null, $dt);
- $this->doc->errors = array();
+ $this->errors = array();
$this->current = $this->doc; // ->documentElement;
@@ -195,7 +197,6 @@ class DOMTreeBuilder implements EventHandler
*/
public function fragment()
{
- $this->frag->errors = $this->doc->errors;
return $this->frag;
}
@@ -337,6 +338,9 @@ class DOMTreeBuilder implements EventHandler
// to avoid spl_object_hash collisions whe have to avoid garbage collection of $ele storing it into $pushes
// see https://bugs.php.net/bug.php?id=67459
$this->pushes[spl_object_hash($ele)] = array($pushes, $ele);
+
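+ // Store the hash on the element so it can be read back later instead of recomputed (HHVM workaround, see https://github.com/facebook/hhvm/issues/2962)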
+ $ele->setAttribute('html5-php-fake-id-attribute', spl_object_hash($ele));
}
foreach ($attributes as $aName => $aVal) {
@@ -438,7 +442,13 @@ class DOMTreeBuilder implements EventHandler
return;
}
- $cid = spl_object_hash($this->current);
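+ // Prefer the hash stored in the fake id attribute (and strip that attribute); otherwise fall back to spl_object_hash(). See https://github.com/facebook/hhvm/issues/2962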
+ if ($cid = $this->current->getAttribute('html5-php-fake-id-attribute')) {
+ $this->current->removeAttribute('html5-php-fake-id-attribute');
+ } else {
+ $cid = spl_object_hash($this->current);
+ }
+
// remove the namespaced definded by current node
if (isset($this->pushes[$cid])) {
for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
@@ -501,7 +511,12 @@ class DOMTreeBuilder implements EventHandler
public function parseError($msg, $line = 0, $col = 0)
{
- $this->doc->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
+ $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
+ }
+
+ public function getErrors()
+ {
+ return $this->errors;
}
public function cdata($data)
diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php
index 2969383..ff8341e 100644
--- a/src/HTML5/Serializer/OutputRules.php
+++ b/src/HTML5/Serializer/OutputRules.php
@@ -22,6 +22,8 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
const IM_IN_MATHML = 3;
+ private $hasHTML5 = false;
+
protected $traverser;
protected $encode = false;
@@ -40,6 +42,9 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
$this->outputMode = static::IM_IN_HTML;
$this->out = $output;
+
+ // Use the native HTML5 entity table only when ENT_HTML5 is defined and we are not running on HHVM, see https://github.com/facebook/hhvm/issues/2727
+ $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
}
public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
@@ -83,15 +88,20 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
}
$this->openTag($ele);
+ if (Elements::isA($name, Elements::TEXT_RAW)) {
+ foreach ($ele->childNodes as $child) {
+ $this->wr($child->data);
+ }
+ } else {
+ // Handle children.
+ if ($ele->hasChildNodes()) {
+ $this->traverser->children($ele->childNodes);
+ }
- // Handle children.
- if ($ele->hasChildNodes()) {
- $this->traverser->children($ele->childNodes);
- }
-
- // Close out the SVG or MathML special handling.
- if ($name == 'svg' || $name == 'math') {
- $this->outputMode = static::IM_IN_HTML;
+ // Close out the SVG or MathML special handling.
+ if ($name == 'svg' || $name == 'math') {
+ $this->outputMode = static::IM_IN_HTML;
+ }
}
// If not unary, add a closing tag.
@@ -285,7 +295,8 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// If we are in PHP 5.4+ we can use the native html5 entity functionality to
// convert the named character references.
- if (defined('ENT_HTML5')) {
+
+ if ($this->hasHTML5) {
return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
} // If a version earlier than 5.4 html5 entities are not entirely handled.
// This manually handles them.