From 0f99c53420f05187dc3af9fe04f1129e14dfe96e Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Tue, 21 Feb 2017 18:40:42 +0000 Subject: Fixed test cases and added function to replace font tags with span + param to setNodeTag to keep attributes from original node. --- src/HTMLParser.php | 11 ++++++++++- src/Readability.php | 20 ++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 5866bd9..5fb27b8 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -290,6 +290,15 @@ class HTMLParser } } } + + // Replace font tags with span + $fonts = $this->dom->getElementsByTagName('font'); + $length = $fonts->length; + for ($i = 0; $i < $length; $i++) { + $font = $fonts->item($length - 1 - $i); + $span = new Readability($font); + $span->setNodeTag('span', true); + } } public function postProcessContent(DOMDocument $article) @@ -458,7 +467,7 @@ class HTMLParser * Check for the rel attribute, then check if the rel attribute is either img_src or image_src, and * finally check for the existence of the href attribute, which should hold the image url. */ - if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')){ + if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')) { return $link->getAttribute('href'); } } diff --git a/src/Readability.php b/src/Readability.php index 027858f..44633b2 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -253,7 +253,7 @@ class Readability extends Element implements ReadabilityInterface { // Check if the setAttribute method exists, as some elements lack of it (and calling it anyway throws an exception) if (method_exists($this->node, 'setAttribute')) { - $this->contentScore = (float) $score; + $this->contentScore = (float)$score; // Set score in an attribute of the tag to prevent losing it while creating new Readability objects. $this->node->setAttribute('data-readability', $this->contentScore); @@ -286,8 +286,9 @@ class Readability extends Element implements ReadabilityInterface * element with the new tag name and importing it to the main DOMDocument. * * @param string $value + * @param bool $importAttributes */ - public function setNodeTag($value) + public function setNodeTag($value, $importAttributes = false) { $new = new \DOMDocument(); $new->appendChild($new->createElement($value)); @@ -298,6 +299,13 @@ class Readability extends Element implements ReadabilityInterface $new->firstChild->appendChild($import); } + if ($importAttributes) { + // Import attributes from the original node. + foreach ($this->node->attributes as $attribute) { + $new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue); + } + } + // The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement. $import = $this->node->ownerDocument->importNode($new->firstChild, true); $this->node->parentNode->replaceChild($import, $this->node); @@ -335,7 +343,7 @@ class Readability extends Element implements ReadabilityInterface * for parents. * * @param Readability $originalNode - * @param bool $ignoreSelfAndKids + * @param bool $ignoreSelfAndKids * * @return Readability */ @@ -411,7 +419,7 @@ class Readability extends Element implements ReadabilityInterface * Creates a new node based on the text content of the original node. * * @param Readability $originalNode - * @param string $tagName + * @param string $tagName * * @return Readability */ @@ -458,8 +466,8 @@ class Readability extends Element implements ReadabilityInterface * provided one. * * @param Readability $node - * @param string $tagName - * @param int $maxDepth + * @param string $tagName + * @param int $maxDepth * * @return bool */ -- cgit v1.2.3