diff options
author | Andres Rey <[email protected]> | 2017-02-21 18:40:42 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-02-21 18:40:42 +0000 |
commit | 0f99c53420f05187dc3af9fe04f1129e14dfe96e (patch) | |
tree | 17e9d6ea450c972243374eee52317e4e086e90e0 /src | |
parent | 268d1130247042c9f3952a143f43ed74522cd64c (diff) |
Fixed test cases, added a function to replace font tags with span tags, and added a parameter to setNodeTag to keep the attributes of the original node.
Diffstat (limited to 'src')
-rw-r--r-- | src/HTMLParser.php | 11 | ||||
-rw-r--r-- | src/Readability.php | 20 |
2 files changed, 24 insertions, 7 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 5866bd9..5fb27b8 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -290,6 +290,15 @@ class HTMLParser } } } + + // Replace font tags with span + $fonts = $this->dom->getElementsByTagName('font'); + $length = $fonts->length; + for ($i = 0; $i < $length; $i++) { + $font = $fonts->item($length - 1 - $i); + $span = new Readability($font); + $span->setNodeTag('span', true); + } } public function postProcessContent(DOMDocument $article) @@ -458,7 +467,7 @@ class HTMLParser * Check for the rel attribute, then check if the rel attribute is either img_src or image_src, and * finally check for the existence of the href attribute, which should hold the image url. */ - if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')){ + if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')) { return $link->getAttribute('href'); } } diff --git a/src/Readability.php b/src/Readability.php index 027858f..44633b2 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -253,7 +253,7 @@ class Readability extends Element implements ReadabilityInterface { // Check if the setAttribute method exists, as some elements lack of it (and calling it anyway throws an exception) if (method_exists($this->node, 'setAttribute')) { - $this->contentScore = (float) $score; + $this->contentScore = (float)$score; // Set score in an attribute of the tag to prevent losing it while creating new Readability objects. $this->node->setAttribute('data-readability', $this->contentScore); @@ -286,8 +286,9 @@ class Readability extends Element implements ReadabilityInterface * element with the new tag name and importing it to the main DOMDocument. 
* * @param string $value + * @param bool $importAttributes */ - public function setNodeTag($value) + public function setNodeTag($value, $importAttributes = false) { $new = new \DOMDocument(); $new->appendChild($new->createElement($value)); @@ -298,6 +299,13 @@ class Readability extends Element implements ReadabilityInterface $new->firstChild->appendChild($import); } + if ($importAttributes) { + // Import attributes from the original node. + foreach ($this->node->attributes as $attribute) { + $new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue); + } + } + // The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement. $import = $this->node->ownerDocument->importNode($new->firstChild, true); $this->node->parentNode->replaceChild($import, $this->node); @@ -335,7 +343,7 @@ class Readability extends Element implements ReadabilityInterface * for parents. * * @param Readability $originalNode - * @param bool $ignoreSelfAndKids + * @param bool $ignoreSelfAndKids * * @return Readability */ @@ -411,7 +419,7 @@ class Readability extends Element implements ReadabilityInterface * Creates a new node based on the text content of the original node. * * @param Readability $originalNode - * @param string $tagName + * @param string $tagName * * @return Readability */ @@ -458,8 +466,8 @@ class Readability extends Element implements ReadabilityInterface * provided one. * * @param Readability $node - * @param string $tagName - * @param int $maxDepth + * @param string $tagName + * @param int $maxDepth * * @return bool */ |