summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2017-02-21 18:40:42 +0000
committer: Andres Rey <[email protected]> 2017-02-21 18:40:42 +0000
commit: 0f99c53420f05187dc3af9fe04f1129e14dfe96e (patch)
tree: 17e9d6ea450c972243374eee52317e4e086e90e0 /src
parent: 268d1130247042c9f3952a143f43ed74522cd64c (diff)
Fixed test cases and added function to replace font tags with span + param to setNodeTag to keep attributes from original node.
Diffstat (limited to 'src')
-rw-r--r-- src/HTMLParser.php 11
-rw-r--r-- src/Readability.php 20
2 files changed, 24 insertions, 7 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 5866bd9..5fb27b8 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -290,6 +290,15 @@ class HTMLParser
}
}
}
+
+ // Replace font tags with span
+ $fonts = $this->dom->getElementsByTagName('font');
+ $length = $fonts->length;
+ for ($i = 0; $i < $length; $i++) {
+ $font = $fonts->item($length - 1 - $i);
+ $span = new Readability($font);
+ $span->setNodeTag('span', true);
+ }
}
public function postProcessContent(DOMDocument $article)
@@ -458,7 +467,7 @@ class HTMLParser
* Check for the rel attribute, then check if the rel attribute is either img_src or image_src, and
* finally check for the existence of the href attribute, which should hold the image url.
*/
- if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')){
+ if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')) {
return $link->getAttribute('href');
}
}
diff --git a/src/Readability.php b/src/Readability.php
index 027858f..44633b2 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -253,7 +253,7 @@ class Readability extends Element implements ReadabilityInterface
{
// Check if the setAttribute method exists, as some elements lack of it (and calling it anyway throws an exception)
if (method_exists($this->node, 'setAttribute')) {
- $this->contentScore = (float) $score;
+ $this->contentScore = (float)$score;
// Set score in an attribute of the tag to prevent losing it while creating new Readability objects.
$this->node->setAttribute('data-readability', $this->contentScore);
@@ -286,8 +286,9 @@ class Readability extends Element implements ReadabilityInterface
* element with the new tag name and importing it to the main DOMDocument.
*
* @param string $value
+ * @param bool $importAttributes
*/
- public function setNodeTag($value)
+ public function setNodeTag($value, $importAttributes = false)
{
$new = new \DOMDocument();
$new->appendChild($new->createElement($value));
@@ -298,6 +299,13 @@ class Readability extends Element implements ReadabilityInterface
$new->firstChild->appendChild($import);
}
+ if ($importAttributes) {
+ // Import attributes from the original node.
+ foreach ($this->node->attributes as $attribute) {
+ $new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue);
+ }
+ }
+
// The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement.
$import = $this->node->ownerDocument->importNode($new->firstChild, true);
$this->node->parentNode->replaceChild($import, $this->node);
@@ -335,7 +343,7 @@ class Readability extends Element implements ReadabilityInterface
* for parents.
*
* @param Readability $originalNode
- * @param bool $ignoreSelfAndKids
+ * @param bool $ignoreSelfAndKids
*
* @return Readability
*/
@@ -411,7 +419,7 @@ class Readability extends Element implements ReadabilityInterface
* Creates a new node based on the text content of the original node.
*
* @param Readability $originalNode
- * @param string $tagName
+ * @param string $tagName
*
* @return Readability
*/
@@ -458,8 +466,8 @@ class Readability extends Element implements ReadabilityInterface
* provided one.
*
* @param Readability $node
- * @param string $tagName
- * @param int $maxDepth
+ * @param string $tagName
+ * @param int $maxDepth
*
* @return bool
*/