summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/HTMLParser.php18
-rw-r--r--src/Readability.php30
2 files changed, 43 insertions, 5 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 21bb88f..5a684a5 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -438,7 +438,8 @@ class HTMLParser
$append = false;
// TODO Check if this comparison working as expected
- if ($sibling === $topCandidate) {
+ // In the original JS project this was a simple $sibling == $topCandidate comparison.
+ if ($this->compareNodes($sibling, $topCandidate)) {
$append = true;
} else {
$contentBonus = 0;
@@ -473,7 +474,7 @@ class HTMLParser
// $sibling->setNodeName('div');
}
- $import = $articleContent->importNode($sibling->getDOMNode());
+ $import = $articleContent->importNode($sibling->getDOMNode(), true);
$articleContent->appendChild($import);
}
}
@@ -504,4 +505,17 @@ class HTMLParser
}
return false;
}
+
+ private function compareNodes($node1, $node2)
+ {
+ if ($node1->getTagName() !== $node2->getTagName()) {
+ return false;
+ }
+
+ if ($node1->getTextContent() !== $node2->getTextContent()) {
+ return false;
+ }
+
+ return true;
+ }
}
diff --git a/src/Readability.php b/src/Readability.php
index a9c1592..39e1a28 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -38,6 +38,22 @@ class Readability extends Element implements ReadabilityInterface
public function __construct(\DOMNode $node)
{
parent::__construct($node);
+
+ if (get_class($node) !== 'DOMText') {
+ /*
+ * Restore the score if the object has been already scored.
+ *
+ * The DOMDocument check must come before calling the getAttribute function, because if we reach the
+ * DOMDocument by getting the node's parents we'll get an undefined function fatal error.
+ */
+ $score = 0;
+
+ if (get_class($node) !== 'DOMDocument') {
+ $score = $node->getAttribute('readability');
+ }
+
+ $this->setContentScore(($score) ? $score : 0);
+ }
}
/**
@@ -240,10 +256,18 @@ class Readability extends Element implements ReadabilityInterface
*/
public function setContentScore($score)
{
- // To prevent the -0 value
- $this->contentScore = ($score === (double)-0) ? 0 : $score;
+ if (get_class($this->node) !== 'DOMDocument') {
- return $this->contentScore;
+ // To prevent the -0 value
+ $this->contentScore = ($score === (double)-0) ? 0 : $score;
+
+ // Set score in an attribute of the tag to prevent losing it while creating new Readability objects.
+ $this->node->setAttribute('readability', $this->contentScore);
+
+ return $this->contentScore;
+ }
+
+ return 0;
}
/**