summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Readability.php 8
1 files changed, 5 insertions, 3 deletions
diff --git a/src/Readability.php b/src/Readability.php
index 93fc810..053d37f 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -997,7 +997,9 @@ class Readability
$MINIMUM_TOPCANDIDATES = 3;
if (count($alternativeCandidateAncestors) >= $MINIMUM_TOPCANDIDATES) {
$parentOfTopCandidate = $topCandidate->parentNode;
- while ($parentOfTopCandidate->nodeName !== 'body') {
+
+ // Check if we are actually dealing with a DOMNode and not a DOMDocument node or higher
+ while ($parentOfTopCandidate->nodeName !== 'body' && $parentOfTopCandidate->nodeType === XML_ELEMENT_NODE) {
$listsContainingThisAncestor = 0;
for ($ancestorIndex = 0; $ancestorIndex < count($alternativeCandidateAncestors) && $listsContainingThisAncestor < $MINIMUM_TOPCANDIDATES; $ancestorIndex++) {
$listsContainingThisAncestor += (int)in_array($parentOfTopCandidate, $alternativeCandidateAncestors[$ancestorIndex]);
@@ -1027,8 +1029,7 @@ class Readability
$scoreThreshold = $lastScore / 3;
/* @var DOMElement $parentOfTopCandidate */
- // Check if we are actually dealing with a DOMNode and not a DOMDocument node or higher
- while ($parentOfTopCandidate->nodeName !== 'body' && $parentOfTopCandidate->nodeType === XML_ELEMENT_NODE) {
+ while ($parentOfTopCandidate->nodeName !== 'body') {
$parentScore = $parentOfTopCandidate->contentScore;
if ($parentScore < $scoreThreshold) {
break;
@@ -1175,6 +1176,7 @@ class Readability
$this->_clean($article, 'h1');
$this->_clean($article, 'footer');
$this->_clean($article, 'link');
+ $this->_clean($article, 'aside');
// Clean out elements have "share" in their id/class combinations from final top candidates,
// which means we don't remove the top candidates even they have "share".