From e1b9ea114e17aa08805eed8f013bb74b337936a2 Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Mon, 9 Apr 2018 10:49:30 +0200 Subject: Fix notice non-object on $parentOfTopCandidate for tumblr.com PHP notice on DOMElement $parentOfTopCandidate. Trying to get property of non-object in src/Readability.php line 1000 Trying to get property of non-object in src/Readability.php line 1009 Reproduced with this URL: https://clipartx.tumblr.com/post/172752750628/orange-swirl-burnt-orange-orange Config: $config = new Configuration; $config->setWordThreshold(5) ->setSummonCthulhu(true) ->setFixRelativeURLs(true) ->setOriginalURL($url); --- src/Readability.php | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Readability.php b/src/Readability.php index 93fc810..2623bb2 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -997,7 +997,7 @@ class Readability $MINIMUM_TOPCANDIDATES = 3; if (count($alternativeCandidateAncestors) >= $MINIMUM_TOPCANDIDATES) { $parentOfTopCandidate = $topCandidate->parentNode; - while ($parentOfTopCandidate->nodeName !== 'body') { + while ($parentOfTopCandidate->nodeName !== 'body' && $parentOfTopCandidate->nodeType === XML_ELEMENT_NODE) { $listsContainingThisAncestor = 0; for ($ancestorIndex = 0; $ancestorIndex < count($alternativeCandidateAncestors) && $listsContainingThisAncestor < $MINIMUM_TOPCANDIDATES; $ancestorIndex++) { $listsContainingThisAncestor += (int)in_array($parentOfTopCandidate, $alternativeCandidateAncestors[$ancestorIndex]); @@ -1047,7 +1047,10 @@ class Readability // If the top candidate is the only child, use parent instead. This will help sibling // joining logic when adjacent content is actually located in parent's sibling node. 
$parentOfTopCandidate = $topCandidate->parentNode; - while ($parentOfTopCandidate->nodeName !== 'body' && count($parentOfTopCandidate->getChildren(true)) === 1) { + while ($parentOfTopCandidate->nodeName !== 'body' + && $parentOfTopCandidate->nodeType === XML_ELEMENT_NODE + && count($parentOfTopCandidate->getChildren(true)) === 1 + ) { $topCandidate = $parentOfTopCandidate; $parentOfTopCandidate = $topCandidate->parentNode; } -- cgit v1.2.3