diff options
author | Pedro Amorim <[email protected]> | 2018-04-09 10:49:30 +0200 |
---|---|---|
committer | Pedro Amorim <[email protected]> | 2018-04-09 10:59:13 +0200 |
commit | e1b9ea114e17aa08805eed8f013bb74b337936a2 (patch) | |
tree | a55b6065f17e12a4f498066e5bc61ec1f65cf408 /src | |
parent | 992a11260de32a036c932a8ba61bcc8a46c34dd1 (diff) |
Fix notice non-object on $parentOfTopCandidate for tumblr.com
PHP notice on DOMElement $parentOfTopCandidate.
Trying to get property of non-object in src/Readability.php line 1000
Trying to get property of non-object in src/Readability.php line 1009
Reproduced with this url:
https://clipartx.tumblr.com/post/172752750628/orange-swirl-burnt-orange-orange
Config:
$config = new Configuration;
$config->setWordThreshold(5)
->setSummonCthulhu(true)
->setFixRelativeURLs(true)
->setOriginalURL($url);
Diffstat (limited to 'src')
-rw-r--r-- | src/Readability.php | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/src/Readability.php b/src/Readability.php index 93fc810..2623bb2 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -997,7 +997,7 @@ class Readability $MINIMUM_TOPCANDIDATES = 3; if (count($alternativeCandidateAncestors) >= $MINIMUM_TOPCANDIDATES) { $parentOfTopCandidate = $topCandidate->parentNode; - while ($parentOfTopCandidate->nodeName !== 'body') { + while ($parentOfTopCandidate->nodeName !== 'body' && $parentOfTopCandidate->nodeType === XML_ELEMENT_NODE) { $listsContainingThisAncestor = 0; for ($ancestorIndex = 0; $ancestorIndex < count($alternativeCandidateAncestors) && $listsContainingThisAncestor < $MINIMUM_TOPCANDIDATES; $ancestorIndex++) { $listsContainingThisAncestor += (int)in_array($parentOfTopCandidate, $alternativeCandidateAncestors[$ancestorIndex]); @@ -1047,7 +1047,10 @@ class Readability // If the top candidate is the only child, use parent instead. This will help sibling // joining logic when adjacent content is actually located in parent's sibling node. $parentOfTopCandidate = $topCandidate->parentNode; - while ($parentOfTopCandidate->nodeName !== 'body' && count($parentOfTopCandidate->getChildren(true)) === 1) { + while ($parentOfTopCandidate->nodeName !== 'body' + && $parentOfTopCandidate->nodeType === XML_ELEMENT_NODE + && count($parentOfTopCandidate->getChildren(true)) === 1 + ) { $topCandidate = $parentOfTopCandidate; $parentOfTopCandidate = $topCandidate->parentNode; } |