summary | refs | log | tree | commit | diff
path: root/src/HTMLParser.php
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2016-11-22 15:39:21 +0000
committer: Andres Rey <[email protected]> 2016-11-22 15:39:21 +0000
commit: cba167695482cca7492303e3de1f59333c0127b6 (patch)
tree: 1d8e9900642a16476ff15751ee1ef09f3c20cad5 /src/HTMLParser.php
parent: 37dba87cce8498abaa4ef4c1892a83585abd2c43 (diff)
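Removed the old $elementsToScore property (getNodes() now returns the list of elements to score) and moved ancestor initialization into the ancestor-scoring loop, so ancestors are initialized and added to the candidates the first time they are scored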
Diffstat (limited to 'src/HTMLParser.php')
-rw-r--r-- src/HTMLParser.php 24
1 file changed, 12 insertions, 12 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 4d0a271..bd7774b 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -29,11 +29,6 @@ class HTMLParser
/**
* @var array
*/
- private $elementsToScore = [];
-
- /**
- * @var array
- */
private $regexps = [
'unlikelyCandidates' => '/banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i',
'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
@@ -136,9 +131,9 @@ class HTMLParser
$root = new Readability($root->firstChild);
- $this->getNodes($root);
+ $elementsToScore = $this->getNodes($root);
- $result = $this->rateNodes($this->elementsToScore);
+ $result = $this->rateNodes($elementsToScore);
// Todo, fix return, check for values, maybe create a function to create the return object
return [
@@ -278,6 +273,8 @@ class HTMLParser
{
$stripUnlikelyCandidates = $this->getConfig()->getOption('stripUnlikelyCandidates');
+ $elementsToScore = [];
+
/*
* First, node prepping. Trash nodes that look cruddy (like ones with the
* class name "comment", etc), and turn divs into P tags where they have been
@@ -307,7 +304,7 @@ class HTMLParser
}
if (in_array(strtolower($node->getTagName()), $this->defaultTagsToScore)) {
- $this->elementsToScore[] = $node;
+ $elementsToScore[] = $node;
}
// Turn all divs that don't have children block level elements into p's
@@ -324,7 +321,7 @@ class HTMLParser
$node = $pNode;
} elseif (!$this->hasSingleChildBlockElement($node)) {
$node->setNodeTag('p');
- $this->elementsToScore[] = $node;
+ $elementsToScore[] = $node;
} else {
// EXPERIMENTAL
foreach ($node->getChildren() as $child) {
@@ -339,6 +336,8 @@ class HTMLParser
$node = $node->getNextNode($node);
}
+
+ return $elementsToScore;
}
/**
@@ -381,7 +380,10 @@ class HTMLParser
// Initialize and score ancestors.
/** @var Readability $ancestor */
foreach ($ancestors as $level => $ancestor) {
- // No need to initialize the ancestor since getNodeAncestors() already initializes them.
+ if (!$ancestor->isInitialized()) {
+ $ancestor->initializeNode();
+ $candidates[] = $ancestor;
+ }
/*
* Node score divider:
@@ -400,8 +402,6 @@ class HTMLParser
$currentScore = $ancestor->getContentScore();
$ancestor->setContentScore($currentScore + ($contentScore / $scoreDivider));
-
- $candidates[] = $ancestor;
}
}