summary | refs | log | tree | commit | diff
path: root/src/Readability.php
diff options
context:
space:
mode:
author Andres Rey <[email protected]> 2016-11-18 13:52:19 +0000
committer Andres Rey <[email protected]> 2016-11-18 13:52:19 +0000
commit a01a70b4368d0c01b15f187d0c398486733546f1 (patch)
tree f7067f7e9c27e52ba26f13dd760360adb784717b /src/Readability.php
parent c9c13321f095dd442c4b4a7db140775bc025a14b (diff)
Added initialized property to avoid reinitializing nodes.
Diffstat (limited to 'src/Readability.php')
-rw-r--r-- src/Readability.php 87
1 files changed, 53 insertions, 34 deletions
diff --git a/src/Readability.php b/src/Readability.php
index addbd0d..543c088 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -23,6 +23,11 @@ class Readability extends Element implements ReadabilityInterface
protected $contentScore = 0;
/**
+ * @var bool
+ */
+ protected $initialized = false;
+
+ /**
* @var array
*/
private $regexps = [
@@ -88,7 +93,7 @@ class Readability extends Element implements ReadabilityInterface
$node = $this->getParent();
while ($node) {
- $ancestors[] = $node;
+ $ancestors[] = $node->initializeNode();
$level++;
if ($level >= $maxLevel) {
break;
@@ -138,43 +143,47 @@ class Readability extends Element implements ReadabilityInterface
*/
public function initializeNode()
{
- $contentScore = 0;
-
- switch ($this->getTagName()) {
- case 'div':
- $contentScore += 5;
- break;
-
- case 'pre':
- case 'td':
- case 'blockquote':
- $contentScore += 3;
- break;
+ if (!$this->initialized) {
+ $contentScore = 0;
+
+ switch ($this->getTagName()) {
+ case 'div':
+ $contentScore += 5;
+ break;
+
+ case 'pre':
+ case 'td':
+ case 'blockquote':
+ $contentScore += 3;
+ break;
+
+ case 'address':
+ case 'ol':
+ case 'ul':
+ case 'dl':
+ case 'dd':
+ case 'dt':
+ case 'li':
+ case 'form':
+ $contentScore -= 3;
+ break;
+
+ case 'h1':
+ case 'h2':
+ case 'h3':
+ case 'h4':
+ case 'h5':
+ case 'h6':
+ case 'th':
+ $contentScore -= 5;
+ break;
+ }
- case 'address':
- case 'ol':
- case 'ul':
- case 'dl':
- case 'dd':
- case 'dt':
- case 'li':
- case 'form':
- $contentScore -= 3;
- break;
+ $this->setContentScore($contentScore + $this->getClassWeight());
- case 'h1':
- case 'h2':
- case 'h3':
- case 'h4':
- case 'h5':
- case 'h6':
- case 'th':
- $contentScore -= 5;
- break;
+ $this->initialized = true;
}
- $this->setContentScore($contentScore + $this->getClassWeight());
-
return $this;
}
@@ -412,4 +421,14 @@ class Readability extends Element implements ReadabilityInterface
return new static($return);
}
+
+ /**
+ * Checks if the object is initialized.
+ *
+ * @return bool
+ */
+ public function isInitialized()
+ {
+ return $this->initialized;
+ }
}