summary refs log tree commit diff
path: root/src/Readability.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Readability.php')
-rw-r--r-- src/Readability.php 90
1 files changed, 89 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php
index d4c2181..a3c3cc2 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -4,7 +4,95 @@ namespace andreskrey\Readability;
class Readability implements ReadabilityInterface
{
- private $score = 0;
+ protected $score = 0; // Running content score; adjusted by initializeNode().
+
+ protected $node; // Element being scored; assigned by the constructor.
+
+ private $regexps = [ // Case-insensitive patterns used by getClassWeight() to weight attribute values.
+ 'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i', // each match: +25
+ 'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i', // each match: -25
+ ];
+
+ /**
+ * Store the element that this instance scores.
+ *
+ * NOTE(review): the type annotated below is DOMElement, yet this class calls
+ * $node->getTagName(), which PHP's built-in DOMElement does not define.
+ * Presumably a wrapper element class is passed in; confirm against callers.
+ *
+ * @param DOMElement $node Element whose tag name and attributes are read when scoring.
+ */
+ public function __construct($node)
+ {
+ $this->node = $node;
+ }
+
+ /**
+  * Seed the score from the node's tag name, then add the class weight.
+  *
+  * Tag adjustments: div +5; pre, td, blockquote +3; address, ol, ul, dl,
+  * dd, dt, li, form -3; h1-h6 and th -5. Any other tag leaves the score
+  * unchanged at this step. The result of getClassWeight() is added last.
+  *
+  * @return $this
+  */
+ public function initializeNode()
+ {
+     $tagName = $this->node->getTagName();
+
+     // in_array() is deliberately left non-strict: it compares with ==,
+     // the same comparison a switch statement applies to its cases.
+     if ($tagName == 'div') {
+         $this->score += 5;
+     } elseif (in_array($tagName, ['pre', 'td', 'blockquote'])) {
+         $this->score += 3;
+     } elseif (in_array($tagName, ['address', 'ol', 'ul', 'dl', 'dd', 'dt', 'li', 'form'])) {
+         $this->score -= 3;
+     } elseif (in_array($tagName, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'th'])) {
+         $this->score -= 5;
+     }
+
+     $classWeight = $this->getClassWeight();
+     $this->score += $classWeight;
+
+     return $this;
+ }
+
+ /**
+  * Weight the node by the hints found in its class and id attributes.
+  *
+  * Each attribute is checked independently against the 'negative' and
+  * 'positive' patterns in $this->regexps: a negative match subtracts 25,
+  * a positive match adds 25, and a value matching both nets to zero.
+  * An attribute whose trimmed value is falsy (e.g. empty) is skipped.
+  *
+  * @return int Combined weight of both attributes, from -50 to 50.
+  */
+ public function getClassWeight()
+ {
+     // if(!Config::FLAG_WEIGHT_CLASSES) return 0;
+
+     $weight = 0;
+
+     // Inspect 'class' and then 'id'. The id must be read from the 'id'
+     // attribute; reading 'class' a second time would ignore the id and
+     // count the class name twice.
+     foreach (['class', 'id'] as $attribute) {
+         $value = $this->node->getAttribute($attribute);
+         if (!trim($value)) {
+             continue;
+         }
+
+         if (preg_match($this->regexps['negative'], $value)) {
+             $weight -= 25;
+         }
+
+         if (preg_match($this->regexps['positive'], $value)) {
+             $weight += 25;
+         }
+     }
+
+     return $weight;
+ }
public function getScore()
{