diff options
Diffstat (limited to 'src/Readability.php')
-rw-r--r-- | src/Readability.php | 90 |
1 files changed, 89 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php
index d4c2181..a3c3cc2 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -4,7 +4,95 @@ namespace andreskrey\Readability;
 
 class Readability implements ReadabilityInterface
 {
-    private $score = 0;
+    protected $score = 0; // weight accumulated for $node so far
+
+    protected $node; // element being scored (set by the constructor)
+
+    // Class/ID patterns: a 'positive' match raises the weight, a 'negative' one lowers it.
+    private $regexps = [
+        'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
+        'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
+    ];
+
+    /**
+     * @param DOMElement $node element to score; getTagName() and getAttribute() are called on it
+     */
+    public function __construct($node)
+    {
+        $this->node = $node;
+    }
+
+    /**
+     * Adjusts the score by a per-tag amount, then adds the class/ID weight.
+     * @return $this
+     */
+    public function initializeNode()
+    {
+        switch ($this->node->getTagName()) {
+            case 'div':
+                $this->score += 5;
+                break;
+
+            case 'pre':
+            case 'td':
+            case 'blockquote':
+                $this->score += 3;
+                break;
+
+            case 'address':
+            case 'ol':
+            case 'ul':
+            case 'dl':
+            case 'dd':
+            case 'dt':
+            case 'li':
+            case 'form':
+                $this->score -= 3;
+                break;
+
+            case 'h1':
+            case 'h2':
+            case 'h3':
+            case 'h4':
+            case 'h5':
+            case 'h6':
+            case 'th':
+                $this->score -= 5;
+                break;
+        }
+
+        $this->score += $this->getClassWeight();
+
+        return $this;
+    }
+
+    /**
+     * Weighs the node's class and id: each attribute adds 25 on a positive
+     * match and subtracts 25 on a negative one; blank attributes are skipped.
+     * @return int
+     */
+    public function getClassWeight()
+    {
+        // if(!Config::FLAG_WEIGHT_CLASSES) return 0;
+
+        $weight = 0;
+
+        // The same checks apply to both attributes, so share one loop.
+        foreach (['class', 'id'] as $attribute) {
+            $value = $this->node->getAttribute($attribute);
+            if (trim($value)) {
+                if (preg_match($this->regexps['negative'], $value)) {
+                    $weight -= 25;
+                }
+
+                if (preg_match($this->regexps['positive'], $value)) {
+                    $weight += 25;
+                }
+            }
+        }
+
+        return $weight;
+    }
 
     public function getScore()
     {