diff options
Diffstat (limited to 'src/HTMLParser.php')
-rw-r--r-- | src/HTMLParser.php | 7 |
1 file changed, 3 insertions, 4 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 6859579..ca994b3 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -15,8 +15,6 @@ class HTMLParser private $regexps = [ 'unlikelyCandidates' => '/banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i', 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', - 'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i', - 'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i', 'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i', 'byline' => '/byline|author|dateline|writtenby|p-author/i', 'replaceFonts' => '/<(\/?)font[^>]*>/gi', @@ -141,7 +139,7 @@ class HTMLParser } /** - * @param DOMElement $nodes + * @param array $nodes */ private function rateNodes($nodes) { @@ -167,7 +165,8 @@ class HTMLParser $contentScore += min(floor(strlen($node->getValue()) / 100), 3); foreach ($ancestors as $ancestor) { - $tes = $ancestor->node->getTagName(); + $readability = new Readability($ancestor); + $candidates[] = $readability->initializeNode(); } } |