summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2017-11-02 19:28:26 +0000
committer: Andres Rey <[email protected]> 2017-11-02 19:29:20 +0000
commit: 7119097b8956b67732cdc638a02974fb30b9b8fd (patch)
tree: fcfeb9f7d085c712dddbbf59566055ae1d1be007 /src
parent: 2fad6c1a7dc4b45453c1a70bb00120d7f59c528b (diff)
Update the unlikelyCandidates regex
Diffstat (limited to 'src')
-rw-r--r-- src/HTMLParser.php 2
1 file changed, 1 insertion, 1 deletion
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 9aa9974..1a5f75e 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -25,7 +25,7 @@ class HTMLParser
* @var array
*/
private $regexps = [
- 'unlikelyCandidates' => '/banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i',
+ 'unlikelyCandidates' => '/banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
'byline' => '/byline|author|dateline|writtenby|p-author/i',