diff options
author | Andres Rey <[email protected]> | 2017-11-02 19:28:26 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-02 19:29:20 +0000 |
commit | 7119097b8956b67732cdc638a02974fb30b9b8fd (patch) | |
tree | fcfeb9f7d085c712dddbbf59566055ae1d1be007 /src | |
parent | 2fad6c1a7dc4b45453c1a70bb00120d7f59c528b (diff) |
Update the unlikelyCandidates regex
Diffstat (limited to 'src')
-rw-r--r-- | src/HTMLParser.php | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 9aa9974..1a5f75e 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -25,7 +25,7 @@ class HTMLParser
      * @var array
      */
     private $regexps = [
-        'unlikelyCandidates' => '/banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i',
+        'unlikelyCandidates' => '/banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
         'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
         'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
         'byline' => '/byline|author|dateline|writtenby|p-author/i',
```