From 7119097b8956b67732cdc638a02974fb30b9b8fd Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Thu, 2 Nov 2017 19:28:26 +0000 Subject: Update the unlikelyCandidates regex --- src/HTMLParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 9aa9974..1a5f75e 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -25,7 +25,7 @@ class HTMLParser * @var array */ private $regexps = [ - 'unlikelyCandidates' => '/banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i', + 'unlikelyCandidates' => '/banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i', 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', 'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i', 'byline' => '/byline|author|dateline|writtenby|p-author/i', -- cgit v1.2.3