diff options
author | Andres Rey <[email protected]> | 2017-11-12 18:54:40 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-12 18:54:40 +0000 |
commit | 63bd099cc4a7e8e775698300d866fa218664234a (patch) | |
tree | f4719e960572b212bafe888d10ba09974a4548d7 /src/HTMLParser.php | |
parent | 8e43d4ecdf23a952c64a377a3bbcd49ae262d4e8 (diff) |
Add wordThreshold option
Diffstat (limited to 'src/HTMLParser.php')
-rw-r--r-- | src/HTMLParser.php | 5 |
1 files changed, 3 insertions, 2 deletions
```diff
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 5d86065..987d4b0 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -90,7 +90,8 @@ class HTMLParser
     public function __construct(array $options = [])
     {
         $defaults = [
-            'maxTopCandidates' => 5, // Max amount of top level candidates
+            'maxTopCandidates' => 5,
+            'wordThreshold' => 500,
             'articleByLine' => false,
             'stripUnlikelyCandidates' => true,
             'cleanConditionally' => true,
@@ -155,7 +156,7 @@ class HTMLParser
             foreach ($result->getElementsByTagName('p') as $p) {
                 $length += mb_strlen($p->textContent);
             }
-            if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < 500) {
+            if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < $this->getConfig()->getOption('wordThreshold')) {
                 $this->dom = $this->loadHTML($html);
                 $root = $this->dom->getElementsByTagName('body')->item(0);
```