summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2017-11-12 18:54:40 +0000
committer: Andres Rey <[email protected]> 2017-11-12 18:54:40 +0000
commit: 63bd099cc4a7e8e775698300d866fa218664234a (patch)
tree: f4719e960572b212bafe888d10ba09974a4548d7 /src
parent: 8e43d4ecdf23a952c64a377a3bbcd49ae262d4e8 (diff)
Add wordThreshold option
Diffstat (limited to 'src')
-rw-r--r-- src/HTMLParser.php 5
1 file changed, 3 insertions, 2 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 5d86065..987d4b0 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -90,7 +90,8 @@ class HTMLParser
public function __construct(array $options = [])
{
$defaults = [
- 'maxTopCandidates' => 5, // Max amount of top level candidates
+ 'maxTopCandidates' => 5,
+ 'wordThreshold' => 500,
'articleByLine' => false,
'stripUnlikelyCandidates' => true,
'cleanConditionally' => true,
@@ -155,7 +156,7 @@ class HTMLParser
foreach ($result->getElementsByTagName('p') as $p) {
$length += mb_strlen($p->textContent);
}
- if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < 500) {
+ if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < $this->getConfig()->getOption('wordThreshold')) {
$this->dom = $this->loadHTML($html);
$root = $this->dom->getElementsByTagName('body')->item(0);