summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Andres Rey <[email protected]> 2016-12-28 16:23:07 -0300
committer: Andres Rey <[email protected]> 2016-12-28 16:23:07 -0300
commit: 550fe35fbf3bbefa3257ddb7ec32afaea48b6726 (patch)
tree: 8a245374b4b33374b9dc0592cbb58da2038eaff9 /src
parent: 1b0ac19d31473b728edfb2e72d3ec0cf1ebac35f (diff)
Removed the private var title since it wasn't used
Diffstat (limited to 'src')
-rw-r--r-- src/HTMLParser.php 13
1 files changed, 6 insertions, 7 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index a8c28ff..6f7afe7 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -29,11 +29,6 @@ class HTMLParser
/**
* @var array
*/
- private $title = [];
-
- /**
- * @var array
- */
private $regexps = [
'unlikelyCandidates' => '/banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i',
'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
@@ -137,7 +132,7 @@ class HTMLParser
$this->metadata = $this->getMetadata();
- $this->title = $this->getTitle();
+ $this->metadata['title'] = $this->getTitle();
// Checking for minimum HTML to work with.
if (!($root = $this->dom->getElementsByTagName('body')->item(0))) {
@@ -162,7 +157,11 @@ class HTMLParser
// TODO Better way to count resulting text. Textcontent usually has alt titles and that stuff
// that doesn't really count to the quality of the result.
- if ($result && mb_strlen($result->textContent) < 500) {
+ $length = 0;
+ foreach($result->getElementsByTagName('p') as $p){
+ $length += mb_strlen($p->textContent);
+ }
+ if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < 500) {
$root = $this->backupdom->getElementsByTagName('body')->item(0);
if ($this->getConfig()->getOption('stripUnlikelyCandidates')) {