From 550fe35fbf3bbefa3257ddb7ec32afaea48b6726 Mon Sep 17 00:00:00 2001
From: Andres Rey
Date: Wed, 28 Dec 2016 16:23:07 -0300
Subject: Removed the private var title since it wasn't used

---
 src/HTMLParser.php | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index a8c28ff..6f7afe7 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -26,11 +26,6 @@ class HTMLParser
      */
     private $metadata = [];
 
-    /**
-     * @var array
-     */
-    private $title = [];
-
     /**
      * @var array
      */
@@ -137,7 +132,7 @@ class HTMLParser
 
         $this->metadata = $this->getMetadata();
 
-        $this->title = $this->getTitle();
+        $this->metadata['title'] = $this->getTitle();
 
         // Checking for minimum HTML to work with.
         if (!($root = $this->dom->getElementsByTagName('body')->item(0))) {
@@ -162,7 +157,11 @@ class HTMLParser
 
         // TODO Better way to count resulting text. Textcontent usually has alt titles and that stuff
         // that doesn't really count to the quality of the result.
-        if ($result && mb_strlen($result->textContent) < 500) {
+        $length = 0;
+        foreach($result->getElementsByTagName('p') as $p){
+            $length += mb_strlen($p->textContent);
+        }
+        if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < 500) {
             $root = $this->backupdom->getElementsByTagName('body')->item(0);
 
             if ($this->getConfig()->getOption('stripUnlikelyCandidates')) {
-- 
cgit v1.2.3