summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Andres Rey <[email protected]> 2018-03-10 18:43:31 +0000
committer Andres Rey <[email protected]> 2018-03-10 18:43:31 +0000
commit d6615c87857eed1d486c0f90c1a9d909ff262d13 (patch)
tree 25e1232183fe22dfc9683a0486db87a58b8e3071 /src
parent 746dd0bcf5f3b0e685d842252c620c01faff19b9 (diff)
Failsafe for weird titles
Diffstat (limited to 'src')
-rw-r--r-- src/Readability.php 6
1 file changed, 5 insertions, 1 deletion
diff --git a/src/Readability.php b/src/Readability.php
index 9a29313..c17911c 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -468,6 +468,10 @@ class Readability
if (count(preg_split('/\s+/', $curTitle)) < 3) {
$curTitle = substr($originalTitle, strpos($originalTitle, ':') + 1);
$this->logger->info(sprintf('[Metadata] Title too short, using the first part of the title instead: \'%s\'', $curTitle));
+ } else if (count(preg_split('/\s+/', substr($curTitle, 0, strpos($curTitle, ':')))) > 5) {
+ // But if we have too many words before the colon there's something weird
+ // with the titles and the H tags so let's just use the original title instead
+ $curTitle = $originalTitle;
}
}
} elseif (mb_strlen($curTitle) > 150 || mb_strlen($curTitle) < 15) {
@@ -1485,7 +1489,7 @@ class Readability
*
* Readability.js has a special filter to avoid cleaning the classes that the algorithm adds. We don't add classes
* here so no need to filter those.
- *
+ *
* @param DOMDocument|DOMNode $node
*
* @return void