diff options
Diffstat (limited to 'src/Readability.php')
-rw-r--r-- | src/Readability.php | 6 |
1 file changed, 5 insertions, 1 deletion
diff --git a/src/Readability.php b/src/Readability.php
index 9a29313..c17911c 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -468,6 +468,10 @@ class Readability
                 if (count(preg_split('/\s+/', $curTitle)) < 3) {
                     $curTitle = substr($originalTitle, strpos($originalTitle, ':') + 1);
                     $this->logger->info(sprintf('[Metadata] Title too short, using the first part of the title instead: \'%s\'', $curTitle));
+                } else if (count(preg_split('/\s+/', substr($curTitle, 0, strpos($curTitle, ':')))) > 5) {
+                    // But if we have too many words before the colon there's something weird
+                    // with the titles and the H tags so let's just use the original title instead
+                    $curTitle = $originalTitle;
                 }
             }
         } elseif (mb_strlen($curTitle) > 150 || mb_strlen($curTitle) < 15) {
@@ -1485,7 +1489,7 @@ class Readability
      *
      * Readability.js has a special filter to avoid cleaning the classes that the algorithm adds. We don't add classes
      * here so no need to filter those.
-     *
+     *
      * @param DOMDocument|DOMNode $node
      *
      * @return void