diff options
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | src/HTMLParser.php | 3 |
2 files changed, 4 insertions, 1 deletion
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d7fdba..c080912 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,8 @@ All notable changes to this project will be documented in this file.
 
 ## Unreleased
 
+- Trim titles when detecting hierarchical separators to avoid false negatives on strings with spaces.
+
 ## [v0.3.0](https://github.com/andreskrey/readability.php/releases/tag/v0.3.0)
 
 - Merged PR #24. Fixes notice when trying to extract `og:image`
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 84d174e..8c864cb 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -585,7 +585,8 @@ class HTMLParser
             $match = false;
             for ($i = 1; $i <= 2; $i++) {
                 foreach ($this->dom->getElementsByTagName('h' . $i) as $hTag) {
-                    if ($hTag->nodeValue === $curTitle) {
+                    // Trim texts to avoid having false negatives when the title is surrounded by spaces or tabs
+                    if (trim($hTag->nodeValue) === trim($curTitle)) {
                         $match = true;
                     }
                 }