diff options
author | Andres Rey <[email protected]> | 2017-11-12 19:21:36 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-11-12 19:21:36 +0000 |
commit | 422ae0004eba3431fb9d7653ff913443ab048d16 (patch) | |
tree | 64e866f56b2fa12beaa934c3bcd2da8e246ac4bb | |
parent | adf7970f5daf324e51176fdd9600494e598627ea (diff) |
Refactor title matching in H2s
-rw-r--r-- | src/HTMLParser.php | 17 |
1 files changed, 10 insertions, 7 deletions
```diff
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 987d4b0..8931678 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -1045,13 +1045,16 @@ class HTMLParser
         $h2 = $article->getElementsByTagName('h2');
         if ($h2->length === 1) {
             $lengthSimilarRate = (mb_strlen($h2->item(0)->textContent) - mb_strlen($this->metadata['title'])) / mb_strlen($this->metadata['title']);
-            if (abs($lengthSimilarRate) < 0.5 &&
-                ($lengthSimilarRate > 0 ?
-                    strpos($h2->item(0)->textContent, $this->metadata['title']) !== false :
-                    strpos($this->metadata['title'], $h2->item(0)->textContent) !== false
-                )
-            ) {
-                $this->_clean($article, 'h2');
+
+            if (abs($lengthSimilarRate) < 0.5) {
+                if ($lengthSimilarRate > 0) {
+                    $titlesMatch = strpos($h2->item(0)->textContent, $this->metadata['title']) !== false;
+                } else {
+                    $titlesMatch = strpos($this->metadata['title'], $h2->item(0)->textContent) !== false;
+                }
+                if ($titlesMatch) {
+                    $this->_clean($article, 'h2');
+                }
             }
         }
 
```