From 422ae0004eba3431fb9d7653ff913443ab048d16 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Sun, 12 Nov 2017 19:21:36 +0000 Subject: Refactor title matching in H2s --- src/HTMLParser.php | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 987d4b0..8931678 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -1045,13 +1045,16 @@ class HTMLParser $h2 = $article->getElementsByTagName('h2'); if ($h2->length === 1) { $lengthSimilarRate = (mb_strlen($h2->item(0)->textContent) - mb_strlen($this->metadata['title'])) / mb_strlen($this->metadata['title']); - if (abs($lengthSimilarRate) < 0.5 && - ($lengthSimilarRate > 0 ? - strpos($h2->item(0)->textContent, $this->metadata['title']) !== false : - strpos($this->metadata['title'], $h2->item(0)->textContent) !== false - ) - ) { - $this->_clean($article, 'h2'); + + if (abs($lengthSimilarRate) < 0.5) { + if ($lengthSimilarRate > 0) { + $titlesMatch = strpos($h2->item(0)->textContent, $this->metadata['title']) !== false; + } else { + $titlesMatch = strpos($this->metadata['title'], $h2->item(0)->textContent) !== false; + } + if ($titlesMatch) { + $this->_clean($article, 'h2'); + } } } -- cgit v1.2.3