From 01f5f7c94e4c99643ed41ba26db6efcc018d4b07 Mon Sep 17 00:00:00 2001 From: Maria Luiza Soares Date: Fri, 21 Dec 2018 14:32:38 +0100 Subject: Add getSiteName feature based on meta tag og:site_name --- src/Readability.php | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Readability.php b/src/Readability.php index 8c55e69..313b0ba 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -56,6 +56,13 @@ class Readability */ protected $author = null; + /** + * Website name. + * + * @var string|null + */ + protected $siteName = null; + /** * Direction of the text. * @@ -287,10 +294,10 @@ class Readability $values = []; // property is a space-separated list of values - $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image)(?!:)\s*/i'; + $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image|site_name)(?!:)\s*/i'; // name is a single value - $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image)(?!:)\s*$/i'; + $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image|site_name)(?!:)\s*$/i'; // Find description tags. foreach ($this->dom->getElementsByTagName('meta') as $meta) { @@ -332,7 +339,6 @@ class Readability * This could be easily replaced with an ugly set of isset($values['key']) or a bunch of ??s. * Will probably replace it with ??s after dropping support of PHP5.6 */ - $key = current(array_intersect([ 'dc:title', 'dcterm:title', @@ -378,6 +384,12 @@ class Readability ], array_keys($values))); $this->setImage(isset($values[$key]) ? $values[$key] : null); + + $key = current(array_intersect([ + 'og:site_name' + ], array_keys($values))); + + $this->setSiteName(isset($values[$key]) ? 
$values[$key] : null); } /** @@ -1756,6 +1768,22 @@ class Readability $this->author = $author; } + /** + * @return string|null + */ + public function getSiteName() + { + return $this->siteName; + } + + /** + * @param string|null $siteName + */ + protected function setSiteName($siteName) + { + $this->siteName = $siteName; + } + /** * @return null|string */ -- cgit v1.2.3