diff options
author | Andres Rey <[email protected]> | 2016-12-26 20:13:07 -0300 |
---|---|---|
committer | GitHub <[email protected]> | 2016-12-26 20:13:07 -0300 |
commit | ed77068c961b97911f3470ddec5470d44f57f612 (patch) | |
tree | b62c455c92c9a53a51392d3f06180e9743022146 | |
parent | 1b4a62fb1cbc809f64fec691ef827d30e5ef502f (diff) | |
parent | 1b0ac19d31473b728edfb2e72d3ec0cf1ebac35f (diff) |
Merge pull request #15 from andreskrey/development
Updating for release
-rw-r--r-- | src/HTMLParser.php | 64 |
1 files changed, 48 insertions, 16 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 7ee0594..a8c28ff 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -377,30 +377,61 @@ class HTMLParser */ private function getMetadata() { - $metadata = []; + $metadata = $values = []; + // Match "description", or Twitter's "twitter:description" (Cards) + // in name attribute. + $namePattern = '/^\s*((twitter)\s*:\s*)?(description|title|image)\s*$/i'; + + // Match Facebook's Open Graph title & description properties. + $propertyPattern = '/^\s*og\s*:\s*(description|title|image)\s*$/i'; + foreach ($this->dom->getElementsByTagName('meta') as $meta) { /* @var Readability $meta */ - $name = $meta->getAttribute('name'); - $property = $meta->getAttribute('property'); - - // Select either name or property - $item = ($name ? $name : $property); + $elementName = $meta->getAttribute('name'); + $elementProperty = $meta->getAttribute('property'); - if ($item == 'og:title' || $item == 'twitter:title') { - $metadata['title'] = $meta->getAttribute('content'); + if (in_array('author', [$elementName, $elementProperty])) { + $metadata['byline'] = $meta->getAttribute('content'); + continue; } - if ($item == 'og:description' || $item == 'twitter:description') { - $metadata['excerpt'] = $meta->getAttribute('content'); + $name = null; + if (preg_match($namePattern, $elementName)) { + $name = $elementName; + } elseif (preg_match($propertyPattern, $elementProperty)) { + $name = $elementProperty; } - if ($item == 'author') { - $metadata['byline'] = $meta->getAttribute('content'); + if ($name) { + $content = $meta->getAttribute('content'); + if ($content) { + // Convert to lowercase and remove any whitespace + // so we can match below. + $name = preg_replace('/\s/', '', strtolower($name)); + $values[$name] = trim($content); + } } + } + if (array_key_exists('description', $values)) { + $metadata['excerpt'] = $values['description']; + } elseif (array_key_exists('og:description', $values)) { + // Use facebook open graph description. + $metadata['excerpt'] = $values['og:description']; + } elseif (array_key_exists('twitter:description', $values)) { + // Use twitter cards description. + $metadata['excerpt'] = $values['twitter:description']; + } - if ($item == 'og:image' || $item == 'twitter:image') { - $metadata['image'] = $meta->getAttribute('content'); - } + if (array_key_exists('og:title', $values)) { + // Use facebook open graph title. + $metadata['title'] = $values['og:title']; + } elseif (array_key_exists('twitter:title', $values)) { + // Use twitter cards title. + $metadata['title'] = $values['twitter:title']; + } + + if (array_key_exists('og:image', $values) || array_key_exists('twitter:image', $values)) { + $metadata['image'] = ($values['og:image']) ? $values['og:image'] : $values['twitter:image']; } return $metadata; @@ -458,6 +489,7 @@ class HTMLParser * Gets nodes from the root element. * * @param $node Readability + * @return array */ private function getNodes(Readability $node) { @@ -1030,7 +1062,7 @@ class HTMLParser * Checks if the node is a byline. * * @param Readability $node - * @param string $matchString + * @param string $matchString * * @return bool */ |