summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAndres Rey <[email protected]>2016-12-26 16:46:47 -0300
committerAndres Rey <[email protected]>2016-12-26 16:46:47 -0300
commit8fd4d10b08265088801670a228b011adad57cd97 (patch)
treeff2b5796fee4f31b29eb08c8a8bd9d671e167622 /src
parent2b70f70e380c6cda03f9b8d8fd9e418b496d8ca0 (diff)
Updated the getMetadata function
Diffstat (limited to 'src')
-rw-r--r--src/HTMLParser.php60
1 files changed, 44 insertions, 16 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 7ee0594..d5308f9 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -377,30 +377,57 @@ class HTMLParser
*/
private function getMetadata()
{
- $metadata = [];
+ $metadata = $values = [];
+ // Match "description" or "title", optionally prefixed with Twitter's
+ // "twitter:" (Cards), in the name attribute.
+ $namePattern = '/^\s*((twitter)\s*:\s*)?(description|title)\s*$/i';
+
+ // Match Facebook's Open Graph title & description properties.
+ $propertyPattern = '/^\s*og\s*:\s*(description|title)\s*$/i';
+
foreach ($this->dom->getElementsByTagName('meta') as $meta) {
/* @var Readability $meta */
- $name = $meta->getAttribute('name');
- $property = $meta->getAttribute('property');
-
- // Select either name or property
- $item = ($name ? $name : $property);
+ $elementName = $meta->getAttribute('name');
+ $elementProperty = $meta->getAttribute('property');
- if ($item == 'og:title' || $item == 'twitter:title') {
- $metadata['title'] = $meta->getAttribute('content');
+ if (in_array('author', [$elementName, $elementProperty])) {
+ $metadata['byline'] = $meta->getAttribute('content');
+ continue;
}
- if ($item == 'og:description' || $item == 'twitter:description') {
- $metadata['excerpt'] = $meta->getAttribute('content');
+ $name = null;
+ if (preg_match($namePattern, $elementName)) {
+ $name = $elementName;
+ } elseif (preg_match($propertyPattern, $elementProperty)) {
+ $name = $elementProperty;
}
- if ($item == 'author') {
- $metadata['byline'] = $meta->getAttribute('content');
+ if ($name) {
+ $content = $meta->getAttribute('content');
+ if ($content) {
+ // Convert to lowercase and remove any whitespace
+ // so we can match below.
+ $name = preg_replace('/\s/', '', strtolower($name));
+ $values[$name] = trim($content);
+ }
}
+ }
+ if (array_key_exists('description', $values)) {
+ $metadata['excerpt'] = $values['description'];
+ } elseif (array_key_exists('og:description', $values)) {
+ // Use facebook open graph description.
+ $metadata['excerpt'] = $values['og:description'];
+ } elseif (array_key_exists('twitter:description', $values)) {
+ // Use twitter cards description.
+ $metadata['excerpt'] = $values['twitter:description'];
+ }
- if ($item == 'og:image' || $item == 'twitter:image') {
- $metadata['image'] = $meta->getAttribute('content');
- }
+ if (array_key_exists('og:title', $values)) {
+ // Use facebook open graph title.
+ $metadata['title'] = $values['og:title'];
+ } elseif (array_key_exists('twitter:title', $values)) {
+ // Use twitter cards title.
+ $metadata['title'] = $values['twitter:title'];
}
return $metadata;
@@ -458,6 +485,7 @@ class HTMLParser
* Gets nodes from the root element.
*
* @param $node Readability
+ * @return array
*/
private function getNodes(Readability $node)
{
@@ -1030,7 +1058,7 @@ class HTMLParser
* Checks if the node is a byline.
*
* @param Readability $node
- * @param string $matchString
+ * @param string $matchString
*
* @return bool
*/