From ed2142982b69aaaac2dfe5304327c2aef5c7b57b Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 11 Nov 2010 16:10:39 +0300 Subject: get_favicon_url: rewrite using DOMDocument --- functions.php | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/functions.php b/functions.php index f94da9525..8f60254b3 100644 --- a/functions.php +++ b/functions.php @@ -397,37 +397,31 @@ */ function get_favicon_url($url) { + $favicon_url = false; + if ($html = @fetch_file_contents($url)) { - if ( preg_match('/<link[^>]+rel="(?:shortcut )?icon"[^>]+?href="([^"]+?)"/si', $html, $matches)) { - // Attempt to grab a favicon link from their webpage url - $linkUrl = html_entity_decode($matches[1]); + libxml_use_internal_errors(true); - if (substr($linkUrl, 0, 1) == '/') { - $urlParts = parse_url($url); - $faviconURL = $urlParts['scheme'].'://'.$urlParts['host'].$linkUrl; - } else if (substr($linkUrl, 0, 7) == 'http://') { - $faviconURL = $linkUrl; - } else { - $pos = strrpos($url, "/"); - // no "/" in url or "/" is part of "://" - if ($pos === false || $pos == (strpos($url, "://")+2)) { - $faviconURL = $url.'/'.$linkUrl; - } else { - $faviconURL = substr($url, 0, $pos+1).$linkUrl; - } - } + $doc = new DOMDocument(); + $doc->loadHTML($html); + $xpath = new DOMXPath($doc); + $entries = $xpath->query('/html/head/link[@rel="shortcut icon"]'); - } else { - // If unsuccessful, attempt to "guess" the favicon location - $urlParts = parse_url($url); - $faviconURL = $urlParts['scheme'].'://'.$urlParts['host'].'/favicon.ico'; - } + if (count($entries) > 0) { + foreach ($entries as $entry) { + $favicon_url = rewrite_relative_url($url, $entry->getAttribute("href")); + break; + } + } } + if (!$favicon_url) + $favicon_url = rewrite_relative_url($url, "/favicon.ico"); + // Run a test to see if what we have attempted to get actually exists. 
- if(USE_CURL_FOR_ICONS || url_validate($faviconURL)) { - return $faviconURL; + if(USE_CURL_FOR_ICONS || url_validate($favicon_url)) { + return $favicon_url; } else { return false; } -- cgit v1.2.3