Browse Source

DiskCache: properly deal with srcset attributes

Andrew Dolgov 2 months ago
parent
commit
3a4b9249a9
2 changed files with 61 additions and 29 deletions
  1. 25 8
      classes/diskcache.php
  2. 36 21
      classes/rssutils.php

+ 25 - 8
classes/diskcache.php

@@ -79,6 +79,7 @@ class DiskCache {
 	// check for locally cached (media) URLs and rewrite to local versions
 	// this is called separately after sanitize() and plugin render article hooks to allow
 	// plugins to work on original source URLs used before caching
+	// NOTE: URLs should already be absolutized because this is called after sanitize()
 	static public function rewriteUrls($str)
 	{
 		$res = trim($str);
@@ -89,29 +90,45 @@ class DiskCache {
 			$xpath = new DOMXPath($doc);
 			$cache = new DiskCache("images");
 
-			$entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video[@src]|//video/source[@src]|//audio/source[@src])');
+			$entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])');
 
 			$need_saving = false;
 
 			foreach ($entries as $entry) {
-
 				foreach (array('src', 'poster') as $attr) {
 					if ($entry->hasAttribute($attr)) {
-						// should be already absolutized because this is called after sanitize()
-						$src = $entry->getAttribute($attr);
-						$cached_filename = sha1($src);
+						$url = $entry->getAttribute($attr);
+						$cached_filename = sha1($url);
 
 						if ($cache->exists($cached_filename)) {
+							$url = $cache->getUrl($cached_filename);
 
-							$src = $cache->getUrl(sha1($src));
-
-							$entry->setAttribute($attr, $src);
+							$entry->setAttribute($attr, $url);
 							$entry->removeAttribute("srcset");
 
 							$need_saving = true;
 						}
 					}
 				}
+
+				if ($entry->hasAttribute("srcset")) {
+					$tokens = explode(",", $entry->getAttribute('srcset'));
+
+					for ($i = 0; $i < count($tokens); $i++) {
+						$token = trim($tokens[$i]);
+
+						list ($url, $width) = explode(" ", $token, 2);
+						$cached_filename = sha1($url);
+
+						if ($cache->exists($cached_filename)) {
+							$tokens[$i] = $cache->getUrl($cached_filename) . " " . $width;
+
+							$need_saving = true;
+						}
+					}
+
+					$entry->setAttribute("srcset", implode(", ", $tokens));
+				}
 			}
 
 			if ($need_saving) {

+ 36 - 21
classes/rssutils.php

@@ -1226,6 +1226,32 @@ class RSSUtils {
 		}
 	}
 
+	static function cache_media_url($cache, $url, $site_url) {
+		$url = rewrite_relative_url($site_url, $url);
+		$local_filename = sha1($url);
+
+		Debug::log("cache_media: checking $url", Debug::$LOG_VERBOSE);
+
+		if (!$cache->exists($local_filename)) {
+			Debug::log("cache_media: downloading: $url to $local_filename", Debug::$LOG_VERBOSE);
+
+			global $fetch_last_error_code;
+			global $fetch_last_error;
+
+			$file_content = fetch_file_contents(array("url" => $url,
+				"http_referrer" => $url,
+				"max_size" => MAX_CACHE_FILE_SIZE));
+
+			if ($file_content) {
+				$cache->put($local_filename, $file_content);
+			} else {
+				Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error");
+			}
+		} else if ($cache->isWritable($local_filename)) {
+			$cache->touch($local_filename);
+		}
+	}
+
 	static function cache_media($html, $site_url) {
 		$cache = new DiskCache("images");
 
@@ -1234,35 +1260,24 @@ class RSSUtils {
 			if ($doc->loadHTML($html)) {
 				$xpath = new DOMXPath($doc);
 
-				$entries = $xpath->query('(//img[@src])|(//video/source[@src])|(//audio/source[@src])|(//video[@poster])|(//video[@src])');
+				$entries = $xpath->query('(//img[@src]|//source[@src|@srcset]|//video[@poster|@src])');
 
 				foreach ($entries as $entry) {
 					foreach (array('src', 'poster') as $attr) {
 						if ($entry->hasAttribute($attr) && strpos($entry->getAttribute($attr), "data:") !== 0) {
-							$src = rewrite_relative_url($site_url, $entry->getAttribute($attr));
-
-							$local_filename = sha1($src);
-
-							Debug::log("cache_media: checking $src", Debug::$LOG_VERBOSE);
+							RSSUtils::cache_media_url($cache, $entry->getAttribute($attr), $site_url);
+						}
+					}
 
-							if (!$cache->exists($local_filename)) {
-								Debug::log("cache_media: downloading: $src to $local_filename", Debug::$LOG_VERBOSE);
+					if ($entry->hasAttribute("srcset")) {
+						$tokens = explode(",", $entry->getAttribute('srcset'));
 
-								global $fetch_last_error_code;
-								global $fetch_last_error;
+						for ($i = 0; $i < count($tokens); $i++) {
+							$token = trim($tokens[$i]);
 
-								$file_content = fetch_file_contents(array("url" => $src,
-									"http_referrer" => $src,
-									"max_size" => MAX_CACHE_FILE_SIZE));
+							list ($url, $width) = explode(" ", $token, 2);
 
-								if ($file_content) {
-									$cache->put($local_filename, $file_content);
-								} else {
-									Debug::log("cache_media: failed with $fetch_last_error_code: $fetch_last_error");
-								}
-							} else if ($cache->isWritable($local_filename)) {
-								$cache->touch($local_filename);
-							}
+							RSSUtils::cache_media_url($cache, $url, $site_url);
 						}
 					}
 				}