From 2aef804f4b2cee7baf569f707eb74c27005d7daa Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Mon, 20 Aug 2018 12:12:32 +0300 Subject: split transparent rewriting of locally cached media URLs to execute after both sanitize() and HOOK_RENDER_ARTICLE to allow plugins work on original source URLs consistently --- include/functions.php | 81 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 24 deletions(-) (limited to 'include/functions.php') diff --git a/include/functions.php b/include/functions.php index a4e0e4d02..8acea8ef4 100755 --- a/include/functions.php +++ b/include/functions.php @@ -1564,38 +1564,31 @@ return false; } - function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) { - if (!$owner) $owner = $_SESSION["uid"]; - - $res = trim($str); if (!$res) return ''; + // check for locally cached (media) URLs and rewrite to local versions + // this is called separately after sanitize() and plugin render article hooks to allow + // plugins work on original source URLs used before caching + function rewrite_cached_urls($str) { $charset_hack = ' '; - $res = trim($res); if (!$res) return ''; - - libxml_use_internal_errors(true); + $res = trim($str); if (!$res) return ''; $doc = new DOMDocument(); $doc->loadHTML($charset_hack . $res); $xpath = new DOMXPath($doc); - $rewrite_base_url = $site_url ? $site_url : get_self_url_prefix(); + $entries = $xpath->query('(//img[@src]|//video/source[@src]|//audio/source[@src])'); - $entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])'); + $need_saving = false; foreach ($entries as $entry) { - if ($entry->hasAttribute('href')) { - $entry->setAttribute('href', - rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href'))); - - $entry->setAttribute('rel', 'noopener noreferrer'); - } - if ($entry->hasAttribute('src')) { - $src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src')); + + // should be already absolutized because this is called after sanitize() + $src = $entry->getAttribute('src'); $cached_filename = CACHE_DIR . '/images/' . sha1($src); if (file_exists($cached_filename)) { @@ -1613,14 +1606,54 @@ $src = get_self_url_prefix() . '/public.php?op=cached_url&hash=' . sha1($src) . $suffix; - if ($entry->hasAttribute('srcset')) { - $entry->removeAttribute('srcset'); - } - - if ($entry->hasAttribute('sizes')) { - $entry->removeAttribute('sizes'); - } + $entry->setAttribute('src', $src); + $need_saving = true; } + } + } + + if ($need_saving) { + $doc->removeChild($doc->firstChild); //remove doctype + $res = $doc->saveHTML(); + } + + return $res; + } + + function sanitize($str, $force_remove_images = false, $owner = false, $site_url = false, $highlight_words = false, $article_id = false) { + if (!$owner) $owner = $_SESSION["uid"]; + + $res = trim($str); if (!$res) return ''; + + $charset_hack = ' + + '; + + $res = trim($res); if (!$res) return ''; + + libxml_use_internal_errors(true); + + $doc = new DOMDocument(); + $doc->loadHTML($charset_hack . $res); + $xpath = new DOMXPath($doc); + + $rewrite_base_url = $site_url ? $site_url : get_self_url_prefix(); + + $entries = $xpath->query('(//a[@href]|//img[@src]|//video/source[@src]|//audio/source[@src])'); + + foreach ($entries as $entry) { + + if ($entry->hasAttribute('href')) { + $entry->setAttribute('href', + rewrite_relative_url($rewrite_base_url, $entry->getAttribute('href'))); + + $entry->setAttribute('rel', 'noopener noreferrer'); + } + + if ($entry->hasAttribute('src')) { + $src = rewrite_relative_url($rewrite_base_url, $entry->getAttribute('src')); + + // cache stuff has gone to rewrite_cached_urls() $entry->setAttribute('src', $src); } -- cgit v1.2.3