From 671f4cee657f36881eeeea7e5d314034252e3ee7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 21 Mar 2019 21:08:02 +0300 Subject: domdocument: remove old meta charset unicode hacks, replace with shorter xml preamble utf8 hack (on loadhtml where it makes sense) af_readability: better (?) charset hack for non-unicode pages --- include/functions.php | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/functions.php b/include/functions.php index 5ebd4e0ff..d59e79126 100755 --- a/include/functions.php +++ b/include/functions.php @@ -562,7 +562,7 @@ libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($html); + $doc->loadHTML('' . $html); $xpath = new DOMXPath($doc); $base = $xpath->query('/html/head/base[@href]'); @@ -1518,14 +1518,10 @@ // plugins work on original source URLs used before caching function rewrite_cached_urls($str) { - $charset_hack = ' - - '; - $res = trim($str); if (!$res) return ''; $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $res); + $doc->loadHTML('' . $res); $xpath = new DOMXPath($doc); $entries = $xpath->query('(//img[@src]|//picture/source[@src]|//video[@poster]|//video/source[@src]|//audio/source[@src])'); @@ -1580,16 +1576,10 @@ $res = trim($str); if (!$res) return ''; - $charset_hack = ' - - '; - - $res = trim($res); if (!$res) return ''; - libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $res); + $doc->loadHTML('' . $res); $xpath = new DOMXPath($doc); $rewrite_base_url = $site_url ? $site_url : get_self_url_prefix(); @@ -2115,7 +2105,7 @@ libxml_use_internal_errors(true); $doc = new DOMDocument(); - $doc->loadHTML($content); + $doc->loadHTML('' . $content); $xpath = new DOMXPath($doc); $entries = $xpath->query('/html/head/link[@rel="alternate" and '. '(contains(@type,"rss") or contains(@type,"atom"))]|/html/head/link[@rel="feed"]'); @@ -2136,7 +2126,7 @@ } function is_html($content) { - return preg_match("/