From 671f4cee657f36881eeeea7e5d314034252e3ee7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 21 Mar 2019 21:08:02 +0300 Subject: domdocument: remove old meta charset unicode hacks, replace with shorter xml preamble utf8 hack (on loadhtml where it makes sense) af_readability: better (?) charset hack for non-unicode pages --- plugins/af_fsckportal/init.php | 6 +----- plugins/af_readability/init.php | 11 +++-------- plugins/af_tumblr_1280/init.php | 8 ++------ plugins/af_zz_imgproxy/init.php | 2 +- plugins/cache_starred_images/init.php | 6 +----- 5 files changed, 8 insertions(+), 25 deletions(-) (limited to 'plugins') diff --git a/plugins/af_fsckportal/init.php b/plugins/af_fsckportal/init.php index 0fa58e9ed..04b77a15a 100644 --- a/plugins/af_fsckportal/init.php +++ b/plugins/af_fsckportal/init.php @@ -19,11 +19,7 @@ class Af_Fsckportal extends Plugin { $doc = new DOMDocument(); - $charset_hack = '<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> - </head>'; - - @$doc->loadHTML($charset_hack . $article["content"]); + @$doc->loadHTML('<?xml encoding="UTF-8">' . 
$article["content"]); if ($doc) { $xpath = new DOMXPath($doc); diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 117646c30..32c54a2c7 100755 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -172,14 +172,10 @@ class Af_Readability extends Plugin { if (!$tmpdoc->loadHTML($tmp)) return false; + // this is the worst hack yet :( if (strtolower($tmpdoc->encoding) != 'utf-8') { - $tmpxpath = new DOMXPath($tmpdoc); - - foreach ($tmpxpath->query("//meta") as $elem) { - $elem->parentNode->removeChild($elem); - } - - $tmp = $tmpdoc->saveHTML(); + $tmp = preg_replace("/<meta.*?charset.*?\/>/i", "", $tmp); + $tmp = mb_convert_encoding($tmp, 'utf-8', $tmpdoc->encoding); } try { @@ -210,7 +206,6 @@ class Af_Readability extends Plugin { } catch (Exception $e) { return false; } - } return false; diff --git a/plugins/af_tumblr_1280/init.php b/plugins/af_tumblr_1280/init.php index 8aba0e652..5d7f366a4 100755 --- a/plugins/af_tumblr_1280/init.php +++ b/plugins/af_tumblr_1280/init.php @@ -25,12 +25,8 @@ class Af_Tumblr_1280 extends Plugin { if (!function_exists("curl_init") || ini_get("open_basedir")) return $article; - $charset_hack = '<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> - </head>'; - $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $article["content"]); + $doc->loadHTML('<?xml encoding="UTF-8">' . $article["content"]); $found = false; @@ -92,4 +88,4 @@ class Af_Tumblr_1280 extends Plugin { return 2; } -} \ No newline at end of file +} diff --git a/plugins/af_zz_imgproxy/init.php b/plugins/af_zz_imgproxy/init.php index 2cd8fcaf0..b172d4563 100755 --- a/plugins/af_zz_imgproxy/init.php +++ b/plugins/af_zz_imgproxy/init.php @@ -155,7 +155,7 @@ class Af_Zz_ImgProxy extends Plugin { $proxy_all = $this->host->get($this, "proxy_all"); $doc = new DOMDocument(); - if (@$doc->loadHTML($article["content"])) { + if (@$doc->loadHTML('<?xml encoding="UTF-8">' . 
$article["content"])) { $xpath = new DOMXPath($doc); $imgs = $xpath->query("//img[@src]"); diff --git a/plugins/cache_starred_images/init.php b/plugins/cache_starred_images/init.php index a1916e226..714d4cb9b 100755 --- a/plugins/cache_starred_images/init.php +++ b/plugins/cache_starred_images/init.php @@ -190,12 +190,8 @@ class Cache_Starred_Images extends Plugin implements IHandler { return; } - $charset_hack = '<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> - </head>'; - $doc = new DOMDocument(); - $doc->loadHTML($charset_hack . $content); + $doc->loadHTML('<?xml encoding="UTF-8">' . $content); $xpath = new DOMXPath($doc); $entries = $xpath->query('(//img[@src])|(//video/source[@src])'); -- cgit v1.2.3