summary | refs | log | tree | commit | diff
path: root/plugins/af_readability
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2019-03-21 21:08:02 +0300
committer Andrew Dolgov <[email protected]> 2019-03-21 21:08:02 +0300
commit 671f4cee657f36881eeeea7e5d314034252e3ee7 (patch)
tree 2dc84b4a15b591e94366b37f39418f6e70a8e593 /plugins/af_readability
parent 3bd3324e5a9171e6cca20b44d1569da41a1d4874 (diff)
domdocument: remove old meta charset unicode hacks, replace with shorter xml preamble utf8 hack (on loadhtml where it makes sense)
af_readability: better (?) charset hack for non-unicode pages
Diffstat (limited to 'plugins/af_readability')
-rwxr-xr-x plugins/af_readability/init.php 11
1 files changed, 3 insertions, 8 deletions
diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php
index 117646c30..32c54a2c7 100755
--- a/plugins/af_readability/init.php
+++ b/plugins/af_readability/init.php
@@ -172,14 +172,10 @@ class Af_Readability extends Plugin {
if (!$tmpdoc->loadHTML($tmp))
return false;
+ // this is the worst hack yet :(
if (strtolower($tmpdoc->encoding) != 'utf-8') {
- $tmpxpath = new DOMXPath($tmpdoc);
-
- foreach ($tmpxpath->query("//meta") as $elem) {
- $elem->parentNode->removeChild($elem);
- }
-
- $tmp = $tmpdoc->saveHTML();
+ $tmp = preg_replace("/<meta.*?charset.*?\/>/i", "", $tmp);
+ $tmp = mb_convert_encoding($tmp, 'utf-8', $tmpdoc->encoding);
}
try {
@@ -210,7 +206,6 @@ class Af_Readability extends Plugin {
} catch (Exception $e) {
return false;
}
-
}
return false;