summary | refs | log | tree | commit | diff
path: root/plugins
diff options
context:
space:
mode:
author: Andrew Dolgov <[email protected]> 2015-07-07 10:15:08 +0300
committer: Andrew Dolgov <[email protected]> 2015-07-07 10:15:08 +0300
commit: b7d1306b197bc7ae60df706f81d1f5665ee04bed (patch)
tree: bd9ab39334dc82ba8a97f20c97531f390a522fd0 /plugins
parent: 6322fc6870f5df3878670a690f13a26a4ce76d20 (diff)
af_readability: add a workaround for meta charset html pages
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/af_readability/init.php 13
1 files changed, 13 insertions, 0 deletions
diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php
index 6cdd28faf..15b88d32c 100644
--- a/plugins/af_readability/init.php
+++ b/plugins/af_readability/init.php
@@ -101,6 +101,19 @@ class Af_Readability extends Plugin {
$tmp = fetch_file_contents($article["link"]);
if ($tmp) {
+ $tmpdoc = new DOMDocument("1.0", "UTF-8");
+ $tmpdoc->loadHTML($tmp);
+
+ if ($tmpdoc->encoding != 'UTF-8') {
+ $tmpxpath = new DOMXPath($tmpdoc);
+
+ foreach ($tmpxpath->query("//meta") as $elem) {
+ $elem->parentNode->removeChild($elem);
+ }
+
+ $tmp = $tmpdoc->saveHTML();
+ }
+
$r = new Readability($tmp, $article["link"]);
if ($r->init()) {