diff options
author | Andrew Dolgov <[email protected]> | 2015-07-07 10:15:08 +0300 |
---|---|---|
committer | Andrew Dolgov <[email protected]> | 2015-07-07 10:15:08 +0300 |
commit | b7d1306b197bc7ae60df706f81d1f5665ee04bed (patch) | |
tree | bd9ab39334dc82ba8a97f20c97531f390a522fd0 | |
parent | 6322fc6870f5df3878670a690f13a26a4ce76d20 (diff) |
af_readability: add a workaround for meta charset html pages
-rw-r--r-- | plugins/af_readability/init.php | 13 |
1 files changed, 13 insertions, 0 deletions
```diff
diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php
index 6cdd28faf..15b88d32c 100644
--- a/plugins/af_readability/init.php
+++ b/plugins/af_readability/init.php
@@ -101,6 +101,19 @@ class Af_Readability extends Plugin {
 			$tmp = fetch_file_contents($article["link"]);
 
 			if ($tmp) {
+				$tmpdoc = new DOMDocument("1.0", "UTF-8");
+				$tmpdoc->loadHTML($tmp);
+
+				if ($tmpdoc->encoding != 'UTF-8') {
+					$tmpxpath = new DOMXPath($tmpdoc);
+
+					foreach ($tmpxpath->query("//meta") as $elem) {
+						$elem->parentNode->removeChild($elem);
+					}
+
+					$tmp = $tmpdoc->saveHTML();
+				}
+
 				$r = new Readability($tmp, $article["link"]);
 
 				if ($r->init()) {
```