diff options
author | Andrew Dolgov <[email protected]> | 2015-07-08 10:35:19 +0300 |
---|---|---|
committer | Andrew Dolgov <[email protected]> | 2015-07-08 10:35:19 +0300 |
commit | 831129f6a4c63530674c8bc73550fc83998971f3 (patch) | |
tree | 14a0870ffff97683e35b918fe3392021dbb4b6b0 | |
parent | 6475fc7e06121ff948264b990280b2f488a86aa8 (diff) |
af_readability: also check for content-type if possible
-rw-r--r-- | plugins/af_readability/init.php | 21 |
1 files changed, 20 insertions, 1 deletions
diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 15b88d32c..b58be43d2 100644 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -98,11 +98,30 @@ class Af_Readability extends Plugin { if (!class_exists("Readability")) require_once(__DIR__ . "/classes/Readability.php"); + if (function_exists("curl_init")) { + $ch = curl_init($article["link"]); + curl_setopt($ch, CURLOPT_TIMEOUT, 5); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($ch, CURLOPT_HEADER, true); + curl_setopt($ch, CURLOPT_NOBODY, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, + !ini_get("safe_mode") && !ini_get("open_basedir")); + curl_setopt($ch, CURLOPT_USERAGENT, SELF_USER_AGENT); + + @$result = curl_exec($ch); + $content_type = curl_getinfo($ch, CURLINFO_CONTENT_TYPE); + + if (strpos($content_type, "text/html") === FALSE) + return $article; + } + $tmp = fetch_file_contents($article["link"]); if ($tmp) { $tmpdoc = new DOMDocument("1.0", "UTF-8"); - $tmpdoc->loadHTML($tmp); + + if (!$tmpdoc->loadHTML($tmp)) + return $article; if ($tmpdoc->encoding != 'UTF-8') { $tmpxpath = new DOMXPath($tmpdoc); |