diff options
-rwxr-xr-x | classes/api.php | 8 | ||||
-rw-r--r-- | classes/backend.php | 2 | ||||
-rwxr-xr-x | classes/feeds.php | 19 | ||||
-rwxr-xr-x | classes/handler/public.php | 19 | ||||
-rwxr-xr-x | classes/logger/sql.php | 11 | ||||
-rw-r--r-- | classes/pluginhandler.php | 5 | ||||
-rwxr-xr-x | classes/pluginhost.php | 64 | ||||
-rwxr-xr-x | classes/pref/feeds.php | 2 | ||||
-rwxr-xr-x | classes/rpc.php | 2 | ||||
-rwxr-xr-x | classes/rssutils.php | 18 | ||||
-rw-r--r-- | include/functions.php | 10 | ||||
-rw-r--r-- | js/Article.js | 6 | ||||
-rw-r--r--[-rwxr-xr-x] | plugins/af_proxy_http/init.php (renamed from plugins/af_zz_imgproxy/init.php) | 15 | ||||
-rwxr-xr-x | plugins/af_readability/init.php | 15 | ||||
-rw-r--r-- | plugins/af_readability/vendor/andreskrey/Readability/Configuration.php | 26 | ||||
-rw-r--r-- | plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php | 82 | ||||
-rw-r--r-- | plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php | 51 | ||||
-rw-r--r-- | plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php | 20 | ||||
-rw-r--r-- | plugins/af_readability/vendor/andreskrey/Readability/Readability.php | 56 |
19 files changed, 322 insertions, 109 deletions
diff --git a/classes/api.php b/classes/api.php index 44c9841ce..01ea1970d 100755 --- a/classes/api.php +++ b/classes/api.php @@ -535,6 +535,7 @@ class API extends Handler { /* Labels */ + /* API only: -4 All feeds, including virtual feeds */ if ($cat_id == -4 || $cat_id == -2) { $counters = Counters::getLabelCounters(true); @@ -582,7 +583,7 @@ class API extends Handler { if ($include_nested && $cat_id) { $sth = $pdo->prepare("SELECT id, title, order_id FROM ttrss_feed_categories - WHERE parent_cat = ? AND owner_uid = ? ORDER BY id, title"); + WHERE parent_cat = ? AND owner_uid = ? ORDER BY order_id, title"); $sth->execute([$cat_id, $_SESSION['uid']]); @@ -611,12 +612,13 @@ class API extends Handler { $limit_qpart = ""; } + /* API only: -3 All feeds, excluding virtual feeds (e.g. Labels and such) */ if ($cat_id == -4 || $cat_id == -3) { $sth = $pdo->prepare("SELECT id, feed_url, cat_id, title, order_id, ". SUBSTRING_FOR_DATE."(last_updated,1,19) AS last_updated FROM ttrss_feeds WHERE owner_uid = ? - ORDER BY cat_id, title " . $limit_qpart); + ORDER BY order_id, title " . $limit_qpart); $sth->execute([$_SESSION['uid']]); } else { @@ -627,7 +629,7 @@ class API extends Handler { FROM ttrss_feeds WHERE (cat_id = :cat OR (:cat = 0 AND cat_id IS NULL)) AND owner_uid = :uid - ORDER BY cat_id, title " . $limit_qpart); + ORDER BY order_id, title " . $limit_qpart); $sth->execute([":uid" => $_SESSION['uid'], ":cat" => $cat_id]); } diff --git a/classes/backend.php b/classes/backend.php index 5bd724728..122e28c65 100644 --- a/classes/backend.php +++ b/classes/backend.php @@ -88,7 +88,7 @@ class Backend extends Handler { } function help() { - $topic = basename(clean($_REQUEST["topic"])); // only one for now + $topic = clean_filename($_REQUEST["topic"]); // only one for now if ($topic == "main") { $info = get_hotkeys_info(); diff --git a/classes/feeds.php b/classes/feeds.php index b89f4e4ca..bae571a3f 100755 --- a/classes/feeds.php +++ b/classes/feeds.php @@ -2,6 +2,8 @@ require_once "colors.php"; class Feeds extends Handler_Protected { + const NEVER_GROUP_FEEDS = [ -6, 0 ]; + const NEVER_GROUP_BY_DATE = [ -2, -1, -3 ]; private $params; @@ -199,7 +201,8 @@ class Feeds extends Handler_Protected { $qfh_ret = $this->queryFeedHeadlines($params); } - $vfeed_group_enabled = get_pref("VFEED_GROUP_BY_FEED") && $feed != -6; + $vfeed_group_enabled = get_pref("VFEED_GROUP_BY_FEED") && + !(in_array($feed, Feeds::NEVER_GROUP_FEEDS) && !$cat_view); $result = $qfh_ret[0]; // this could be either a PDO query result or a -1 if first id changed $feed_title = $qfh_ret[1]; @@ -1438,7 +1441,7 @@ class Feeds extends Handler_Protected { $start_ts = isset($params["start_ts"]) ? $params["start_ts"] : false; $check_first_id = isset($params["check_first_id"]) ? $params["check_first_id"] : false; $skip_first_id_check = isset($params["skip_first_id_check"]) ? $params["skip_first_id_check"] : false; - $order_by = isset($params["order_by"]) ? $params["order_by"] : false; + //$order_by = isset($params["order_by"]) ? $params["order_by"] : false; $ext_tables_part = ""; $limit_query_part = ""; @@ -1693,12 +1696,18 @@ class Feeds extends Handler_Protected { if (is_numeric($feed)) { // proper override_order applied above if ($vfeed_query_part && !$ignore_vfeed_group && get_pref('VFEED_GROUP_BY_FEED', $owner_uid)) { - $yyiw_desc = $order_by == "date_reverse" ? "" : "desc"; + + if (!(in_array($feed, Feeds::NEVER_GROUP_BY_DATE) && !$cat_view)) { + $yyiw_desc = $order_by == "date_reverse" ? "" : "desc"; + $yyiw_order_qpart = "yyiw $yyiw_desc, "; + } else { + $yyiw_order_qpart = ""; + } if (!$override_order) { - $order_by = "yyiw $yyiw_desc, ttrss_feeds.title, ".$order_by; + $order_by = "$yyiw_order_qpart ttrss_feeds.title, $order_by"; } else { - $order_by = "yyiw $yyiw_desc, ttrss_feeds.title, ".$override_order; + $order_by = "$yyiw_order_qpart ttrss_feeds.title, $override_order"; } } diff --git a/classes/handler/public.php b/classes/handler/public.php index 06c01df57..e2082ff1e 100755 --- a/classes/handler/public.php +++ b/classes/handler/public.php @@ -509,7 +509,7 @@ class Handler_Public extends Handler { <!DOCTYPE html> <html> <head> - <title><?php echo __("Share with Tiny Tiny RSS") ?> ?></title> + <title><?php echo __("Share with Tiny Tiny RSS") ?></title> <?php echo stylesheet_tag("css/default.css"); echo javascript_tag("lib/prototype.js"); @@ -1203,27 +1203,30 @@ class Handler_Public extends Handler { public function pluginhandler() { $host = new PluginHost(); - $plugin = basename(clean($_REQUEST["plugin"])); + $plugin_name = clean_filename($_REQUEST["plugin"]); $method = clean($_REQUEST["pmethod"]); - $host->load($plugin, PluginHost::KIND_USER, 0); + $host->load($plugin_name, PluginHost::KIND_USER, 0); $host->load_data(); - $pclass = $host->get_plugin($plugin); + $plugin = $host->get_plugin($plugin_name); - if ($pclass) { - if (method_exists($pclass, $method)) { - if ($pclass->is_public_method($method)) { - $pclass->$method(); + if ($plugin) { + if (method_exists($plugin, $method)) { + if ($plugin->is_public_method($method)) { + $plugin->$method(); } else { + user_error("PluginHandler[PUBLIC]: Requested private method '$method' of plugin '$plugin_name'.", E_USER_WARNING); header("Content-Type: text/json"); print error_json(6); } } else { + user_error("PluginHandler[PUBLIC]: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING); header("Content-Type: text/json"); print error_json(13); } } else { + user_error("PluginHandler[PUBLIC]: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING); header("Content-Type: text/json"); print error_json(14); } diff --git a/classes/logger/sql.php b/classes/logger/sql.php index 989539e5d..1b44b1e5f 100755 --- a/classes/logger/sql.php +++ b/classes/logger/sql.php @@ -15,6 +15,17 @@ class Logger_SQL { // limit context length, DOMDocument dumps entire XML in here sometimes, which may be huge $context = mb_substr($context, 0, 8192); + $server_params = [ + "IP" => "REMOTE_ADDR", + "Request URI" => "REQUEST_URI", + "User agent" => "HTTP_USER_AGENT", + ]; + + foreach ($server_params as $n => $p) { + if (isset($_SERVER[$p])) + $context .= "\n$n: " . $_SERVER[$p]; + } + // passed error message may contain invalid unicode characters, failing to insert an error here // would break the execution entirely by generating an actual fatal error instead of a E_WARNING etc $errstr = UConverter::transcode($errstr, 'UTF-8', 'UTF-8'); diff --git a/classes/pluginhandler.php b/classes/pluginhandler.php index d10343e09..9682e440f 100644 --- a/classes/pluginhandler.php +++ b/classes/pluginhandler.php @@ -5,15 +5,18 @@ class PluginHandler extends Handler_Protected { } function catchall($method) { - $plugin = PluginHost::getInstance()->get_plugin(clean($_REQUEST["plugin"])); + $plugin_name = clean($_REQUEST["plugin"]); + $plugin = PluginHost::getInstance()->get_plugin($plugin_name); if ($plugin) { if (method_exists($plugin, $method)) { $plugin->$method(); } else { + user_error("PluginHandler: Requested unknown method '$method' of plugin '$plugin_name'.", E_USER_WARNING); print error_json(13); } } else { + user_error("PluginHandler: Requested method '$method' of unknown plugin '$plugin_name'.", E_USER_WARNING); print error_json(14); } } diff --git a/classes/pluginhost.php b/classes/pluginhost.php index d09ecca17..eab808ae9 100755 --- a/classes/pluginhost.php +++ b/classes/pluginhost.php @@ -128,28 +128,44 @@ class PluginHost { } } - function add_hook($type, $sender) { + function add_hook($type, $sender, $priority = 50) { + $priority = (int) $priority; + if (!is_array($this->hooks[$type])) { - $this->hooks[$type] = array(); + $this->hooks[$type] = []; + } + + if (!is_array($this->hooks[$type][$priority])) { + $this->hooks[$type][$priority] = []; } - array_push($this->hooks[$type], $sender); + array_push($this->hooks[$type][$priority], $sender); + ksort($this->hooks[$type]); } function del_hook($type, $sender) { if (is_array($this->hooks[$type])) { - $key = array_Search($sender, $this->hooks[$type]); - if ($key !== FALSE) { - unset($this->hooks[$type][$key]); + foreach (array_keys($this->hooks[$type]) as $prio) { + $key = array_search($sender, $this->hooks[$type][$prio]); + + if ($key !== FALSE) { + unset($this->hooks[$type][$prio][$key]); + } } } } function get_hooks($type) { if (isset($this->hooks[$type])) { - return $this->hooks[$type]; + $tmp = []; + + foreach (array_keys($this->hooks[$type]) as $prio) { + $tmp = array_merge($tmp, $this->hooks[$type][$prio]); + } + + return $tmp; } else { - return array(); + return []; } } function load_all($kind, $owner_uid = false, $skip_init = false) { @@ -170,7 +186,7 @@ class PluginHost { foreach ($plugins as $class) { $class = trim($class); - $class_file = strtolower(basename($class)); + $class_file = strtolower(clean_filename($class)); if (!is_dir(__DIR__."/../plugins/$class_file") && !is_dir(__DIR__."/../plugins.local/$class_file")) continue; @@ -475,4 +491,34 @@ class PluginHost { function get_owner_uid() { return $this->owner_uid; } + + // handled by classes/pluginhandler.php, requires valid session + function get_method_url($sender, $method, $params) { + return get_self_url_prefix() . "/backend.php?" . + http_build_query( + array_merge( + [ + "op" => "pluginhandler", + "plugin" => strtolower(get_class($sender)), + "method" => $method + ], + $params)); + } + + // WARNING: endpoint in public.php, exposed to unauthenticated users + function get_public_method_url($sender, $method, $params) { + if ($sender->is_public_method($method)) { + return get_self_url_prefix() . "/public.php?" . + http_build_query( + array_merge( + [ + "op" => "pluginhandler", + "plugin" => strtolower(get_class($sender)), + "pmethod" => $method + ], + $params)); + } else { + user_error("get_public_method_url: requested method '$method' of '" . get_class($sender) . "' is private."); + } + } } diff --git a/classes/pref/feeds.php b/classes/pref/feeds.php index c55affd77..f672a0375 100755 --- a/classes/pref/feeds.php +++ b/classes/pref/feeds.php @@ -312,7 +312,7 @@ class Pref_Feeds extends Handler_Protected { array_push($root['items'], $feed); } - $root['param'] = vsprintf(_ngettext('(%d feed)', '(%d feeds)', count($cat['items'])), count($cat['items'])); + $root['param'] = vsprintf(_ngettext('(%d feed)', '(%d feeds)', count($root['items'])), count($root['items'])); } $fl = array(); diff --git a/classes/rpc.php b/classes/rpc.php index 8736cbb65..84c9cfe92 100755 --- a/classes/rpc.php +++ b/classes/rpc.php @@ -572,7 +572,7 @@ class RPC extends Handler_Protected { function log() { $msg = clean($_REQUEST['msg']); - $file = basename(clean($_REQUEST['file'])); + $file = clean_filename($_REQUEST['file']); $line = (int) clean($_REQUEST['line']); $context = clean($_REQUEST['context']); diff --git a/classes/rssutils.php b/classes/rssutils.php index fe4c0a8a3..cee8be8ff 100755 --- a/classes/rssutils.php +++ b/classes/rssutils.php @@ -563,14 +563,6 @@ class RSSUtils { Debug::log("orig date: " . $item->get_date(), Debug::$LOG_VERBOSE); - if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) { - $entry_timestamp = time(); - } - - $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); - - Debug::log("date $entry_timestamp [$entry_timestamp_fmt]", Debug::$LOG_VERBOSE); - $entry_title = strip_tags($item->get_title()); $entry_link = rewrite_relative_url($site_url, clean($item->get_link())); @@ -656,6 +648,7 @@ class RSSUtils { "force_catchup" => false, // ugly hack for the time being "score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed "language" => $entry_language, + "timestamp" => $entry_timestamp, "num_comments" => $num_comments, // read only "feed" => array("id" => $feed, "fetch_url" => $fetch_url, @@ -797,6 +790,15 @@ class RSSUtils { $article_labels = $article["labels"]; $entry_score_modifier = (int) $article["score_modifier"]; $entry_language = $article["language"]; + $entry_timestamp = $article["timestamp"]; + + if ($entry_timestamp == -1 || !$entry_timestamp || $entry_timestamp > time()) { + $entry_timestamp = time(); + } + + $entry_timestamp_fmt = strftime("%Y/%m/%d %H:%M:%S", $entry_timestamp); + + Debug::log("date $entry_timestamp [$entry_timestamp_fmt]", Debug::$LOG_VERBOSE); if (Debug::get_loglevel() >= Debug::$LOG_EXTENDED) { Debug::log("article labels:", Debug::$LOG_VERBOSE); diff --git a/include/functions.php b/include/functions.php index c326ac468..ec1280c4e 100644 --- a/include/functions.php +++ b/include/functions.php @@ -593,7 +593,7 @@ } function clean_filename($filename) { - return basename(preg_replace("/\.\.|[\/\\\]/", "", $filename)); + return basename(preg_replace("/\.\.|[\/\\\]/", "", clean($filename))); } function make_password($length = 12) { @@ -1013,10 +1013,10 @@ __("Navigation") => array( "next_feed" => __("Open next feed"), "prev_feed" => __("Open previous feed"), - "next_article" => __("Open next article"), - "prev_article" => __("Open previous article"), - "next_article_noscroll" => __("Open next article (don't scroll long articles)"), - "prev_article_noscroll" => __("Open previous article (don't scroll long articles)"), + "next_article" => __("Open next article (scroll long articles)"), + "prev_article" => __("Open previous article (scroll long articles)"), + "next_article_noscroll" => __("Open next article"), + "prev_article_noscroll" => __("Open previous article"), "next_article_noexpand" => __("Move to next article (don't expand or mark read)"), "prev_article_noexpand" => __("Move to previous article (don't expand or mark read)"), "search_dialog" => __("Show search dialog")), diff --git a/js/Article.js b/js/Article.js index b933ed716..970234818 100644 --- a/js/Article.js +++ b/js/Article.js @@ -32,7 +32,7 @@ define(["dojo/_base/declare"], function (declare) { if (ids.length > 0) { const score = prompt(__("Please enter new score for selected articles:")); - if (parseInt(score) != undefined) { + if (!isNaN(parseInt(score))) { ids.each((id) => { const row = $("RROW-" + id); @@ -66,7 +66,7 @@ define(["dojo/_base/declare"], function (declare) { const score_old = row.getAttribute("data-score"); const score = prompt(__("Please enter new score for this article:"), score_old); - if (parseInt(score) != undefined) { + if (!isNaN(parseInt(score))) { row.setAttribute("data-score", score); const pic = row.select(".icon-score")[0]; @@ -340,4 +340,4 @@ define(["dojo/_base/declare"], function (declare) { } return Article; -});
\ No newline at end of file +}); diff --git a/plugins/af_zz_imgproxy/init.php b/plugins/af_proxy_http/init.php index ddc30936f..80100160d 100755..100644 --- a/plugins/af_zz_imgproxy/init.php +++ b/plugins/af_proxy_http/init.php @@ -1,5 +1,5 @@ <?php -class Af_Zz_ImgProxy extends Plugin { +class Af_Proxy_Http extends Plugin { /* @var PluginHost $host */ private $host; @@ -9,7 +9,7 @@ class Af_Zz_ImgProxy extends Plugin { function about() { return array(1.0, - "Load insecure images via built-in proxy", + "Loads media served over plain HTTP via built-in secure proxy", "fox"); } @@ -23,8 +23,8 @@ class Af_Zz_ImgProxy extends Plugin { $this->host = $host; $this->cache = new DiskCache("images"); - $host->add_hook($host::HOOK_RENDER_ARTICLE, $this); - $host->add_hook($host::HOOK_RENDER_ARTICLE_CDM, $this); + $host->add_hook($host::HOOK_RENDER_ARTICLE, $this, 150); + $host->add_hook($host::HOOK_RENDER_ARTICLE_CDM, $this, 150); $host->add_hook($host::HOOK_ENCLOSURE_ENTRY, $this); $host->add_hook($host::HOOK_PREFS_TAB, $this); @@ -141,8 +141,7 @@ class Af_Zz_ImgProxy extends Plugin { } } - return get_self_url_prefix() . "/public.php?op=pluginhandler&plugin=af_zz_imgproxy&pmethod=imgproxy&url=" . - urlencode($url); + return $this->host->get_public_method_url($this, "imgproxy", ["url" => $url]); } } } @@ -210,7 +209,7 @@ class Af_Zz_ImgProxy extends Plugin { if ($args != "prefFeeds") return; print "<div dojoType=\"dijit.layout.AccordionPane\" - title=\"<i class='material-icons'>extension</i> ".__('Image proxy settings (af_zz_imgproxy)')."\">"; + title=\"<i class='material-icons'>extension</i> ".__('Image proxy settings (af_proxy_http)')."\">"; print "<form dojoType=\"dijit.form.Form\">"; @@ -230,7 +229,7 @@ class Af_Zz_ImgProxy extends Plugin { print_hidden("op", "pluginhandler"); print_hidden("method", "save"); - print_hidden("plugin", "af_zz_imgproxy"); + print_hidden("plugin", "af_proxy_http"); $proxy_all = $this->host->get($this, "proxy_all"); print_checkbox("proxy_all", $proxy_all); diff --git a/plugins/af_readability/init.php b/plugins/af_readability/init.php index 7f3c6db4d..a487707c8 100755 --- a/plugins/af_readability/init.php +++ b/plugins/af_readability/init.php @@ -29,7 +29,8 @@ class Af_Readability extends Plugin { { $this->host = $host; - if (version_compare(PHP_VERSION, '5.6.0', '<')) { + if (version_compare(PHP_VERSION, '7.0.0', '<')) { + user_error("af_readability requires PHP 7.0", E_USER_WARNING); return; } @@ -51,8 +52,8 @@ class Af_Readability extends Plugin { print "<div dojoType='dijit.layout.AccordionPane' title=\"<i class='material-icons'>extension</i> ".__('Readability settings (af_readability)')."\">"; - if (version_compare(PHP_VERSION, '5.6.0', '<')) { - print_error("This plugin requires PHP version 5.6."); + if (version_compare(PHP_VERSION, '7.0.0', '<')) { + print_error("This plugin requires PHP 7.0."); } else { print "<h2>" . __("Global settings") . "</h2>"; @@ -88,7 +89,7 @@ class Af_Readability extends Plugin { print "</label>"; print "</fieldset>"; - print print_button("submit", __("Save"), "class='alt-primary'"); + print_button("submit", __("Save"), "class='alt-primary'"); print "</form>"; $enabled_feeds = $this->host->get($this, "enabled_feeds"); @@ -179,7 +180,11 @@ class Af_Readability extends Plugin { // this is the worst hack yet :( if (strtolower($tmpdoc->encoding) != 'utf-8') { $tmp = preg_replace("/<meta.*?charset.*?\/?>/i", "", $tmp); - $tmp = mb_convert_encoding($tmp, 'utf-8', $tmpdoc->encoding); + if (empty($tmpdoc->encoding)) { + $tmp = mb_convert_encoding($tmp, 'utf-8'); + } else { + $tmp = mb_convert_encoding($tmp, 'utf-8', $tmpdoc->encoding); + } } try { diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php index 6c17bc757..0632399c6 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php @@ -167,32 +167,6 @@ class Configuration } /** - * @deprecated Use getCharThreshold. Will be removed in version 2.0 - * - * @return int - */ - public function getWordThreshold() - { - @trigger_error('getWordThreshold was replaced with getCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED); - - return $this->charThreshold; - } - - /** - * @param int $charThreshold - * - * @return $this - */ - public function setWordThreshold($charThreshold) - { - @trigger_error('setWordThreshold was replaced with setCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED); - - $this->charThreshold = $charThreshold; - - return $this; - } - - /** * @return bool */ public function getArticleByLine() diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php new file mode 100644 index 000000000..5149c0b98 --- /dev/null +++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php @@ -0,0 +1,82 @@ +<?php + +namespace andreskrey\Readability\Nodes\DOM; + +/** + * Class DOMNodeList. + * + * This is a fake DOMNodeList class that allows adding items to the list. The original class is static and the nodes + * are defined automagically when instantiating it. This fake version behaves exactly the same way but adds the function + * add() that allows to insert new DOMNodes into the DOMNodeList. + * + * It cannot extend the original DOMNodeList class because the functionality behind the property ->length is hidden + * from the user and cannot be extended, changed, or tweaked. + */ +class DOMNodeList implements \Countable, \IteratorAggregate +{ + /** + * @var array + */ + protected $items = []; + + /** + * @var int + */ + protected $length = 0; + + /** + * To allow access to length in the same way that DOMNodeList allows. + * + * {@inheritdoc} + */ + public function __get($name) + { + switch ($name) { + case 'length': + return $this->length; + default: + trigger_error(sprintf('Undefined property: %s::%s', static::class, $name)); + } + } + + /** + * @param DOMNode|DOMElement|DOMComment $node + * + * @return DOMNodeList + */ + public function add($node) + { + $this->items[] = $node; + $this->length++; + + return $this; + } + + /** + * @param int $offset + * + * @return DOMNode|DOMElement|DOMComment + */ + public function item(int $offset) + { + return $this->items[$offset]; + } + + /** + * @return int|void + */ + public function count(): int + { + return $this->length; + } + + /** + * To make it compatible with iterator_to_array() function. + * + * {@inheritdoc} + */ + public function getIterator(): \ArrayIterator + { + return new \ArrayIterator($this->items); + } +} diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php index d7060ccbb..5198bbb5f 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php @@ -181,11 +181,11 @@ trait NodeTrait /** * Override for native hasAttribute. * - * @see getAttribute - * * @param $attributeName * * @return bool + * + * @see getAttribute */ public function hasAttribute($attributeName) { @@ -317,10 +317,14 @@ trait NodeTrait * * @param bool $filterEmptyDOMText Filter empty DOMText nodes? * + * @deprecated Use NodeUtility::filterTextNodes, function will be removed in version 3.0 + * * @return array */ public function getChildren($filterEmptyDOMText = false) { + @trigger_error('getChildren was replaced with NodeUtility::filterTextNodes and will be removed in version 3.0', E_USER_DEPRECATED); + $ret = iterator_to_array($this->childNodes); if ($filterEmptyDOMText) { // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number @@ -418,12 +422,12 @@ trait NodeTrait public function hasSingleTagInsideElement($tag) { // There should be exactly 1 element child with given tag - if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) { + if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) { return false; } // And there should be no text nodes with real content - return array_reduce($children, function ($carry, $child) { + return array_reduce(iterator_to_array($children), function ($carry, $child) { if (!$carry === false) { return false; } @@ -443,7 +447,7 @@ trait NodeTrait { $result = false; if ($this->hasChildNodes()) { - foreach ($this->getChildren() as $child) { + foreach ($this->childNodes as $child) { if (in_array($child->nodeName, $this->divToPElements)) { $result = true; } else { @@ -500,18 +504,22 @@ trait NodeTrait ); } + /** + * In the original JS project they check if the node has the style display=none, which unfortunately + * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none". + * + * Might be a good idea to check for classes or other attributes like 'aria-hidden' + * + * @return bool + */ public function isProbablyVisible() { - /* - * In the original JS project they check if the node has the style display=none, which unfortunately - * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none". - * - * Might be a good idea to check for classes or other attributes like 'aria-hidden' - */ - return !preg_match('/display:( )?none/', $this->getAttribute('style')) && !$this->hasAttribute('hidden'); } + /** + * @return bool + */ public function isWhitespace() { return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) || @@ -557,4 +565,23 @@ trait NodeTrait $count -= ($count - $nodes->length); } } + + /** + * Mimics JS's firstElementChild property. PHP only has firstChild which could be any type of DOMNode. Use this + * function to get the first one that is an DOMElement node. + * + * @return \DOMElement|null + */ + public function getFirstElementChild() + { + if ($this->childNodes instanceof \Traversable) { + foreach ($this->childNodes as $node) { + if ($node instanceof \DOMElement) { + return $node; + } + } + } + + return null; + } } diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php index 7a1f18ee4..cbf78bae0 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php @@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes; use andreskrey\Readability\Nodes\DOM\DOMDocument; use andreskrey\Readability\Nodes\DOM\DOMElement; use andreskrey\Readability\Nodes\DOM\DOMNode; +use andreskrey\Readability\Nodes\DOM\DOMNodeList; /** * Class NodeUtility. @@ -157,4 +158,23 @@ class NodeUtility return ($originalNode) ? $originalNode->nextSibling : $originalNode; } + + /** + * Remove all empty DOMNodes from DOMNodeLists. + * + * @param \DOMNodeList $list + * + * @return DOMNodeList + */ + public static function filterTextNodes(\DOMNodeList $list) + { + $newList = new DOMNodeList(); + foreach ($list as $node) { + if ($node->nodeType !== XML_TEXT_NODE || mb_strlen(trim($node->nodeValue))) { + $newList->add($node); + } + } + + return $newList; + } } diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php index 7b7eed6bf..6bcbf78d7 100644 --- a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php +++ b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php @@ -57,6 +57,13 @@ class Readability protected $author = null; /** + * Website name. + * + * @var string|null + */ + protected $siteName = null; + + /** * Direction of the text. * * @var string|null @@ -287,10 +294,10 @@ class Readability $values = []; // property is a space-separated list of values - $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image)\s*/i'; + $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image|site_name)(?!:)\s*/i'; // name is a single value - $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image)\s*$/i'; + $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image|site_name)(?!:)\s*$/i'; // Find description tags. foreach ($this->dom->getElementsByTagName('meta') as $meta) { @@ -332,7 +339,6 @@ class Readability * This could be easily replaced with an ugly set of isset($values['key']) or a bunch of ??s. * Will probably replace it with ??s after dropping support of PHP5.6 */ - $key = current(array_intersect([ 'dc:title', 'dcterm:title', @@ -373,11 +379,18 @@ class Readability // get main image $key = current(array_intersect([ + 'image', 'og:image', 'twitter:image' ], array_keys($values))); $this->setImage(isset($values[$key]) ? $values[$key] : null); + + $key = current(array_intersect([ + 'og:site_name' + ], array_keys($values))); + + $this->setSiteName(isset($values[$key]) ? $values[$key] : null); } /** @@ -722,7 +735,7 @@ class Readability */ if ($node->hasSingleTagInsideElement('p') && $node->getLinkDensity() < 0.25) { $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128))); - $pNode = $node->getChildren(true)[0]; + $pNode = NodeUtility::filterTextNodes($node->childNodes)->item(0); $node->parentNode->replaceChild($pNode, $node); $node = $pNode; $elementsToScore[] = $node; @@ -1082,7 +1095,7 @@ class Readability // If the top candidate is the only child, use parent instead. This will help sibling // joining logic when adjacent content is actually located in parent's sibling node. $parentOfTopCandidate = $topCandidate->parentNode; - while ($parentOfTopCandidate->nodeName !== 'body' && count($parentOfTopCandidate->getChildren(true)) === 1) { + while ($parentOfTopCandidate->nodeName !== 'body' && count(NodeUtility::filterTextNodes($parentOfTopCandidate->childNodes)) === 1) { $topCandidate = $parentOfTopCandidate; $parentOfTopCandidate = $topCandidate->parentNode; } @@ -1102,14 +1115,16 @@ class Readability $siblingScoreThreshold = max(10, $topCandidate->contentScore * 0.2); // Keep potential top candidate's parent node to try to get text direction of it later. $parentOfTopCandidate = $topCandidate->parentNode; - $siblings = $parentOfTopCandidate->getChildren(); + $siblings = $parentOfTopCandidate->childNodes; $hasContent = false; $this->logger->info('[Rating] Adding top candidate siblings...'); - /** @var DOMElement $sibling */ - foreach ($siblings as $sibling) { + /* @var DOMElement $sibling */ + // Can't foreach here because down there we might change the tag name and that causes the foreach to skip items + for ($i = 0; $i < $siblings->length; $i++) { + $sibling = $siblings[$i]; $append = false; if ($sibling === $topCandidate) { @@ -1147,7 +1162,6 @@ class Readability * We have a node that isn't a common block level element, like a form or td tag. * Turn it into a div so it doesn't get filtered out later by accident. */ - $sibling = NodeUtility::setNodeTag($sibling, 'div'); } @@ -1266,11 +1280,11 @@ class Readability // Remove single-cell tables foreach ($article->shiftingAwareGetElementsByTagName('table') as $table) { /** @var DOMNode $table */ - $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->childNodes[0] : $table; + $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->getFirstElementChild() : $table; if ($tbody->hasSingleTagInsideElement('tr')) { - $row = $tbody->firstChild; + $row = $tbody->getFirstElementChild(); if ($row->hasSingleTagInsideElement('td')) { - $cell = $row->firstChild; + $cell = $row->getFirstElementChild(); $cell = NodeUtility::setNodeTag($cell, (array_reduce(iterator_to_array($cell->childNodes), function ($carry, $node) { return $node->isPhrasingContent() && $carry; }, true)) ? 'p' : 'div'); @@ -1597,7 +1611,7 @@ class Readability $node->removeAttribute('class'); } - for ($node = $node->firstChild; $node !== null; $node = $node->nextSibling) { + for ($node = $node->getFirstElementChild(); $node !== null; $node = $node->nextSibling) { $this->_cleanClasses($node); } } @@ -1757,6 +1771,22 @@ class Readability } /** + * @return string|null + */ + public function getSiteName() + { + return $this->siteName; + } + + /** + * @param string $siteName + */ + protected function setSiteName($siteName) + { + $this->siteName = $siteName; + } + + /** * @return null|string */ public function getDirection() |