From b8c1d622a77226b14fb307cfe3e0f4cea9e4268a Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Sat, 16 Jul 2022 16:30:46 +0300 Subject: add missing files for forked idiorm --- .../readability.php/src/Nodes/NodeUtility.php | 36 ++++++++++++++-------- 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php') diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php index cbf78bae0..56de70517 100644 --- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php +++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php @@ -1,11 +1,11 @@ '/-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i', - 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', + 'unlikelyCandidates' => '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i', + 'okMaybeItsACandidate' => '/and|article|body|column|content|main|shadow/i', 'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i', 'byline' => '/byline|author|dateline|writtenby|p-author/i', - 'replaceFonts' => '/<(\/?)font[^>]*>/gi', + 'replaceFonts' => '/<(\/?)font[^>]*>/i', 'normalize' => '/\s{2,}/', 'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i', + 'shareElements' => '/(\b|_)(share|sharedaddy)(\b|_)/i', 'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i', 'prevLink' => '/(prev|earl|old|new|<|«)/i', + 'tokenize' => '/\W+/', 'whitespace' => '/^\s*$/', 'hasContent' => '/\S$/', 'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i', - 'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i', + 'negative' => '/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i', // \x{00A0} is the unicode version of   - 'onlyWhitespace' => '/\x{00A0}|\s+/u' + 'onlyWhitespace' => '/\x{00A0}|\s+/u', + 'hashUrl' => '/^#.+/', + 'srcsetUrl' => '/(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/', + 'b64DataUrl' => '/^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i', + // See: https://schema.org/Article + 'jsonLdArticleTypes' => '/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/' + ]; /** + * Finds the next node, starting from the given node, and ignoring + * whitespace in between. If the given node is an element, the same node is + * returned. + * * Imported from the Element class on league\html-to-markdown. * * @param $node * - * @return DOMElement + * @return DOMNode */ - public static function nextElement($node) + public static function nextNode($node) { $next = $node; while ($next -- cgit v1.2.3