From d0a9aeaf80510cdbbf4f4e461798ae9c36ace420 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 17 Apr 2019 08:51:17 +0300 Subject: move readability library to af_readability/vendor out of global vendor directory af_redditimgur: use HOOK_GET_FULL_TEXT instead of invoking readability directly --- .../andreskrey/Readability/Nodes/NodeUtility.php | 160 +++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php (limited to 'plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php') diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php new file mode 100644 index 000000000..7a1f18ee4 --- /dev/null +++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php @@ -0,0 +1,160 @@ + '/-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i', + 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i', + 'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i', + 'byline' => '/byline|author|dateline|writtenby|p-author/i', + 'replaceFonts' => '/<(\/?)font[^>]*>/gi', + 'normalize' => '/\s{2,}/', + 'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i', + 'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i', + 'prevLink' => '/(prev|earl|old|new|<|«)/i', + 'whitespace' => '/^\s*$/', + 'hasContent' => '/\S$/', + 'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i', + 'negative' => '/hidden|^hid$| hid$| hid |^hid 
|banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
        // \x{00A0} is the unicode version of &nbsp;
        'onlyWhitespace' => '/\x{00A0}|\s+/u'
    ];

    /**
     * Returns the first node, starting at $node itself, that is either an element or not whitespace.
     *
     * Walks the nextSibling chain for as long as the current node is a non-element node whose
     * isWhitespace() call returns a truthy value.
     *
     * Imported from the Element class on league\html-to-markdown.
     *
     * @param DOMNode|null $node Node to start from. Non-element nodes must provide isWhitespace(), which is
     *                           not defined in this class.
     *
     * @return DOMNode|null The node the walk stopped on, or null when the sibling chain is exhausted.
     */
    public static function nextElement($node)
    {
        $next = $node;
        while ($next
            && $next->nodeType !== XML_ELEMENT_NODE
            && $next->isWhitespace()) {
            $next = $next->nextSibling;
        }

        return $next;
    }

    /**
     * Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new
     * element with the new tag name and importing it to the main DOMDocument.
     *
     * The children (and optionally the attributes) of $node are copied onto the new element, which then takes the
     * place of $node in its parent. When $node has no parent the new element is still created in the owner
     * document and returned, but nothing is replaced.
     *
     * @param DOMNode $node             Node to rename; it must belong to a document (ownerDocument is used).
     * @param string  $value            New tag name.
     * @param bool    $importAttributes Whether to copy the attributes of $node onto the new element.
     *
     * @return DOMNode The newly created element.
     */
    public static function setNodeTag($node, $value, $importAttributes = true)
    {
        // Build the replacement in a scratch document whose root element carries the new tag name.
        $new = new DOMDocument('1.0', 'utf-8');
        $new->appendChild($new->createElement($value));

        // importNode() produces copies, so the child list of $node is not altered while it is iterated.
        foreach ($node->childNodes as $child) {
            $new->firstChild->appendChild($new->importNode($child, true));
        }

        // Nodes that are not elements expose null instead of an attribute map; skip them instead of warning.
        if ($importAttributes && $node->attributes !== null) {
            foreach ($node->attributes as $attribute) {
                $new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue);
            }
        }

        // The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement.
        $import = $node->ownerDocument->importNode($new->firstChild, true);

        // A detached node has no parent to swap it out of; calling replaceChild() on null would be fatal.
        if ($node->parentNode) {
            $node->parentNode->replaceChild($import, $node);
        }

        return $import;
    }

    /**
     * Removes the current node and returns the next node to be parsed (child, sibling or parent).
     *
     * The children of $node are skipped because they are removed together with it. A node without a parent is
     * left untouched, matching removeNode().
     *
     * @param DOMNode $node
     *
     * @return DOMNode|null The node that follows $node in traversal order, or null when there is none.
     */
    public static function removeAndGetNext($node)
    {
        // Resolve the successor first: it is found through the siblings/parents of $node, which are only
        // reachable while $node is still attached.
        $nextNode = self::getNextNode($node, true);
        self::removeNode($node);

        return $nextNode;
    }

    /**
     * Remove the selected node from its parent. Does nothing when the node has no parent.
     *
     * @param DOMNode $node
     *
     * @return void
     **/
    public static function removeNode($node)
    {
        $parent = $node->parentNode;
        if ($parent) {
            $parent->removeChild($node);
        }
    }

    /**
     * Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
     * for parents.
     *
     * @param DOMNode $originalNode
     * @param bool    $ignoreSelfAndKids Pass true when the node and its children are going away and must be skipped.
     *
     * @return DOMNode|null The next node, or null once the top of the tree is passed without finding a sibling.
     */
    public static function getNextNode($originalNode, $ignoreSelfAndKids = false)
    {
        /*
         * Traverse the DOM from node to node, starting at the node passed in.
         * Pass true for the second parameter to indicate this node itself
         * (and its kids) are going away, and we want the next node over.
         *
         * Calling this in a loop will traverse the DOM depth-first.
         */

        // First check for kids if those aren't being ignored
        if (!$ignoreSelfAndKids && $originalNode->firstChild) {
            return $originalNode->firstChild;
        }

        // Then for siblings...
        if ($originalNode->nextSibling) {
            return $originalNode->nextSibling;
        }

        // And finally, move up the parent chain *and* find a sibling
        // (because this is depth-first traversal, we will have already
        // seen the parent nodes themselves).
        do {
            $originalNode = $originalNode->parentNode;
        } while ($originalNode && !$originalNode->nextSibling);

        // $originalNode is null here when the chain of parents ran out.
        return $originalNode ? $originalNode->nextSibling : null;
    }
}
-- cgit v1.2.3