summaryrefslogtreecommitdiff
path: root/vendor/andreskrey/Readability/Nodes/NodeUtility.php
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/andreskrey/Readability/Nodes/NodeUtility.php')
-rw-r--r--vendor/andreskrey/Readability/Nodes/NodeUtility.php160
1 files changed, 0 insertions, 160 deletions
diff --git a/vendor/andreskrey/Readability/Nodes/NodeUtility.php b/vendor/andreskrey/Readability/Nodes/NodeUtility.php
deleted file mode 100644
index 7a1f18ee4..000000000
--- a/vendor/andreskrey/Readability/Nodes/NodeUtility.php
+++ /dev/null
@@ -1,160 +0,0 @@
-<?php
-
-namespace andreskrey\Readability\Nodes;
-
-use andreskrey\Readability\Nodes\DOM\DOMDocument;
-use andreskrey\Readability\Nodes\DOM\DOMElement;
-use andreskrey\Readability\Nodes\DOM\DOMNode;
-
-/**
- * Class NodeUtility.
- */
-class NodeUtility
-{
- /**
- * Collection of regexps to check the node usability.
- *
- * @var array
- */
- public static $regexps = [
- 'unlikelyCandidates' => '/-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
- 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
- 'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
- 'byline' => '/byline|author|dateline|writtenby|p-author/i',
- 'replaceFonts' => '/<(\/?)font[^>]*>/gi',
- 'normalize' => '/\s{2,}/',
- 'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i',
- 'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i',
- 'prevLink' => '/(prev|earl|old|new|<|«)/i',
- 'whitespace' => '/^\s*$/',
- 'hasContent' => '/\S$/',
- 'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
- 'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
- // \x{00A0} is the unicode version of &nbsp;
- 'onlyWhitespace' => '/\x{00A0}|\s+/u'
- ];
-
- /**
- * Imported from the Element class on league\html-to-markdown.
- *
- * @param $node
- *
- * @return DOMElement
- */
- public static function nextElement($node)
- {
- $next = $node;
- while ($next
- && $next->nodeType !== XML_ELEMENT_NODE
- && $next->isWhitespace()) {
- $next = $next->nextSibling;
- }
-
- return $next;
- }
-
- /**
- * Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new
- * element with the new tag name and importing it to the main DOMDocument.
- *
- * @param DOMNode $node
- * @param string $value
- * @param bool $importAttributes
- *
- * @return DOMNode
- */
- public static function setNodeTag($node, $value, $importAttributes = true)
- {
- $new = new DOMDocument('1.0', 'utf-8');
- $new->appendChild($new->createElement($value));
-
- $children = $node->childNodes;
- /** @var $children \DOMNodeList $i */
- for ($i = 0; $i < $children->length; $i++) {
- $import = $new->importNode($children->item($i), true);
- $new->firstChild->appendChild($import);
- }
-
- if ($importAttributes) {
- // Import attributes from the original node.
- foreach ($node->attributes as $attribute) {
- $new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue);
- }
- }
-
- // The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement.
- $import = $node->ownerDocument->importNode($new->firstChild, true);
- $node->parentNode->replaceChild($import, $node);
-
- return $import;
- }
-
- /**
- * Removes the current node and returns the next node to be parsed (child, sibling or parent).
- *
- * @param DOMNode $node
- *
- * @return DOMNode
- */
- public static function removeAndGetNext($node)
- {
- $nextNode = self::getNextNode($node, true);
- $node->parentNode->removeChild($node);
-
- return $nextNode;
- }
-
- /**
- * Remove the selected node.
- *
- * @param $node DOMElement
- *
- * @return void
- **/
- public static function removeNode($node)
- {
- $parent = $node->parentNode;
- if ($parent) {
- $parent->removeChild($node);
- }
- }
-
- /**
- * Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally
- * for parents.
- *
- * @param DOMNode $originalNode
- * @param bool $ignoreSelfAndKids
- *
- * @return DOMNode
- */
- public static function getNextNode($originalNode, $ignoreSelfAndKids = false)
- {
- /*
- * Traverse the DOM from node to node, starting at the node passed in.
- * Pass true for the second parameter to indicate this node itself
- * (and its kids) are going away, and we want the next node over.
- *
- * Calling this in a loop will traverse the DOM depth-first.
- */
-
- // First check for kids if those aren't being ignored
- if (!$ignoreSelfAndKids && $originalNode->firstChild) {
- return $originalNode->firstChild;
- }
-
- // Then for siblings...
- if ($originalNode->nextSibling) {
- return $originalNode->nextSibling;
- }
-
- // And finally, move up the parent chain *and* find a sibling
- // (because this is depth-first traversal, we will have already
- // seen the parent nodes themselves).
- do {
- $originalNode = $originalNode->parentNode;
- } while ($originalNode && !$originalNode->nextSibling);
-
- return ($originalNode) ? $originalNode->nextSibling : $originalNode;
- }
-}