From 8ea537123d1cef38f25f9fbe92e3a9c0f89de55a Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 13 Dec 2022 20:08:43 +0300 Subject: move af_readability out of master tree --- .../readability.php/src/Nodes/DOM/DOMAttr.php | 10 - .../src/Nodes/DOM/DOMCdataSection.php | 10 - .../src/Nodes/DOM/DOMCharacterData.php | 10 - .../readability.php/src/Nodes/DOM/DOMComment.php | 10 - .../readability.php/src/Nodes/DOM/DOMDocument.php | 30 -- .../src/Nodes/DOM/DOMDocumentFragment.php | 10 - .../src/Nodes/DOM/DOMDocumentType.php | 10 - .../readability.php/src/Nodes/DOM/DOMElement.php | 46 -- .../readability.php/src/Nodes/DOM/DOMEntity.php | 10 - .../src/Nodes/DOM/DOMEntityReference.php | 10 - .../readability.php/src/Nodes/DOM/DOMNode.php | 14 - .../readability.php/src/Nodes/DOM/DOMNodeList.php | 82 --- .../readability.php/src/Nodes/DOM/DOMNotation.php | 10 - .../src/Nodes/DOM/DOMProcessingInstruction.php | 10 - .../readability.php/src/Nodes/DOM/DOMText.php | 10 - .../readability.php/src/Nodes/NodeTrait.php | 566 --------------------- .../readability.php/src/Nodes/NodeUtility.php | 192 ------- 17 files changed, 1040 deletions(-) delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCdataSection.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCharacterData.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMComment.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocument.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentType.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMElement.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntityReference.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNode.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNodeList.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMProcessingInstruction.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMText.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeTrait.php delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php (limited to 'plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes') diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php deleted file mode 100644 index 1bdf395e7..000000000 --- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php +++ /dev/null @@ -1,10 +0,0 @@ -registerNodeClass('DOMAttr', DOMAttr::class); - $this->registerNodeClass('DOMCdataSection', DOMCdataSection::class); - $this->registerNodeClass('DOMCharacterData', DOMCharacterData::class); - $this->registerNodeClass('DOMComment', DOMComment::class); - $this->registerNodeClass('DOMDocument', self::class); - $this->registerNodeClass('DOMDocumentFragment', DOMDocumentFragment::class); - $this->registerNodeClass('DOMDocumentType', DOMDocumentType::class); - $this->registerNodeClass('DOMElement', DOMElement::class); - $this->registerNodeClass('DOMEntity', DOMEntity::class); - $this->registerNodeClass('DOMEntityReference', DOMEntityReference::class); - $this->registerNodeClass('DOMNode', DOMNode::class); - $this->registerNodeClass('DOMNotation', DOMNotation::class); - $this->registerNodeClass('DOMProcessingInstruction', DOMProcessingInstruction::class); - $this->registerNodeClass('DOMText', DOMText::class); - } -} diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php deleted file mode 100644 index 33a3f950a..000000000 --- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php +++ /dev/null @@ -1,10 +0,0 @@ -childNodes as $node) { - if ($node->nodeType === XML_ELEMENT_NODE) { - $newList->add($node); - } - } - return $newList; - } - - /** - * Returns the Element immediately prior to the specified one in its parent's children list, or null if the specified element is the first one in the list. - * - * @see https://wiki.php.net/rfc/dom_living_standard_api - * @return DOMElement|null - */ - public function previousElementSibling() - { - $previous = $this->previousSibling; - while ($previous) { - if ($previous->nodeType === XML_ELEMENT_NODE) { - return $previous; - } - $previous = $previous->previousSibling; - } - return null; - } -} diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php deleted file mode 100644 index 751b59c48..000000000 --- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php +++ /dev/null @@ -1,10 +0,0 @@ -length is hidden - * from the user and cannot be extended, changed, or tweaked. - */ -class DOMNodeList implements \Countable, \IteratorAggregate -{ - /** - * @var array - */ - protected $items = []; - - /** - * @var int - */ - protected $length = 0; - - /** - * To allow access to length in the same way that DOMNodeList allows. - * - * {@inheritdoc} - */ - public function __get($name) - { - switch ($name) { - case 'length': - return $this->length; - default: - trigger_error(sprintf('Undefined property: %s::%s', static::class, $name)); - } - } - - /** - * @param DOMNode|DOMElement|DOMComment $node - * - * @return DOMNodeList - */ - public function add($node) - { - $this->items[] = $node; - $this->length++; - - return $this; - } - - /** - * @param int $offset - * - * @return DOMNode|DOMElement|DOMComment - */ - public function item(int $offset) - { - return $this->items[$offset]; - } - - /** - * @return int|void - */ - public function count(): int - { - return $this->length; - } - - /** - * To make it compatible with iterator_to_array() function. - * - * {@inheritdoc} - */ - public function getIterator(): \ArrayIterator - { - return new \ArrayIterator($this->items); - } -} diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php deleted file mode 100644 index d276e42a4..000000000 --- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php +++ /dev/null @@ -1,10 +0,0 @@ -initialized; - } - - /** - * @return bool - */ - public function isReadabilityDataTable() - { - /* - * This is a workaround that I'd like to remove in the future. - * Seems that although we are extending the base DOMElement and adding custom properties (like this one, - * 'readabilityDataTable'), these properties get lost when you search for elements with getElementsByTagName. - * This means that even if we mark the tables in a previous step, when we want to retrieve that information, - * all the custom properties are in their default values. Somehow we need to find a way to make these properties - * permanent across the whole DOM. - * - * @see https://stackoverflow.com/questions/35654709/php-registernodeclass-and-reusing-variable-names - */ - return $this->hasAttribute('readabilityDataTable') - && $this->getAttribute('readabilityDataTable') === '1'; -// return $this->readabilityDataTable; - } - - /** - * @param bool $param - */ - public function setReadabilityDataTable($param) - { - // Can't be "true" because DOMDocument casts it to "1" - $this->setAttribute('readabilityDataTable', $param ? '1' : '0'); -// $this->readabilityDataTable = $param; - } - - /** - * Initializer. Calculates the current score of the node and returns a full Readability object. - * - * @ TODO: I don't like the weightClasses param. How can we get the config here? - * - * @param $weightClasses bool Weight classes? - * - * @return static - */ - public function initializeNode($weightClasses) - { - if (!$this->isInitialized()) { - $contentScore = 0; - - switch ($this->nodeName) { - case 'div': - $contentScore += 5; - break; - - case 'pre': - case 'td': - case 'blockquote': - $contentScore += 3; - break; - - case 'address': - case 'ol': - case 'ul': - case 'dl': - case 'dd': - case 'dt': - case 'li': - case 'form': - $contentScore -= 3; - break; - - case 'h1': - case 'h2': - case 'h3': - case 'h4': - case 'h5': - case 'h6': - case 'th': - $contentScore -= 5; - break; - } - - $this->contentScore = $contentScore + ($weightClasses ? $this->getClassWeight() : 0); - - $this->initialized = true; - } - - return $this; - } - - /** - * Override for native getAttribute method. Some nodes have the getAttribute method, some don't, so we need - * to check first the existence of the attributes property. - * - * @param $attributeName string Attribute to retrieve - * - * @return string - */ - #[\ReturnTypeWillChange] - public function getAttribute($attributeName) - { - if (!is_null($this->attributes)) { - return parent::getAttribute($attributeName); - } - - return ''; - } - - /** - * Override for native hasAttribute. - * - * @param $attributeName - * - * @return bool - * - * @see getAttribute - */ - #[\ReturnTypeWillChange] - public function hasAttribute($attributeName) - { - if (!is_null($this->attributes)) { - return parent::hasAttribute($attributeName); - } - - return false; - } - - /** - * Get the ancestors of the current node. - * - * @param int|bool $maxLevel Max amount of ancestors to get. False for all of them - * - * @return array - */ - public function getNodeAncestors($maxLevel = 3) - { - $ancestors = []; - $level = 0; - - $node = $this->parentNode; - - while ($node && !($node instanceof DOMDocument)) { - $ancestors[] = $node; - $level++; - if ($level === $maxLevel) { - break; - } - $node = $node->parentNode; - } - - return $ancestors; - } - - /** - * Returns all links from the current element. - * - * @return array - */ - public function getAllLinks() - { - return iterator_to_array($this->getElementsByTagName('a')); - } - - /** - * Get the density of links as a percentage of the content - * This is the amount of text that is inside a link divided by the total text in the node. - * - * @return int - */ - public function getLinkDensity() - { - $textLength = mb_strlen($this->getTextContent(true)); - if ($textLength === 0) { - return 0; - } - - $linkLength = 0; - - $links = $this->getAllLinks(); - - if ($links) { - /** @var DOMElement $link */ - foreach ($links as $link) { - $href = $link->getAttribute('href'); - $coefficient = ($href && preg_match(NodeUtility::$regexps['hashUrl'], $href)) ? 0.3 : 1; - $linkLength += mb_strlen($link->getTextContent(true)) * $coefficient; - } - } - - return $linkLength / $textLength; - } - - /** - * Calculates the weight of the class/id of the current element. - * - * @return int - */ - public function getClassWeight() - { - $weight = 0; - - // Look for a special classname - $class = $this->getAttribute('class'); - if (trim($class)) { - if (preg_match(NodeUtility::$regexps['negative'], $class)) { - $weight -= 25; - } - - if (preg_match(NodeUtility::$regexps['positive'], $class)) { - $weight += 25; - } - } - - // Look for a special ID - $id = $this->getAttribute('id'); - if (trim($id) !== '') { - if (preg_match(NodeUtility::$regexps['negative'], $id)) { - $weight -= 25; - } - - if (preg_match(NodeUtility::$regexps['positive'], $id)) { - $weight += 25; - } - } - - return $weight; - } - - /** - * Returns the full text of the node. - * - * @param bool $normalize Normalize white space? - * - * @return string - */ - public function getTextContent($normalize = true) - { - $nodeValue = trim($this->textContent); - if ($normalize) { - $nodeValue = preg_replace(NodeUtility::$regexps['normalize'], ' ', $nodeValue); - } - - return $nodeValue; - } - - /** - * Return an array indicating how many rows and columns this table has. - * - * @return array - */ - public function getRowAndColumnCount() - { - $rows = $columns = 0; - $trs = $this->getElementsByTagName('tr'); - foreach ($trs as $tr) { - /** @var \DOMElement $tr */ - $rowspan = $tr->getAttribute('rowspan'); - $rows += ($rowspan || 1); - - // Now look for column-related info - $columnsInThisRow = 0; - $cells = $tr->getElementsByTagName('td'); - foreach ($cells as $cell) { - /** @var \DOMElement $cell */ - $colspan = $cell->getAttribute('colspan'); - $columnsInThisRow += ($colspan || 1); - } - $columns = max($columns, $columnsInThisRow); - } - - return ['rows' => $rows, 'columns' => $columns]; - } - - /** - * Creates a new node based on the text content of the original node. - * - * @param $originalNode DOMNode - * @param $tagName string - * - * @return DOMElement - */ - public function createNode($originalNode, $tagName) - { - $text = $originalNode->getTextContent(false); - $newNode = $originalNode->ownerDocument->createElement($tagName, $text); - - return $newNode; - } - - /** - * Check if a given node has one of its ancestor tag name matching the - * provided one. - * - * @param string $tagName - * @param int $maxDepth - * @param callable $filterFn - * - * @return bool - */ - public function hasAncestorTag($tagName, $maxDepth = 3, callable $filterFn = null) - { - $depth = 0; - $node = $this; - - while ($node->parentNode) { - if ($maxDepth > 0 && $depth > $maxDepth) { - return false; - } - - if ($node->parentNode->nodeName === $tagName && (!$filterFn || $filterFn($node->parentNode))) { - return true; - } - - $node = $node->parentNode; - $depth++; - } - - return false; - } - - /** - * Check if this node has only whitespace and a single element with given tag - * or if it contains no element with given tag or more than 1 element. - * - * @param $tag string Name of tag - * - * @return bool - */ - public function hasSingleTagInsideElement($tag) - { - // There should be exactly 1 element child with given tag - if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) { - return false; - } - - // And there should be no text nodes with real content - return array_reduce(iterator_to_array($children), function ($carry, $child) { - if (!$carry === false) { - return false; - } - - /* @var DOMNode $child */ - return !($child->nodeType === XML_TEXT_NODE && preg_match(NodeUtility::$regexps['hasContent'], $child->textContent)); - }); - } - - /** - * Check if the current element has a single child block element. - * Block elements are the ones defined in the divToPElements array. - * - * @return bool - */ - public function hasSingleChildBlockElement() - { - $result = false; - if ($this->hasChildNodes()) { - foreach ($this->childNodes as $child) { - if (in_array($child->nodeName, $this->divToPElements)) { - $result = true; - } else { - // If any of the hasSingleChildBlockElement calls return true, return true then. - /** @var $child DOMElement */ - $result = ($result || $child->hasSingleChildBlockElement()); - } - } - } - - return $result; - } - - /** - * Determines if a node has no content or it is just a bunch of dividing lines and/or whitespace. - * - * @return bool - */ - public function isElementWithoutContent() - { - return $this instanceof DOMElement && - mb_strlen(preg_replace(NodeUtility::$regexps['onlyWhitespace'], '', $this->textContent)) === 0 && - ($this->childNodes->length === 0 || - $this->childNodes->length === $this->getElementsByTagName('br')->length + $this->getElementsByTagName('hr')->length - /* - * Special PHP DOMDocument case: We also need to count how many DOMText we have inside the node. - * If there's an empty tag with an space inside and a BR (for example "


) counting only BRs and - * HRs will will say that the example has 2 nodes, instead of one. This happens because in DOMDocument, - * DOMTexts are also nodes (which doesn't happen in JS). So we need to also count how many DOMText we - * are dealing with (And at this point we know they are empty or are just whitespace, because of the - * mb_strlen in this chain of checks). - */ - + count(array_filter(iterator_to_array($this->childNodes), function ($child) { - return $child instanceof DOMText; - })) - - ); - } - - /** - * Determine if a node qualifies as phrasing content. - * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content. - * - * @return bool - */ - public function isPhrasingContent() - { - return $this->nodeType === XML_TEXT_NODE || in_array($this->nodeName, $this->phrasing_elems) !== false || - (!is_null($this->childNodes) && - ($this->nodeName === 'a' || $this->nodeName === 'del' || $this->nodeName === 'ins') && - array_reduce(iterator_to_array($this->childNodes), function ($carry, $node) { - return $node->isPhrasingContent() && $carry; - }, true) - ); - } - - /** - * In the original JS project they check if the node has the style display=none, which unfortunately - * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none". - * - * @return bool - */ - public function isProbablyVisible() - { - return !preg_match('/display:( )?none/i', $this->getAttribute('style')) && - !$this->hasAttribute('hidden') && - //check for "fallback-image" so that wikimedia math images are displayed - (!$this->hasAttribute('aria-hidden') || $this->getAttribute('aria-hidden') !== 'true' || ($this->hasAttribute('class') && strpos($this->getAttribute('class'), 'fallback-image') !== false)); - } - - /** - * @return bool - */ - public function isWhitespace() - { - return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) || - ($this->nodeType === XML_ELEMENT_NODE && $this->nodeName === 'br'); - } - - /** - * This is a hack that overcomes the issue of node shifting when scanning and removing nodes. - * - * In the JS version of getElementsByTagName, if you remove a node it will not appear during the - * foreach. This does not happen in PHP DOMDocument, because if you remove a node, it will still appear but as an - * orphan node and will give an exception if you try to do anything with it. - * - * Shifting also occurs when converting parent nodes (like a P to a DIV), which in that case the found nodes are - * removed from the foreach "pool" but the internal index of the foreach is not aware and skips over nodes that - * never looped over. (index is at position 5, 2 nodes are removed, next one should be node 3, but the foreach tries - * to access node 6) - * - * This function solves this by searching for the nodes on every loop and keeping track of the count differences. - * Because on every loop we call getElementsByTagName again, this could cause a performance impact and should be - * used only when the results of the search are going to be used to remove the nodes. - * - * @param string $tag - * - * @return \Generator - */ - public function shiftingAwareGetElementsByTagName($tag) - { - /** @var $nodes DOMNodeList */ - $nodes = $this->getElementsByTagName($tag); - $count = $nodes->length; - - for ($i = 0; $i < $count; $i = max(++$i, 0)) { - yield $nodes->item($i); - - // Search for all the nodes again - $nodes = $this->getElementsByTagName($tag); - - // Subtract the amount of nodes removed from the current index - $i -= $count - $nodes->length; - - // Subtract the amount of nodes removed from the current count - $count -= ($count - $nodes->length); - } - } - - /** - * Mimics JS's firstElementChild property. PHP only has firstChild which could be any type of DOMNode. Use this - * function to get the first one that is an DOMElement node. - * - * @return \DOMElement|null - */ - public function getFirstElementChild() - { - if ($this->childNodes instanceof \Traversable) { - foreach ($this->childNodes as $node) { - if ($node instanceof \DOMElement) { - return $node; - } - } - } - - return null; - } -} diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php deleted file mode 100644 index 56de70517..000000000 --- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php +++ /dev/null @@ -1,192 +0,0 @@ - '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i', - 'okMaybeItsACandidate' => '/and|article|body|column|content|main|shadow/i', - 'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i', - 'byline' => '/byline|author|dateline|writtenby|p-author/i', - 'replaceFonts' => '/<(\/?)font[^>]*>/i', - 'normalize' => '/\s{2,}/', - 'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i', - 'shareElements' => '/(\b|_)(share|sharedaddy)(\b|_)/i', - 'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i', - 'prevLink' => '/(prev|earl|old|new|<|«)/i', - 'tokenize' => '/\W+/', - 'whitespace' => '/^\s*$/', - 'hasContent' => '/\S$/', - 'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i', - 'negative' => '/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i', - // \x{00A0} is the unicode version of   - 'onlyWhitespace' => '/\x{00A0}|\s+/u', - 'hashUrl' => '/^#.+/', - 'srcsetUrl' => '/(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/', - 'b64DataUrl' => '/^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i', - // See: https://schema.org/Article - 'jsonLdArticleTypes' => '/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/' - - ]; - - /** - * Finds the next node, starting from the given node, and ignoring - * whitespace in between. If the given node is an element, the same node is - * returned. - * - * Imported from the Element class on league\html-to-markdown. - * - * @param $node - * - * @return DOMNode - */ - public static function nextNode($node) - { - $next = $node; - while ($next - && $next->nodeType !== XML_ELEMENT_NODE - && $next->isWhitespace()) { - $next = $next->nextSibling; - } - - return $next; - } - - /** - * Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new - * element with the new tag name and importing it to the main DOMDocument. - * - * @param DOMNode $node - * @param string $value - * @param bool $importAttributes - * - * @return DOMNode - */ - public static function setNodeTag($node, $value, $importAttributes = true) - { - $new = new DOMDocument('1.0', 'utf-8'); - $new->appendChild($new->createElement($value)); - - $children = $node->childNodes; - /** @var $children \DOMNodeList $i */ - for ($i = 0; $i < $children->length; $i++) { - $import = $new->importNode($children->item($i), true); - $new->firstChild->appendChild($import); - } - - if ($importAttributes) { - // Import attributes from the original node. - foreach ($node->attributes as $attribute) { - $new->firstChild->setAttribute($attribute->nodeName, $attribute->nodeValue); - } - } - - // The import must be done on the firstChild of $new, since $new is a DOMDocument and not a DOMElement. - $import = $node->ownerDocument->importNode($new->firstChild, true); - $node->parentNode->replaceChild($import, $node); - - return $import; - } - - /** - * Removes the current node and returns the next node to be parsed (child, sibling or parent). - * - * @param DOMNode $node - * - * @return DOMNode - */ - public static function removeAndGetNext($node) - { - $nextNode = self::getNextNode($node, true); - $node->parentNode->removeChild($node); - - return $nextNode; - } - - /** - * Remove the selected node. - * - * @param $node DOMElement - * - * @return void - **/ - public static function removeNode($node) - { - $parent = $node->parentNode; - if ($parent) { - $parent->removeChild($node); - } - } - - /** - * Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally - * for parents. - * - * @param DOMNode $originalNode - * @param bool $ignoreSelfAndKids - * - * @return DOMNode - */ - public static function getNextNode($originalNode, $ignoreSelfAndKids = false) - { - /* - * Traverse the DOM from node to node, starting at the node passed in. - * Pass true for the second parameter to indicate this node itself - * (and its kids) are going away, and we want the next node over. - * - * Calling this in a loop will traverse the DOM depth-first. - */ - - // First check for kids if those aren't being ignored - if (!$ignoreSelfAndKids && $originalNode->firstChild) { - return $originalNode->firstChild; - } - - // Then for siblings... - if ($originalNode->nextSibling) { - return $originalNode->nextSibling; - } - - // And finally, move up the parent chain *and* find a sibling - // (because this is depth-first traversal, we will have already - // seen the parent nodes themselves). - do { - $originalNode = $originalNode->parentNode; - } while ($originalNode && !$originalNode->nextSibling); - - return ($originalNode) ? $originalNode->nextSibling : $originalNode; - } - - /** - * Remove all empty DOMNodes from DOMNodeLists. - * - * @param \DOMNodeList $list - * - * @return DOMNodeList - */ - public static function filterTextNodes(\DOMNodeList $list) - { - $newList = new DOMNodeList(); - foreach ($list as $node) { - if ($node->nodeType !== XML_TEXT_NODE || mb_strlen(trim($node->nodeValue))) { - $newList->add($node); - } - } - - return $newList; - } -} -- cgit v1.2.3