diff options
author | Andres Rey <[email protected]> | 2019-06-17 20:29:32 +0100 |
---|---|---|
committer | Andres Rey <[email protected]> | 2019-06-17 20:29:32 +0100 |
commit | df9d3e1634bc76978baa989a6cee4035a2a346d4 (patch) | |
tree | 01a43b02943eeb95a06640bd20a18943cdd598e0 /src/Nodes | |
parent | e38cbfaebd1f826233f7a0e07c66a52f44e9b796 (diff) |
Remove getChildren function, implement a fake DOMNodeList class. Prepare everything to support newer versions of libxml2
Diffstat (limited to 'src/Nodes')
-rw-r--r-- | src/Nodes/DOM/DOMNodeList.php | 108 | ||||
-rw-r--r-- | src/Nodes/NodeTrait.php | 26 | ||||
-rw-r--r-- | src/Nodes/NodeUtility.php | 18 |
3 files changed, 129 insertions, 23 deletions
```diff
diff --git a/src/Nodes/DOM/DOMNodeList.php b/src/Nodes/DOM/DOMNodeList.php
new file mode 100644
index 0000000..28d4d42
--- /dev/null
+++ b/src/Nodes/DOM/DOMNodeList.php
@@ -0,0 +1,108 @@
+<?php
+
+namespace andreskrey\Readability\Nodes\DOM;
+
+/**
+ * Class DOMNodeList
+ *
+ * This is a fake DOMNodeList class that allows adding items to the list. The original class is static and the nodes
+ * are defined automagically when instantiating it. This fake version behaves exactly the same way but adds the function
+ * add() that allows to insert new DOMNodes into the DOMNodeList.
+ *
+ * It cannot extend the original DOMNodeList class because the functionality behind the property ->length is hidden
+ * from the user and cannot be extended, changed, or tweaked.
+ *
+ * @package andreskrey\Readability\Nodes\DOM
+ */
+class DOMNodeList implements \ArrayAccess, \Countable, \IteratorAggregate
+{
+    /**
+     * @var array
+     */
+    protected $items = [];
+
+    /**
+     * @var int
+     */
+    protected $length = 0;
+
+    /**
+     * To allow access to length in the same way that DOMNodeList allows
+     *
+     * {@inheritDoc}
+     */
+    public function __get($name)
+    {
+        switch ($name) {
+            case 'length':
+                return $this->length;
+            default:
+                trigger_error(sprintf('Undefined property: %s::%s', static::class, $name));
+        }
+    }
+
+    /**
+     * @param \DOMNode $node
+     *
+     * @return DOMNodeList
+     */
+    public function add(\DOMNode $node)
+    {
+        $this->items[] = $node;
+        $this->length++;
+
+        return $this;
+    }
+
+    /**
+     * @return int|void
+     */
+    public function count()
+    {
+        return $this->length;
+    }
+
+    /**
+     * To make it compatible with iterator_to_array() function
+     *
+     * {@inheritDoc}
+     */
+    public function getIterator()
+    {
+        return new \ArrayIterator($this->items);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public function offsetExists($offset)
+    {
+        return isset($this->items[$offset]);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public function offsetGet($offset)
+    {
+        return $this->items[$offset];
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public function offsetSet($offset, $value)
+    {
+        $this->items[$offset] = $value;
+        $this->length = count($this->items);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public function offsetUnset($offset)
+    {
+        unset($this->items[$offset]);
+        $this->length--;
+    }
+}
diff --git a/src/Nodes/NodeTrait.php b/src/Nodes/NodeTrait.php
index 9ef1fa2..2ce4383 100644
--- a/src/Nodes/NodeTrait.php
+++ b/src/Nodes/NodeTrait.php
@@ -313,26 +313,6 @@ trait NodeTrait
     }
 
     /**
-     * Returns the children of the current node.
-     *
-     * @param bool $filterEmptyDOMText Filter empty DOMText nodes?
-     *
-     * @return array
-     */
-    public function getChildren($filterEmptyDOMText = false)
-    {
-        $ret = iterator_to_array($this->childNodes);
-        if ($filterEmptyDOMText) {
-            // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
-            $ret = array_values(array_filter($ret, function ($node) {
-                return $node->nodeName !== '#text' || mb_strlen(trim($node->nodeValue));
-            }));
-        }
-
-        return $ret;
-    }
-
-    /**
      * Return an array indicating how many rows and columns this table has.
      *
      * @return array
@@ -418,12 +398,12 @@ trait NodeTrait
     public function hasSingleTagInsideElement($tag)
     {
         // There should be exactly 1 element child with given tag
-        if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) {
+        if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children[0]->nodeName !== $tag) {
             return false;
         }
 
         // And there should be no text nodes with real content
-        return array_reduce($children, function ($carry, $child) {
+        return array_reduce(iterator_to_array($children), function ($carry, $child) {
             if (!$carry === false) {
                 return false;
             }
@@ -443,7 +423,7 @@ trait NodeTrait
     {
         $result = false;
         if ($this->hasChildNodes()) {
-            foreach ($this->getChildren() as $child) {
+            foreach ($this->childNodes as $child) {
                 if (in_array($child->nodeName, $this->divToPElements)) {
                     $result = true;
                 } else {
diff --git a/src/Nodes/NodeUtility.php b/src/Nodes/NodeUtility.php
index 7a1f18e..631a0aa 100644
--- a/src/Nodes/NodeUtility.php
+++ b/src/Nodes/NodeUtility.php
@@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes;
 use andreskrey\Readability\Nodes\DOM\DOMDocument;
 use andreskrey\Readability\Nodes\DOM\DOMElement;
 use andreskrey\Readability\Nodes\DOM\DOMNode;
+use andreskrey\Readability\Nodes\DOM\DOMNodeList;
 
 /**
  * Class NodeUtility.
@@ -157,4 +158,21 @@ class NodeUtility
 
         return ($originalNode) ? $originalNode->nextSibling : $originalNode;
     }
+
+    /**
+     * Remove all empty DOMNodes from DOMNodeLists
+     *
+     * @param \DOMNodeList $list
+     * @return DOMNodeList
+     */
+    public static function filterTextNodes(\DOMNodeList $list)
+    {
+        $newList = new DOMNodeList();
+        foreach($list as $node){
+            if($node->nodeType !== XML_TEXT_NODE || mb_strlen(trim($node->nodeValue))){
+                $newList->add($node);
+            }
+        }
+        return $newList;
+    }
 }
```