summaryrefslogtreecommitdiff
path: root/src/Nodes
diff options
context:
space:
mode:
authorAndres Rey <[email protected]>2019-06-17 20:29:32 +0100
committerAndres Rey <[email protected]>2019-06-17 20:29:32 +0100
commitdf9d3e1634bc76978baa989a6cee4035a2a346d4 (patch)
tree01a43b02943eeb95a06640bd20a18943cdd598e0 /src/Nodes
parente38cbfaebd1f826233f7a0e07c66a52f44e9b796 (diff)
Remove getChildren function, implement a fake DOMNodeList class. Prepare everything to support newer versions of libxml2
Diffstat (limited to 'src/Nodes')
-rw-r--r--src/Nodes/DOM/DOMNodeList.php108
-rw-r--r--src/Nodes/NodeTrait.php26
-rw-r--r--src/Nodes/NodeUtility.php18
3 files changed, 129 insertions, 23 deletions
diff --git a/src/Nodes/DOM/DOMNodeList.php b/src/Nodes/DOM/DOMNodeList.php
new file mode 100644
index 0000000..28d4d42
--- /dev/null
+++ b/src/Nodes/DOM/DOMNodeList.php
@@ -0,0 +1,108 @@
+<?php
+
+namespace andreskrey\Readability\Nodes\DOM;
+
+/**
+ * Class DOMNodeList
+ *
+ * A stand-in for the native \DOMNodeList that allows nodes to be added to the list. The native class is static and
+ * its nodes are defined automatically when it is instantiated. This version exposes the same read API (->length,
+ * count(), iteration, array access) and adds the add() method to insert new DOMNodes into the list.
+ *
+ * It cannot extend the native DOMNodeList class because the functionality behind the ->length property is hidden
+ * from the user and cannot be extended, changed, or tweaked.
+ *
+ * @package andreskrey\Readability\Nodes\DOM
+ */
+class DOMNodeList implements \ArrayAccess, \Countable, \IteratorAggregate
+{
+    /**
+     * Nodes currently held by the list.
+     *
+     * @var array
+     */
+    protected $items = [];
+
+    /**
+     * Number of entries in $items; every mutator keeps it in sync.
+     *
+     * @var int
+     */
+    protected $length = 0;
+
+    /**
+     * Expose ->length as a read-only property, the same way DOMNodeList allows.
+     *
+     * Any other property name triggers an "Undefined property" notice and yields null.
+     *
+     * @param string $name
+     *
+     * @return int|null
+     */
+    public function __get($name)
+    {
+        switch ($name) {
+            case 'length':
+                return $this->length;
+            default:
+                trigger_error(sprintf('Undefined property: %s::%s', static::class, $name));
+        }
+    }
+
+    /**
+     * Append a node to the end of the list.
+     *
+     * @param \DOMNode $node
+     *
+     * @return DOMNodeList
+     */
+    public function add(\DOMNode $node)
+    {
+        $this->items[] = $node;
+        $this->length = count($this->items);
+
+        return $this;
+    }
+
+    /**
+     * @return int
+     */
+    public function count()
+    {
+        return $this->length;
+    }
+
+    /**
+     * To make it compatible with iterator_to_array() function
+     *
+     * @return \ArrayIterator
+     */
+    public function getIterator()
+    {
+        return new \ArrayIterator($this->items);
+    }
+
+    /**
+     * @param mixed $offset
+     *
+     * @return bool
+     */
+    public function offsetExists($offset)
+    {
+        return isset($this->items[$offset]);
+    }
+
+    /**
+     * Return the entry stored at $offset, or null when there is none.
+     *
+     * @param mixed $offset
+     *
+     * @return mixed|null
+     */
+    public function offsetGet($offset)
+    {
+        // Guard the read so a missing index yields null instead of an undefined-offset notice
+        return isset($this->items[$offset]) ? $this->items[$offset] : null;
+    }
+
+    /**
+     * Store $value at $offset, or append it when no offset is given.
+     *
+     * @param mixed $offset
+     * @param mixed $value
+     */
+    public function offsetSet($offset, $value)
+    {
+        if ($offset === null) {
+            // `$list[] = $value` arrives here with a null offset; using null as a key would store under ''
+            $this->items[] = $value;
+        } else {
+            $this->items[$offset] = $value;
+        }
+
+        $this->length = count($this->items);
+    }
+
+    /**
+     * Remove the entry at $offset, if there is one.
+     *
+     * @param mixed $offset
+     */
+    public function offsetUnset($offset)
+    {
+        unset($this->items[$offset]);
+        // Recount instead of decrementing: unsetting a missing offset must not shrink the length
+        $this->length = count($this->items);
+    }
+}
diff --git a/src/Nodes/NodeTrait.php b/src/Nodes/NodeTrait.php
index 9ef1fa2..2ce4383 100644
--- a/src/Nodes/NodeTrait.php
+++ b/src/Nodes/NodeTrait.php
@@ -313,26 +313,6 @@ trait NodeTrait
}
/**
- * Returns the children of the current node.
- *
- * @param bool $filterEmptyDOMText Filter empty DOMText nodes?
- *
- * @return array
- */
- public function getChildren($filterEmptyDOMText = false)
- {
- $ret = iterator_to_array($this->childNodes);
- if ($filterEmptyDOMText) {
- // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
- $ret = array_values(array_filter($ret, function ($node) {
- return $node->nodeName !== '#text' || mb_strlen(trim($node->nodeValue));
- }));
- }
-
- return $ret;
- }
-
- /**
* Return an array indicating how many rows and columns this table has.
*
* @return array
@@ -418,12 +398,12 @@ trait NodeTrait
public function hasSingleTagInsideElement($tag)
{
// There should be exactly 1 element child with given tag
- if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) {
+ if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children[0]->nodeName !== $tag) {
return false;
}
// And there should be no text nodes with real content
- return array_reduce($children, function ($carry, $child) {
+ return array_reduce(iterator_to_array($children), function ($carry, $child) {
if (!$carry === false) {
return false;
}
@@ -443,7 +423,7 @@ trait NodeTrait
{
$result = false;
if ($this->hasChildNodes()) {
- foreach ($this->getChildren() as $child) {
+ foreach ($this->childNodes as $child) {
if (in_array($child->nodeName, $this->divToPElements)) {
$result = true;
} else {
diff --git a/src/Nodes/NodeUtility.php b/src/Nodes/NodeUtility.php
index 7a1f18e..631a0aa 100644
--- a/src/Nodes/NodeUtility.php
+++ b/src/Nodes/NodeUtility.php
@@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes;
use andreskrey\Readability\Nodes\DOM\DOMDocument;
use andreskrey\Readability\Nodes\DOM\DOMElement;
use andreskrey\Readability\Nodes\DOM\DOMNode;
+use andreskrey\Readability\Nodes\DOM\DOMNodeList;
/**
* Class NodeUtility.
@@ -157,4 +158,21 @@ class NodeUtility
return ($originalNode) ? $originalNode->nextSibling : $originalNode;
}
+
+    /**
+     * Copy a node list, leaving out text nodes that are empty once trimmed.
+     *
+     * Nodes of any other type are always kept. The given list is only read; the
+     * surviving nodes are added, in their original order, to a new list.
+     *
+     * @param \DOMNodeList $list
+     *
+     * @return DOMNodeList
+     */
+    public static function filterTextNodes(\DOMNodeList $list)
+    {
+        $filtered = new DOMNodeList();
+
+        foreach ($list as $candidate) {
+            $isBlankText = $candidate->nodeType === XML_TEXT_NODE
+                && mb_strlen(trim($candidate->nodeValue)) === 0;
+
+            if ($isBlankText) {
+                continue;
+            }
+
+            $filtered->add($candidate);
+        }
+
+        return $filtered;
+    }
}