summary | refs | log | tree | commit | diff
path: root/plugins
diff options
context:
space:
mode:
author: Andrew Dolgov <[email protected]> 2019-08-16 15:29:24 +0300
committer: Andrew Dolgov <[email protected]> 2019-08-16 15:29:24 +0300
commit: 3e4701116d9a7a2b93646f2c9aed80b63175d206 (patch)
tree: 76a6d8664cb7ff9b7bbb2e6da5b3ee434baca59f /plugins
parent: 865c54abcb5bdfb6d68757cfadccd8383cdc213c (diff)
af_readability: add missing file
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/af_readability/vendor/andreskrey/Readability/Configuration.php | 26
-rw-r--r-- plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php | 82
-rw-r--r-- plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php | 51
-rw-r--r-- plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php | 20
-rw-r--r-- plugins/af_readability/vendor/andreskrey/Readability/Readability.php | 56
5 files changed, 184 insertions, 51 deletions
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php
index 6c17bc757..0632399c6 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Configuration.php
@@ -167,32 +167,6 @@ class Configuration
}
/**
- * @deprecated Use getCharThreshold. Will be removed in version 2.0
- *
- * @return int
- */
- public function getWordThreshold()
- {
- @trigger_error('getWordThreshold was replaced with getCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED);
-
- return $this->charThreshold;
- }
-
- /**
- * @param int $charThreshold
- *
- * @return $this
- */
- public function setWordThreshold($charThreshold)
- {
- @trigger_error('setWordThreshold was replaced with setCharThreshold and will be removed in version 3.0', E_USER_DEPRECATED);
-
- $this->charThreshold = $charThreshold;
-
- return $this;
- }
-
- /**
* @return bool
*/
public function getArticleByLine()
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php
new file mode 100644
index 000000000..5149c0b98
--- /dev/null
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/DOM/DOMNodeList.php
@@ -0,0 +1,82 @@
+<?php
+
+namespace andreskrey\Readability\Nodes\DOM;
+
+/**
+ * Class DOMNodeList.
+ *
+ * This is a fake DOMNodeList class that allows adding items to the list. The original class is static and the nodes
+ * are defined automagically when instantiating it. This fake version behaves exactly the same way but adds the function
+ * add() that allows inserting new DOMNodes into the DOMNodeList.
+ *
+ * It cannot extend the original DOMNodeList class because the functionality behind the property ->length is hidden
+ * from the user and cannot be extended, changed, or tweaked.
+ */
+class DOMNodeList implements \Countable, \IteratorAggregate
+{
+ /**
+ * @var array
+ */
+ protected $items = [];
+
+ /**
+ * @var int
+ */
+ protected $length = 0;
+
+ /**
+ * To allow access to length in the same way that DOMNodeList allows.
+ *
+ * {@inheritdoc}
+ */
+ public function __get($name)
+ {
+ switch ($name) {
+ case 'length':
+ return $this->length;
+ default:
+ trigger_error(sprintf('Undefined property: %s::%s', static::class, $name));
+ }
+ }
+
+ /**
+ * @param DOMNode|DOMElement|DOMComment $node
+ *
+ * @return DOMNodeList
+ */
+ public function add($node)
+ {
+ $this->items[] = $node;
+ $this->length++;
+
+ return $this;
+ }
+
+ /**
+ * @param int $offset
+ *
+ * @return DOMNode|DOMElement|DOMComment
+ */
+ public function item(int $offset)
+ {
+ return $this->items[$offset];
+ }
+
+ /**
+ * @return int
+ */
+ public function count(): int
+ {
+ return $this->length;
+ }
+
+ /**
+ * To make it compatible with iterator_to_array() function.
+ *
+ * {@inheritdoc}
+ */
+ public function getIterator(): \ArrayIterator
+ {
+ return new \ArrayIterator($this->items);
+ }
+}
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php
index d7060ccbb..5198bbb5f 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeTrait.php
@@ -181,11 +181,11 @@ trait NodeTrait
/**
* Override for native hasAttribute.
*
- * @see getAttribute
- *
* @param $attributeName
*
* @return bool
+ *
+ * @see getAttribute
*/
public function hasAttribute($attributeName)
{
@@ -317,10 +317,14 @@ trait NodeTrait
*
* @param bool $filterEmptyDOMText Filter empty DOMText nodes?
*
+ * @deprecated Use NodeUtility::filterTextNodes, function will be removed in version 3.0
+ *
* @return array
*/
public function getChildren($filterEmptyDOMText = false)
{
+ @trigger_error('getChildren was replaced with NodeUtility::filterTextNodes and will be removed in version 3.0', E_USER_DEPRECATED);
+
$ret = iterator_to_array($this->childNodes);
if ($filterEmptyDOMText) {
// Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
@@ -418,12 +422,12 @@ trait NodeTrait
public function hasSingleTagInsideElement($tag)
{
// There should be exactly 1 element child with given tag
- if (count($children = $this->getChildren(true)) !== 1 || $children[0]->nodeName !== $tag) {
+ if (count($children = NodeUtility::filterTextNodes($this->childNodes)) !== 1 || $children->item(0)->nodeName !== $tag) {
return false;
}
// And there should be no text nodes with real content
- return array_reduce($children, function ($carry, $child) {
+ return array_reduce(iterator_to_array($children), function ($carry, $child) {
if (!$carry === false) {
return false;
}
@@ -443,7 +447,7 @@ trait NodeTrait
{
$result = false;
if ($this->hasChildNodes()) {
- foreach ($this->getChildren() as $child) {
+ foreach ($this->childNodes as $child) {
if (in_array($child->nodeName, $this->divToPElements)) {
$result = true;
} else {
@@ -500,18 +504,22 @@ trait NodeTrait
);
}
+ /**
+ * In the original JS project they check if the node has the style display=none, which unfortunately
+ * we have no way of knowing in our case. So we just check for the attribute hidden or "display: none".
+ *
+ * Might be a good idea to check for classes or other attributes like 'aria-hidden'
+ *
+ * @return bool
+ */
public function isProbablyVisible()
{
- /*
- * In the original JS project they check if the node has the style display=none, which unfortunately
- * in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none".
- *
- * Might be a good idea to check for classes or other attributes like 'aria-hidden'
- */
-
return !preg_match('/display:( )?none/', $this->getAttribute('style')) && !$this->hasAttribute('hidden');
}
+ /**
+ * @return bool
+ */
public function isWhitespace()
{
return ($this->nodeType === XML_TEXT_NODE && mb_strlen(trim($this->textContent)) === 0) ||
@@ -557,4 +565,23 @@ trait NodeTrait
$count -= ($count - $nodes->length);
}
}
+
+ /**
+ * Mimics JS's firstElementChild property. PHP only has firstChild which could be any type of DOMNode. Use this
+ * function to get the first one that is a DOMElement node.
+ *
+ * @return \DOMElement|null
+ */
+ public function getFirstElementChild()
+ {
+ if ($this->childNodes instanceof \Traversable) {
+ foreach ($this->childNodes as $node) {
+ if ($node instanceof \DOMElement) {
+ return $node;
+ }
+ }
+ }
+
+ return null;
+ }
}
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php
index 7a1f18ee4..cbf78bae0 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Nodes/NodeUtility.php
@@ -5,6 +5,7 @@ namespace andreskrey\Readability\Nodes;
use andreskrey\Readability\Nodes\DOM\DOMDocument;
use andreskrey\Readability\Nodes\DOM\DOMElement;
use andreskrey\Readability\Nodes\DOM\DOMNode;
+use andreskrey\Readability\Nodes\DOM\DOMNodeList;
/**
* Class NodeUtility.
@@ -157,4 +158,23 @@ class NodeUtility
return ($originalNode) ? $originalNode->nextSibling : $originalNode;
}
+
+ /**
+ * Remove all empty DOMNodes from DOMNodeLists.
+ *
+ * @param \DOMNodeList $list
+ *
+ * @return DOMNodeList
+ */
+ public static function filterTextNodes(\DOMNodeList $list)
+ {
+ $newList = new DOMNodeList();
+ foreach ($list as $node) {
+ if ($node->nodeType !== XML_TEXT_NODE || mb_strlen(trim($node->nodeValue))) {
+ $newList->add($node);
+ }
+ }
+
+ return $newList;
+ }
}
diff --git a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php
index 7b7eed6bf..6bcbf78d7 100644
--- a/plugins/af_readability/vendor/andreskrey/Readability/Readability.php
+++ b/plugins/af_readability/vendor/andreskrey/Readability/Readability.php
@@ -57,6 +57,13 @@ class Readability
protected $author = null;
/**
+ * Website name.
+ *
+ * @var string|null
+ */
+ protected $siteName = null;
+
+ /**
* Direction of the text.
*
* @var string|null
@@ -287,10 +294,10 @@ class Readability
$values = [];
// property is a space-separated list of values
- $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image)\s*/i';
+ $propertyPattern = '/\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|image|site_name)(?!:)\s*/i';
// name is a single value
- $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image)\s*$/i';
+ $namePattern = '/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|image|site_name)(?!:)\s*$/i';
// Find description tags.
foreach ($this->dom->getElementsByTagName('meta') as $meta) {
@@ -332,7 +339,6 @@ class Readability
* This could be easily replaced with an ugly set of isset($values['key']) or a bunch of ??s.
* Will probably replace it with ??s after dropping support of PHP5.6
*/
-
$key = current(array_intersect([
'dc:title',
'dcterm:title',
@@ -373,11 +379,18 @@ class Readability
// get main image
$key = current(array_intersect([
+ 'image',
'og:image',
'twitter:image'
], array_keys($values)));
$this->setImage(isset($values[$key]) ? $values[$key] : null);
+
+ $key = current(array_intersect([
+ 'og:site_name'
+ ], array_keys($values)));
+
+ $this->setSiteName(isset($values[$key]) ? $values[$key] : null);
}
/**
@@ -722,7 +735,7 @@ class Readability
*/
if ($node->hasSingleTagInsideElement('p') && $node->getLinkDensity() < 0.25) {
$this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
- $pNode = $node->getChildren(true)[0];
+ $pNode = NodeUtility::filterTextNodes($node->childNodes)->item(0);
$node->parentNode->replaceChild($pNode, $node);
$node = $pNode;
$elementsToScore[] = $node;
@@ -1082,7 +1095,7 @@ class Readability
// If the top candidate is the only child, use parent instead. This will help sibling
// joining logic when adjacent content is actually located in parent's sibling node.
$parentOfTopCandidate = $topCandidate->parentNode;
- while ($parentOfTopCandidate->nodeName !== 'body' && count($parentOfTopCandidate->getChildren(true)) === 1) {
+ while ($parentOfTopCandidate->nodeName !== 'body' && count(NodeUtility::filterTextNodes($parentOfTopCandidate->childNodes)) === 1) {
$topCandidate = $parentOfTopCandidate;
$parentOfTopCandidate = $topCandidate->parentNode;
}
@@ -1102,14 +1115,16 @@ class Readability
$siblingScoreThreshold = max(10, $topCandidate->contentScore * 0.2);
// Keep potential top candidate's parent node to try to get text direction of it later.
$parentOfTopCandidate = $topCandidate->parentNode;
- $siblings = $parentOfTopCandidate->getChildren();
+ $siblings = $parentOfTopCandidate->childNodes;
$hasContent = false;
$this->logger->info('[Rating] Adding top candidate siblings...');
- /** @var DOMElement $sibling */
- foreach ($siblings as $sibling) {
+ /* @var DOMElement $sibling */
+ // Can't foreach here because down there we might change the tag name and that causes the foreach to skip items
+ for ($i = 0; $i < $siblings->length; $i++) {
+ $sibling = $siblings[$i];
$append = false;
if ($sibling === $topCandidate) {
@@ -1147,7 +1162,6 @@ class Readability
* We have a node that isn't a common block level element, like a form or td tag.
* Turn it into a div so it doesn't get filtered out later by accident.
*/
-
$sibling = NodeUtility::setNodeTag($sibling, 'div');
}
@@ -1266,11 +1280,11 @@ class Readability
// Remove single-cell tables
foreach ($article->shiftingAwareGetElementsByTagName('table') as $table) {
/** @var DOMNode $table */
- $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->childNodes[0] : $table;
+ $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->getFirstElementChild() : $table;
if ($tbody->hasSingleTagInsideElement('tr')) {
- $row = $tbody->firstChild;
+ $row = $tbody->getFirstElementChild();
if ($row->hasSingleTagInsideElement('td')) {
- $cell = $row->firstChild;
+ $cell = $row->getFirstElementChild();
$cell = NodeUtility::setNodeTag($cell, (array_reduce(iterator_to_array($cell->childNodes), function ($carry, $node) {
return $node->isPhrasingContent() && $carry;
}, true)) ? 'p' : 'div');
@@ -1597,7 +1611,7 @@ class Readability
$node->removeAttribute('class');
}
- for ($node = $node->firstChild; $node !== null; $node = $node->nextSibling) {
+ for ($node = $node->getFirstElementChild(); $node !== null; $node = $node->nextSibling) {
$this->_cleanClasses($node);
}
}
@@ -1757,6 +1771,22 @@ class Readability
}
/**
+ * @return string|null
+ */
+ public function getSiteName()
+ {
+ return $this->siteName;
+ }
+
+ /**
+ * @param string $siteName
+ */
+ protected function setSiteName($siteName)
+ {
+ $this->siteName = $siteName;
+ }
+
+ /**
* @return null|string
*/
public function getDirection()