summaryrefslogtreecommitdiff
path: root/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2022-07-16 16:30:46 +0300
committerAndrew Dolgov <[email protected]>2022-07-16 16:30:46 +0300
commitb8c1d622a77226b14fb307cfe3e0f4cea9e4268a (patch)
tree3df519798dcfef20c96a079e0956ebcd5509b619 /plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes
parentfdd1c43612011060b4b876db438eb7ec62dd077d (diff)
add missing files for forked idiorm
Diffstat (limited to 'plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes')
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCdataSection.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCharacterData.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMComment.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocument.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentType.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMElement.php40
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntityReference.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNode.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNodeList.php2
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMProcessingInstruction.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMText.php4
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeTrait.php69
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php36
17 files changed, 113 insertions, 86 deletions
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php
index 91729f3b1..1bdf395e7 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMAttr.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMAttr extends \DOMAttr
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCdataSection.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCdataSection.php
index 4535e4e99..6ac3dcdfc 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCdataSection.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCdataSection.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMCdataSection extends \DOMCdataSection
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCharacterData.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCharacterData.php
index e4db11a06..b196979fc 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCharacterData.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMCharacterData.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMCharacterData extends \DOMCharacterData
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMComment.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMComment.php
index 13b004aff..3b691f486 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMComment.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMComment.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMComment extends \DOMComment
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocument.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocument.php
index 81e9c7de6..d91233807 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocument.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocument.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMDocument extends \DOMDocument
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php
index bcb89468d..33a3f950a 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentFragment.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMDocumentFragment extends \DOMDocumentFragment
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentType.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentType.php
index 18705a7a7..cf3cd0f88 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentType.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMDocumentType.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMDocumentType extends \DOMDocumentType
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMElement.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMElement.php
index c07670bf7..b0da84fb9 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMElement.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMElement.php
@@ -1,10 +1,46 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMElement extends \DOMElement
{
use NodeTrait;
+
+ /**
+ * Returns the child elements of this element.
+ *
+ * To get all child nodes, including non-element nodes like text and comment nodes, use childNodes.
+ *
+ * @return DOMNodeList
+ */
+ public function children()
+ {
+ $newList = new DOMNodeList();
+ foreach ($this->childNodes as $node) {
+ if ($node->nodeType === XML_ELEMENT_NODE) {
+ $newList->add($node);
+ }
+ }
+ return $newList;
+ }
+
+ /**
+ * Returns the Element immediately prior to the specified one in its parent's children list, or null if the specified element is the first one in the list.
+ *
+ * @see https://wiki.php.net/rfc/dom_living_standard_api
+ * @return DOMElement|null
+ */
+ public function previousElementSibling()
+ {
+ $previous = $this->previousSibling;
+ while ($previous) {
+ if ($previous->nodeType === XML_ELEMENT_NODE) {
+ return $previous;
+ }
+ $previous = $previous->previousSibling;
+ }
+ return null;
+ }
}
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php
index 8493e7319..751b59c48 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntity.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMEntity extends \DOMEntity
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntityReference.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntityReference.php
index 32cecb75c..d6fd6d49c 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntityReference.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMEntityReference.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMEntityReference extends \DOMEntityReference
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNode.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNode.php
index 7c3c4f3a2..4a3ab0dc3 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNode.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNode.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
/**
* @method getAttribute($attribute)
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNodeList.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNodeList.php
index 5149c0b98..a718c00ce 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNodeList.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNodeList.php
@@ -1,6 +1,6 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
/**
* Class DOMNodeList.
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php
index 2e888ce00..d276e42a4 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMNotation.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMNotation extends \DOMNotation
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMProcessingInstruction.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMProcessingInstruction.php
index 9dd4c5c70..82c69cf01 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMProcessingInstruction.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMProcessingInstruction.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMProcessingInstruction extends \DOMProcessingInstruction
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMText.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMText.php
index 42c575b49..6a5716c92 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMText.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/DOM/DOMText.php
@@ -1,8 +1,8 @@
<?php
-namespace andreskrey\Readability\Nodes\DOM;
+namespace fivefilters\Readability\Nodes\DOM;
-use andreskrey\Readability\Nodes\NodeTrait;
+use fivefilters\Readability\Nodes\NodeTrait;
class DOMText extends \DOMText
{
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeTrait.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeTrait.php
index 5198bbb5f..1e9e3d1ee 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeTrait.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeTrait.php
@@ -1,11 +1,11 @@
<?php
-namespace andreskrey\Readability\Nodes;
+namespace fivefilters\Readability\Nodes;
-use andreskrey\Readability\Nodes\DOM\DOMDocument;
-use andreskrey\Readability\Nodes\DOM\DOMElement;
-use andreskrey\Readability\Nodes\DOM\DOMNode;
-use andreskrey\Readability\Nodes\DOM\DOMText;
+use fivefilters\Readability\Nodes\DOM\DOMDocument;
+use fivefilters\Readability\Nodes\DOM\DOMElement;
+use fivefilters\Readability\Nodes\DOM\DOMNode;
+use fivefilters\Readability\Nodes\DOM\DOMText;
use DOMNodeList;
/**
@@ -38,7 +38,6 @@ trait NodeTrait
* @var array
*/
private $divToPElements = [
- 'a',
'blockquote',
'dl',
'div',
@@ -47,8 +46,7 @@ trait NodeTrait
'p',
'pre',
'table',
- 'ul',
- 'select',
+ 'ul'
];
/**
@@ -169,6 +167,7 @@ trait NodeTrait
*
* @return string
*/
+ #[\ReturnTypeWillChange]
public function getAttribute($attributeName)
{
if (!is_null($this->attributes)) {
@@ -187,6 +186,7 @@ trait NodeTrait
*
* @see getAttribute
*/
+ #[\ReturnTypeWillChange]
public function hasAttribute($attributeName)
{
if (!is_null($this->attributes)) {
@@ -240,19 +240,21 @@ trait NodeTrait
*/
public function getLinkDensity()
{
- $linkLength = 0;
$textLength = mb_strlen($this->getTextContent(true));
-
- if (!$textLength) {
+ if ($textLength === 0) {
return 0;
}
+ $linkLength = 0;
+
$links = $this->getAllLinks();
if ($links) {
/** @var DOMElement $link */
foreach ($links as $link) {
- $linkLength += mb_strlen($link->getTextContent(true));
+ $href = $link->getAttribute('href');
+ $coefficient = ($href && preg_match(NodeUtility::$regexps['hashUrl'], $href)) ? 0.3 : 1;
+ $linkLength += mb_strlen($link->getTextContent(true)) * $coefficient;
}
}
@@ -282,7 +284,7 @@ trait NodeTrait
// Look for a special ID
$id = $this->getAttribute('id');
- if (trim($id)) {
+ if (trim($id) !== '') {
if (preg_match(NodeUtility::$regexps['negative'], $id)) {
$weight -= 25;
}
@@ -302,41 +304,17 @@ trait NodeTrait
*
* @return string
*/
- public function getTextContent($normalize = false)
+ public function getTextContent($normalize = true)
{
- $nodeValue = $this->nodeValue;
+ $nodeValue = trim($this->textContent);
if ($normalize) {
- $nodeValue = trim(preg_replace('/\s{2,}/', ' ', $nodeValue));
+ $nodeValue = preg_replace(NodeUtility::$regexps['normalize'], ' ', $nodeValue);
}
return $nodeValue;
}
/**
- * Returns the children of the current node.
- *
- * @param bool $filterEmptyDOMText Filter empty DOMText nodes?
- *
- * @deprecated Use NodeUtility::filterTextNodes, function will be removed in version 3.0
- *
- * @return array
- */
- public function getChildren($filterEmptyDOMText = false)
- {
- @trigger_error('getChildren was replaced with NodeUtility::filterTextNodes and will be removed in version 3.0', E_USER_DEPRECATED);
-
- $ret = iterator_to_array($this->childNodes);
- if ($filterEmptyDOMText) {
- // Array values is used to discard the key order. Needs to be 0 to whatever without skipping any number
- $ret = array_values(array_filter($ret, function ($node) {
- return $node->nodeName !== '#text' || mb_strlen(trim($node->nodeValue));
- }));
- }
-
- return $ret;
- }
-
- /**
* Return an array indicating how many rows and columns this table has.
*
* @return array
@@ -374,7 +352,7 @@ trait NodeTrait
*/
public function createNode($originalNode, $tagName)
{
- $text = $originalNode->getTextContent();
+ $text = $originalNode->getTextContent(false);
$newNode = $originalNode->ownerDocument->createElement($tagName, $text);
return $newNode;
@@ -433,7 +411,7 @@ trait NodeTrait
}
/* @var DOMNode $child */
- return !($child->nodeType === XML_TEXT_NODE && !preg_match('/\S$/', $child->getTextContent()));
+ return !($child->nodeType === XML_TEXT_NODE && preg_match(NodeUtility::$regexps['hasContent'], $child->textContent));
});
}
@@ -508,13 +486,14 @@ trait NodeTrait
* In the original JS project they check if the node has the style display=none, which unfortunately
* in our case we have no way of knowing that. So we just check for the attribute hidden or "display: none".
*
- * Might be a good idea to check for classes or other attributes like 'aria-hidden'
- *
* @return bool
*/
public function isProbablyVisible()
{
- return !preg_match('/display:( )?none/', $this->getAttribute('style')) && !$this->hasAttribute('hidden');
+ return !preg_match('/display:( )?none/i', $this->getAttribute('style')) &&
+ !$this->hasAttribute('hidden') &&
+ //check for "fallback-image" so that wikimedia math images are displayed
+ (!$this->hasAttribute('aria-hidden') || $this->getAttribute('aria-hidden') !== 'true' || ($this->hasAttribute('class') && strpos($this->getAttribute('class'), 'fallback-image') !== false));
}
/**
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php
index cbf78bae0..56de70517 100644
--- a/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/src/Nodes/NodeUtility.php
@@ -1,11 +1,11 @@
<?php
-namespace andreskrey\Readability\Nodes;
+namespace fivefilters\Readability\Nodes;
-use andreskrey\Readability\Nodes\DOM\DOMDocument;
-use andreskrey\Readability\Nodes\DOM\DOMElement;
-use andreskrey\Readability\Nodes\DOM\DOMNode;
-use andreskrey\Readability\Nodes\DOM\DOMNodeList;
+use fivefilters\Readability\Nodes\DOM\DOMDocument;
+use fivefilters\Readability\Nodes\DOM\DOMElement;
+use fivefilters\Readability\Nodes\DOM\DOMNode;
+use fivefilters\Readability\Nodes\DOM\DOMNodeList;
/**
* Class NodeUtility.
@@ -18,31 +18,43 @@ class NodeUtility
* @var array
*/
public static $regexps = [
- 'unlikelyCandidates' => '/-ad-|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
- 'okMaybeItsACandidate' => '/and|article|body|column|main|shadow/i',
+ 'unlikelyCandidates' => '/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i',
+ 'okMaybeItsACandidate' => '/and|article|body|column|content|main|shadow/i',
'extraneous' => '/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i',
'byline' => '/byline|author|dateline|writtenby|p-author/i',
- 'replaceFonts' => '/<(\/?)font[^>]*>/gi',
+ 'replaceFonts' => '/<(\/?)font[^>]*>/i',
'normalize' => '/\s{2,}/',
'videos' => '/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i',
+ 'shareElements' => '/(\b|_)(share|sharedaddy)(\b|_)/i',
'nextLink' => '/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i',
'prevLink' => '/(prev|earl|old|new|<|«)/i',
+ 'tokenize' => '/\W+/',
'whitespace' => '/^\s*$/',
'hasContent' => '/\S$/',
'positive' => '/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i',
- 'negative' => '/hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
+ 'negative' => '/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i',
// \x{00A0} is the unicode version of &nbsp;
- 'onlyWhitespace' => '/\x{00A0}|\s+/u'
+ 'onlyWhitespace' => '/\x{00A0}|\s+/u',
+ 'hashUrl' => '/^#.+/',
+ 'srcsetUrl' => '/(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/',
+ 'b64DataUrl' => '/^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i',
+ // See: https://schema.org/Article
+ 'jsonLdArticleTypes' => '/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/'
+
];
/**
+ * Finds the next node, starting from the given node, and ignoring
+ * whitespace in between. If the given node is an element, the same node is
+ * returned.
+ *
* Imported from the Element class on league\html-to-markdown.
*
* @param $node
*
- * @return DOMElement
+ * @return DOMNode
*/
- public static function nextElement($node)
+ public static function nextNode($node)
{
$next = $node;
while ($next