summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author FiveFilters.org <[email protected]> 2021-08-25 02:06:19 +0200
committer FiveFilters.org <[email protected]> 2021-08-25 02:06:19 +0200
commit 803131a61052fb657022b298944cdda0c5e41b07 (patch)
tree 0fb10ca3c056e6c5c95294c38b2b27c46a2f3bcd /src
parent ea11ba2444d3c610638dab5de5fa0a6d4e15ad09 (diff)
Clean up and use regex by name
This should resemble the Readability.js code a little more.
Diffstat (limited to 'src')
-rw-r--r-- src/Nodes/NodeTrait.php 14
1 files changed, 7 insertions, 7 deletions
diff --git a/src/Nodes/NodeTrait.php b/src/Nodes/NodeTrait.php
index ce27d7b..e0761e5 100644
--- a/src/Nodes/NodeTrait.php
+++ b/src/Nodes/NodeTrait.php
@@ -282,7 +282,7 @@ trait NodeTrait
// Look for a special ID
$id = $this->getAttribute('id');
- if (trim($id)) {
+ if (trim($id) !== '') {
if (preg_match(NodeUtility::$regexps['negative'], $id)) {
$weight -= 25;
}
@@ -302,11 +302,11 @@ trait NodeTrait
*
* @return string
*/
- public function getTextContent($normalize = false)
+ public function getTextContent($normalize = true)
{
- $nodeValue = $this->nodeValue;
+ $nodeValue = trim($this->textContent);
if ($normalize) {
- $nodeValue = trim(preg_replace('/\s{2,}/', ' ', $nodeValue));
+ $nodeValue = preg_replace(NodeUtility::$regexps['normalize'], ' ', $nodeValue);
}
return $nodeValue;
@@ -374,7 +374,7 @@ trait NodeTrait
*/
public function createNode($originalNode, $tagName)
{
- $text = $originalNode->getTextContent();
+ $text = $originalNode->getTextContent(false);
$newNode = $originalNode->ownerDocument->createElement($tagName, $text);
return $newNode;
@@ -400,7 +400,7 @@ trait NodeTrait
return false;
}
- if ($node->parentNode->nodeName === $tagName && (!$filterFn || $filterFn($node->parentNode))) {
+ if ($node->parentNode->tagName === $tagName && (!$filterFn || $filterFn($node->parentNode))) {
return true;
}
@@ -433,7 +433,7 @@ trait NodeTrait
}
/* @var DOMNode $child */
- return !($child->nodeType === XML_TEXT_NODE && !preg_match('/\S$/', $child->getTextContent()));
+ return !($child->nodeType === XML_TEXT_NODE && preg_match(NodeUtility::$regexps['hasContent'], $child->textContent));
});
}