diff options
author | FiveFilters.org <[email protected]> | 2021-08-25 02:06:19 +0200 |
---|---|---|
committer | FiveFilters.org <[email protected]> | 2021-08-25 02:06:19 +0200 |
commit | 803131a61052fb657022b298944cdda0c5e41b07 (patch) | |
tree | 0fb10ca3c056e6c5c95294c38b2b27c46a2f3bcd | |
parent | ea11ba2444d3c610638dab5de5fa0a6d4e15ad09 (diff) |
Clean up and use regex by name
This should resemble the Readability.js code a little more.
-rw-r--r-- | src/Nodes/NodeTrait.php | 14 |
1 file changed, 7 insertions, 7 deletions
diff --git a/src/Nodes/NodeTrait.php b/src/Nodes/NodeTrait.php
index ce27d7b..e0761e5 100644
--- a/src/Nodes/NodeTrait.php
+++ b/src/Nodes/NodeTrait.php
@@ -282,7 +282,7 @@ trait NodeTrait
 
         // Look for a special ID
         $id = $this->getAttribute('id');
-        if (trim($id)) {
+        if (trim($id) !== '') {
             if (preg_match(NodeUtility::$regexps['negative'], $id)) {
                 $weight -= 25;
             }
@@ -302,11 +302,11 @@ trait NodeTrait
      *
      * @return string
      */
-    public function getTextContent($normalize = false)
+    public function getTextContent($normalize = true)
     {
-        $nodeValue = $this->nodeValue;
+        $nodeValue = trim($this->textContent);
         if ($normalize) {
-            $nodeValue = trim(preg_replace('/\s{2,}/', ' ', $nodeValue));
+            $nodeValue = preg_replace(NodeUtility::$regexps['normalize'], ' ', $nodeValue);
         }
 
         return $nodeValue;
@@ -374,7 +374,7 @@ trait NodeTrait
      */
     public function createNode($originalNode, $tagName)
     {
-        $text = $originalNode->getTextContent();
+        $text = $originalNode->getTextContent(false);
         $newNode = $originalNode->ownerDocument->createElement($tagName, $text);
 
         return $newNode;
@@ -400,7 +400,7 @@ trait NodeTrait
             return false;
         }
 
-        if ($node->parentNode->nodeName === $tagName && (!$filterFn || $filterFn($node->parentNode))) {
+        if ($node->parentNode->tagName === $tagName && (!$filterFn || $filterFn($node->parentNode))) {
             return true;
         }
 
@@ -433,7 +433,7 @@ trait NodeTrait
             }
 
             /* @var DOMNode $child */
-            return !($child->nodeType === XML_TEXT_NODE && !preg_match('/\S$/', $child->getTextContent()));
+            return !($child->nodeType === XML_TEXT_NODE && preg_match(NodeUtility::$regexps['hasContent'], $child->textContent));
         });
     }
 