From 9ba1adcde4953c8e04630b7b31c29b66d158d428 Mon Sep 17 00:00:00 2001
From: wn_
Date: Wed, 3 Apr 2024 17:42:59 +0000
Subject: Update 'fivefilters/readability.php' to 37c0c2ce76aa90e8adafbd5cb0b0332a54df1523

---
Reviewer notes (commentary only; text between the "---" line above and the
first "diff --git" line is not part of the commit):

 * Configuration.php: adds a protected $extraIgnoredElements property that
   defaults to an empty array, plus getExtraIgnoredElements() and
   setExtraIgnoredElements(). The setter stores its argument as-is and
   returns $this.
 * Readability.php: removeScripts() is renamed to removeIgnoredElements() and
   the call site shown in the first hunk is updated to match. The tag list it
   loops over changes from ['script', 'noscript'] to 'noscript', 'script'
   followed by every entry of getExtraIgnoredElements(). Each entry is passed
   to $dom->getElementsByTagName() and every match is removed through
   NodeUtility::removeNode().
 * NOTE(review): the setter shown here has no type declaration and performs
   no validation, so whatever it is given reaches getElementsByTagName();
   presumably callers pass a list of tag-name strings - confirm.
 * NOTE(review): the new foreach uses the spread operator inside an array
   literal. That syntax needs PHP 7.4 or later, and arrays with string keys
   can only be unpacked from PHP 8.1 - verify against the minimum PHP
   version this tree supports.
 * NOTE(review): leading whitespace inside the hunks below was rebuilt
   assuming 4-space indentation; compare with the upstream commit before
   applying.

 .../readability.php/src/Configuration.php | 25 ++++++++++++++++++++++
 .../readability.php/src/Readability.php   |  8 +++----
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'vendor/fivefilters/readability.php/src')

diff --git a/vendor/fivefilters/readability.php/src/Configuration.php b/vendor/fivefilters/readability.php/src/Configuration.php
index 6d1f03f..0659a82 100644
--- a/vendor/fivefilters/readability.php/src/Configuration.php
+++ b/vendor/fivefilters/readability.php/src/Configuration.php
@@ -83,6 +83,11 @@ class Configuration
      */
     protected $disableJSONLD = false;
 
+    /**
+     * @var array
+     */
+    protected $extraIgnoredElements = [];
+
     /**
      * Configuration constructor.
      *
@@ -420,4 +425,24 @@ class Configuration
 
         return $this;
     }
+
+    /**
+     * @return array
+     */
+    public function getExtraIgnoredElements()
+    {
+        return $this->extraIgnoredElements;
+    }
+
+    /**
+     * @param array
+     *
+     * @return $this
+     */
+    public function setExtraIgnoredElements($extraIgnoredElements)
+    {
+        $this->extraIgnoredElements = $extraIgnoredElements;
+
+        return $this;
+    }
 }
diff --git a/vendor/fivefilters/readability.php/src/Readability.php b/vendor/fivefilters/readability.php/src/Readability.php
index 6407a92..c5318a4 100644
--- a/vendor/fivefilters/readability.php/src/Readability.php
+++ b/vendor/fivefilters/readability.php/src/Readability.php
@@ -342,7 +342,7 @@ class Readability
         // Extract JSON-LD metadata before removing scripts
         $this->jsonld = $this->configuration->getDisableJSONLD() ? [] : $this->getJSONLD($dom);
 
-        $this->removeScripts($dom);
+        $this->removeIgnoredElements($dom);
 
         $this->prepDocument($dom);
 
@@ -1188,13 +1188,13 @@ class Readability
     }
 
     /**
-     * Removes all the scripts of the html.
+     * Removes elements that should be ignored.
      *
      * @param DOMDocument $dom
      */
-    private function removeScripts(DOMDocument $dom)
+    private function removeIgnoredElements(DOMDocument $dom)
     {
-        foreach (['script', 'noscript'] as $tag) {
+        foreach (['noscript', 'script', ...$this->configuration->getExtraIgnoredElements()] as $tag) {
             $nodes = $dom->getElementsByTagName($tag);
             foreach (iterator_to_array($nodes) as $node) {
                 NodeUtility::removeNode($node);
-- 
cgit v1.2.3