From 9b0c58b8a610d109e88b22ba5377576f2fe4a575 Mon Sep 17 00:00:00 2001
From: "FiveFilters.org"
Date: Fri, 20 Aug 2021 15:54:08 +0200
Subject: Add method to unwrap img inside noscript

https://github.com/mozilla/readability/commit/d784bf7e20e25ec1b3a6102a20c83d35fe3ef87d
(but code based on current version of Readability.js)
---
 src/Readability.php | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)

(limited to 'src/Readability.php')

diff --git a/src/Readability.php b/src/Readability.php
index d85b987..73a8a54 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -305,6 +305,9 @@ class Readability
         }
         $dom->encoding = 'UTF-8';
 
+        // Unwrap image from noscript
+        $this->unwrapNoscriptImages($dom);
+
         $this->removeScripts($dom);
 
         $this->prepDocument($dom);
@@ -834,6 +837,105 @@ class Readability
         return false;
     }
+
+    /**
+     * Check if node is image, or if node contains exactly only one image
+     * whether as a direct child or as its descendants.
+     *
+     * @param DOMElement $node
+     */
+    private function isSingleImage(DOMElement $node) {
+        if ($node->tagName === 'img') {
+            return true;
+        }
+        if ($node->children()->length !== 1 || trim($node->textContent) !== '') {
+            return false;
+        }
+        return $this->isSingleImage($node->children()->item(0));
+    }
+    /**
+     * Find all