From b1898de6974d07877ae477cb300e8c330e1bf3b4 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Sun, 3 Dec 2017 12:07:22 +0000 Subject: Add function to extract img srcs from other attributes that might be used for lazy loading or other types of post-load processing. --- src/Readability.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Readability.php b/src/Readability.php index 4dcde3c..19ccd5d 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1396,7 +1396,17 @@ class Readability foreach ($article->getElementsByTagName('img') as $img) { /** @var DOMElement $img */ - $src = $img->getAttribute('src'); + /* + * Extract all possible sources of img url and select the first one on the list. + */ + $url = [ + $img->getAttribute('src'), + $img->getAttribute('data-original'), + $img->getAttribute('data-url') + ]; + + $src = array_filter($url); + $src = reset($src); if ($src) { $img->setAttribute('src', $this->toAbsoluteURI($src)); } -- cgit v1.2.3