summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Configuration.php 19
-rw-r--r-- src/Readability.php 124
2 files changed, 138 insertions, 5 deletions
diff --git a/src/Configuration.php b/src/Configuration.php
index 0b5e0bb..de3d16a 100644
--- a/src/Configuration.php
+++ b/src/Configuration.php
@@ -2,11 +2,17 @@
namespace andreskrey\Readability;
+use Psr\Log\LoggerAwareTrait;
+use Psr\Log\LoggerInterface;
+use Psr\Log\NullLogger;
+
/**
* Class Configuration.
*/
class Configuration
{
+ use LoggerAwareTrait;
+
/**
* @var int
*/
@@ -49,6 +55,19 @@ class Configuration
protected $originalURL = 'http://fakehost';
/**
+ * @return LoggerInterface
+ */
+ public function getLogger()
+ {
+     // When no logger has been injected ($this->logger is still null), hand back
+     // a fresh null logger so callers can invoke logging methods unconditionally.
+     return $this->logger !== null ? $this->logger : new NullLogger();
+ }
+
+ /**
* @return int
*/
public function getMaxTopCandidates()
diff --git a/src/Readability.php b/src/Readability.php
index 7ff302f..7a61602 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -7,6 +7,7 @@ use andreskrey\Readability\Nodes\DOM\DOMElement;
use andreskrey\Readability\Nodes\DOM\DOMNode;
use andreskrey\Readability\Nodes\DOM\DOMText;
use andreskrey\Readability\Nodes\NodeUtility;
+use Psr\Log\LoggerInterface;
/**
* Class Readability.
@@ -70,6 +71,13 @@ class Readability
private $configuration;
/**
+ * Logger object.
+ *
+ * @var LoggerInterface
+ */
+ private $logger;
+
+ /**
* @var array
*/
private $defaultTagsToScore = [
@@ -102,6 +110,7 @@ class Readability
public function __construct(Configuration $configuration)
{
$this->configuration = $configuration;
+ $this->logger = $this->configuration->getLogger(); // getLogger() falls back to a NullLogger when none was set, so this is never null
}
/**
@@ -115,21 +124,25 @@ class Readability
*/
public function parse($html)
{
- $this->dom = $this->loadHTML($html);
-
- $this->getMetadata();
+ $this->logger->info('*** Starting parse process...');
- $this->getMainImage();
+ $this->dom = $this->loadHTML($html);
// Checking for minimum HTML to work with.
if (!($root = $this->dom->getElementsByTagName('body')->item(0)) || !$root->firstChild) {
+ $this->logger->emergency('No body tag present or body tag empty');
throw new ParseException('Invalid or incomplete HTML.');
}
+ $this->getMetadata();
+
+ $this->getMainImage();
+
while (true) {
$root = $root->firstChild;
$elementsToScore = $this->getNodes($root);
+ $this->logger->debug(sprintf('Elements to score: \'%s\'', count($elementsToScore)));
$result = $this->rateNodes($elementsToScore);
@@ -145,17 +158,24 @@ class Readability
foreach ($result->getElementsByTagName('p') as $p) {
$length += mb_strlen($p->textContent);
}
+
+ $this->logger->info(sprintf('[Parsing] Article parsed. Amount of words: %s. Current threshold is: %s', $length, $this->configuration->getWordThreshold()));
+
if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < $this->configuration->getWordThreshold()) {
$this->dom = $this->loadHTML($html);
$root = $this->dom->getElementsByTagName('body')->item(0);
if ($this->configuration->getStripUnlikelyCandidates()) {
+ $this->logger->debug('[Parsing] Threshold not met, trying again setting StripUnlikelyCandidates as false');
$this->configuration->setStripUnlikelyCandidates(false);
} elseif ($this->configuration->getWeightClasses()) {
+ $this->logger->debug('[Parsing] Threshold not met, trying again setting WeightClasses as false');
$this->configuration->setWeightClasses(false);
} elseif ($this->configuration->getCleanConditionally()) {
+ $this->logger->debug('[Parsing] Threshold not met, trying again setting CleanConditionally as false');
$this->configuration->setCleanConditionally(false);
} else {
+ $this->logger->emergency('[Parsing] Could not parse text, giving up :(');
throw new ParseException('Could not parse text.');
}
} else {
@@ -169,6 +189,7 @@ class Readability
// first paragraph as the excerpt. This can be used for displaying a preview of
// the article's content.
if (!$this->getExcerpt()) {
+ $this->logger->debug('[Parsing] No excerpt text found on metadata, extracting first p node and using it as excerpt.');
$paragraphs = $result->getElementsByTagName('p');
if ($paragraphs->length > 0) {
$this->setExcerpt(trim($paragraphs->item(0)->textContent));
@@ -177,6 +198,8 @@ class Readability
$this->setContent($result->C14N());
+ $this->logger->info('*** Parse successful :)');
+
return true;
}
@@ -194,6 +217,8 @@ class Readability
*/
private function loadHTML($html)
{
+ $this->logger->debug('[Loading] Loading HTML...');
+
// To avoid throwing a gazillion of errors on malformed HTMLs
libxml_use_internal_errors(true);
@@ -205,11 +230,13 @@ class Readability
}
if ($this->configuration->getNormalizeEntities()) {
+ $this->logger->debug('[Loading] Normalized entities via mb_convert_encoding.');
// Replace UTF-8 characters with the HTML Entity equivalent. Useful to fix html with mixed content
$html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
}
if ($this->configuration->getSummonCthulhu()) {
+ $this->logger->debug('[Loading] Removed script tags via regex H̶͈̩̟̬̱͠E̡̨̬͔̳̜͢͠ ̡̧̯͉̩͙̩̹̞̠͎͈̹̥̠͞ͅͅC̶͉̞̘̖̝̗͓̬̯͍͉̤̬͢͢͞Ò̟̘͉͖͎͉̱̭̣̕M̴̯͈̻̱̱̣̗͈̠̙̲̥͘͞E̷̛͙̼̲͍͕̹͍͇̗̻̬̮̭̱̥͢Ş̛̟͔̙̜̤͇̮͍̙̝̀͘');
$html = preg_replace('/<script\b[^>]*>([\s\S]*?)<\/script>/', '', $html);
}
@@ -221,6 +248,8 @@ class Readability
$this->prepDocument($dom);
+ $this->logger->debug('[Loading] Loaded HTML successfully.');
+
return $dom;
}
@@ -229,6 +258,8 @@ class Readability
*/
private function getMetadata()
{
+ $this->logger->debug('[Metadata] Retrieving metadata...');
+
$values = [];
// Match "description", or Twitter's "twitter:description" (Cards)
// in name attribute.
@@ -243,6 +274,7 @@ class Readability
$elementProperty = $meta->getAttribute('property');
if (in_array('author', [$elementName, $elementProperty])) {
+ $this->logger->info(sprintf('[Metadata] Found author: \'%s\'', $meta->getAttribute('content')));
$this->setAuthor($meta->getAttribute('content'));
continue;
}
@@ -265,12 +297,15 @@ class Readability
}
}
if (array_key_exists('description', $values)) {
+ $this->logger->info(sprintf('[Metadata] Found excerpt in \'description\' tag: \'%s\'', $values['description']));
$this->setExcerpt($values['description']);
} elseif (array_key_exists('og:description', $values)) {
// Use facebook open graph description.
+ $this->logger->info(sprintf('[Metadata] Found excerpt in \'og:description\' tag: \'%s\'', $values['og:description']));
$this->setExcerpt($values['og:description']);
} elseif (array_key_exists('twitter:description', $values)) {
// Use twitter cards description.
+ $this->logger->info(sprintf('[Metadata] Found excerpt in \'twitter:description\' tag: \'%s\'', $values['twitter:description']));
$this->setExcerpt($values['twitter:description']);
}
@@ -279,15 +314,23 @@ class Readability
if (!$this->getTitle()) {
if (array_key_exists('og:title', $values)) {
// Use facebook open graph title.
+ $this->logger->info(sprintf('[Metadata] Found title in \'og:title\' tag: \'%s\'', $values['og:title']));
$this->setTitle($values['og:title']);
} elseif (array_key_exists('twitter:title', $values)) {
// Use twitter cards title.
+ $this->logger->info(sprintf('[Metadata] Found title in \'twitter:title\' tag: \'%s\'', $values['twitter:title']));
$this->setTitle($values['twitter:title']);
}
}
if (array_key_exists('og:image', $values) || array_key_exists('twitter:image', $values)) {
- $this->setImage(array_key_exists('og:image', $values) ? $values['og:image'] : $values['twitter:image']);
+ if (array_key_exists('og:image', $values)) {
+ $this->logger->info(sprintf('[Metadata] Found main image in \'og:image\' tag: \'%s\'', $values['og:image']));
+ $this->setImage($values['og:image']);
+ } else {
+ $this->logger->info(sprintf('[Metadata] Found main image in \'twitter:image\' tag: \'%s\'', $values['twitter:image']));
+ $this->setImage($values['twitter:image']);
+ }
}
}
@@ -367,8 +410,10 @@ class Readability
if ($this->getTitle()) {
$originalTitle = $this->getTitle();
} else {
+ $this->logger->debug('[Metadata] Could not find title in metadata, searching for the title tag...');
$titleTag = $this->dom->getElementsByTagName('title');
if ($titleTag->length > 0) {
+ $this->logger->info(sprintf('[Metadata] Using title tag as article title: \'%s\'', $titleTag->item(0)->nodeValue));
$originalTitle = $titleTag->item(0)->nodeValue;
}
}
@@ -390,10 +435,13 @@ class Readability
$titleHadHierarchicalSeparators = (bool)preg_match('/ [\\\\\/>»] /', $curTitle);
$curTitle = preg_replace('/(.*)[\|\-\\\\\/>»] .*/i', '$1', $originalTitle);
+ $this->logger->info(sprintf('[Metadata] Found hierarchical separators in title, new title is: \'%s\'', $curTitle));
+
// If the resulting title is too short (3 words or fewer), remove
// the first part instead:
if (count(preg_split('/\s+/', $curTitle)) < 3) {
$curTitle = preg_replace('/[^\|\-\\\\\/>»]*[\|\-\\\\\/>»](.*)/i', '$1', $originalTitle);
+ $this->logger->info(sprintf('[Metadata] Title too short, using the first part of the title instead: \'%s\'', $curTitle));
}
} elseif (strpos($curTitle, ': ') !== false) {
// Check if we have an heading containing this exact string, so we
@@ -412,9 +460,12 @@ class Readability
if (!$match) {
$curTitle = substr($originalTitle, strrpos($originalTitle, ':') + 1);
+ $this->logger->info(sprintf('[Metadata] Title has a colon in the middle, new title is: \'%s\'', $curTitle));
+
// If the title is now too short, try the first colon instead:
if (count(preg_split('/\s+/', $curTitle)) < 3) {
$curTitle = substr($originalTitle, strpos($originalTitle, ':') + 1);
+ $this->logger->info(sprintf('[Metadata] Title too short, using the first part of the title instead: \'%s\'', $curTitle));
}
}
} elseif (mb_strlen($curTitle) > 150 || mb_strlen($curTitle) < 15) {
@@ -422,6 +473,7 @@ class Readability
if ($hOnes->length === 1) {
$curTitle = $hOnes->item(0)->nodeValue;
+ $this->logger->info(sprintf('[Metadata] Using title from an H1 node: \'%s\'', $curTitle));
}
}
@@ -439,6 +491,8 @@ class Readability
if ($curTitleWordCount <= 4 &&
(!$titleHadHierarchicalSeparators || $curTitleWordCount !== $originalTitleWordCount)) {
$curTitle = $originalTitle;
+
+ $this->logger->info(sprintf('Using title from an H1 node: \'%s\'', $curTitle));
}
return $curTitle;
@@ -509,6 +563,8 @@ class Readability
*/
private function getNodes($node)
{
+ $this->logger->info('[Get Nodes] Retrieving nodes...');
+
$stripUnlikelyCandidates = $this->configuration->getStripUnlikelyCandidates();
$elementsToScore = [];
@@ -524,12 +580,14 @@ class Readability
// Remove DOMComments nodes as we don't need them and mess up children counting
if ($node->nodeType === XML_COMMENT_NODE) {
+ $this->logger->debug(sprintf('[Get Nodes] Found comment node, removing... Node content was: \'%s\'', substr($node->nodeValue, 0, 128)));
$node = NodeUtility::removeAndGetNext($node);
continue;
}
// Check to see if this node is a byline, and remove it if it is.
if ($this->checkByline($node, $matchString)) {
+ $this->logger->debug(sprintf('[Get Nodes] Found byline, removing... Node content was: \'%s\'', substr($node->nodeValue, 0, 128)));
$node = NodeUtility::removeAndGetNext($node);
continue;
}
@@ -542,6 +600,7 @@ class Readability
$node->nodeName !== 'body' &&
$node->nodeName !== 'a'
) {
+ $this->logger->debug(sprintf('[Get Nodes] Removing unlikely candidate. Node content was: \'%s\'', substr($node->nodeValue, 0, 128)));
$node = NodeUtility::removeAndGetNext($node);
continue;
}
@@ -553,11 +612,13 @@ class Readability
$node->nodeName === 'h4' || $node->nodeName === 'h5' || $node->nodeName === 'h6' ||
$node->nodeName === 'p') &&
$node->isElementWithoutContent()) {
+ $this->logger->debug(sprintf('[Get Nodes] Removing empty \'%s\' node.', $node->nodeName));
$node = NodeUtility::removeAndGetNext($node);
continue;
}
if (in_array(strtolower($node->nodeName), $this->defaultTagsToScore)) {
+ $this->logger->debug(sprintf('[Get Nodes] Adding node to score list, node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
$elementsToScore[] = $node;
}
@@ -570,11 +631,13 @@ class Readability
* algorithm with DIVs with are, in practice, paragraphs.
*/
if ($node->hasSinglePNode()) {
+ $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
$pNode = $node->getChildren(true)[0];
$node->parentNode->replaceChild($pNode, $node);
$node = $pNode;
$elementsToScore[] = $node;
} elseif (!$node->hasSingleChildBlockElement()) {
+ $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single child block element, converting to a P node. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
$node = NodeUtility::setNodeTag($node, 'p');
$elementsToScore[] = $node;
} else {
@@ -582,6 +645,7 @@ class Readability
foreach ($node->getChildren() as $child) {
/** @var $child DOMNode */
if ($child->nodeType === XML_TEXT_NODE && mb_strlen(trim($child->getTextContent())) > 0) {
+ $this->logger->debug(sprintf('[Get Nodes] Found DIV a text node inside, converting to a P node. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
$newNode = $node->createNode($child, 'p');
$child->parentNode->replaceChild($newNode, $child);
}
@@ -619,6 +683,7 @@ class Readability
$rel = $node->getAttribute('rel');
if ($rel === 'author' || preg_match(NodeUtility::$regexps['byline'], $matchString) && $this->isValidByline($node->getTextContent())) {
+ $this->logger->info(sprintf('[Metadata] Found article author: \'%s\'', $node->getTextContent()));
$this->setAuthor(trim($node->getTextContent()));
return true;
@@ -672,6 +737,8 @@ class Readability
*/
private function prepDocument(DOMDocument $dom)
{
+ $this->logger->info('[PrepDocument] Preparing document for parsing...');
+
/*
* DOMNodeList must be converted to an array before looping over it.
* This is done to avoid node shifting when removing nodes.
@@ -695,6 +762,8 @@ class Readability
* (which will be replaced with a <p> later).
*/
while (($next = NodeUtility::nextElement($next)) && ($next->nodeName === 'br')) {
+ $this->logger->debug('[PrepDocument] Removing chain of BR nodes...');
+
$replaced = true;
$brSibling = $next->nextSibling;
$next->parentNode->removeChild($next);
@@ -721,6 +790,8 @@ class Readability
}
}
+ $this->logger->debug('[PrepDocument] Replacing BR with a P node...');
+
// Otherwise, make this node a child of the new <p>.
$sibling = $next->nextSibling;
$p->appendChild($next);
@@ -733,6 +804,7 @@ class Readability
$fonts = $dom->getElementsByTagName('font');
$length = $fonts->length;
for ($i = 0; $i < $length; $i++) {
+ $this->logger->debug('[PrepDocument] Converting font tag into a span tag.');
$font = $fonts->item($length - 1 - $i);
NodeUtility::setNodeTag($font, 'span', true);
}
@@ -747,6 +819,8 @@ class Readability
*/
private function rateNodes($nodes)
{
+ $this->logger->info('[Rating] Rating nodes...');
+
$candidates = [];
/** @var DOMElement $node */
@@ -776,8 +850,11 @@ class Readability
// For every 100 characters in this paragraph, add another point. Up to 3 points.
$contentScore += min(floor(mb_strlen($node->getTextContent(true)) / 100), 3);
+ $this->logger->debug(sprintf('[Rating] Node score %s, content: \'%s\'', $contentScore, substr($node->nodeValue, 0, 128)));
+
/** @var $ancestor DOMElement */
foreach ($ancestors as $level => $ancestor) {
+ $this->logger->debug('[Rating] Found ancestor, initializing and adding it as a candidate...');
if (!$ancestor->isInitialized()) {
$ancestor->initializeNode($this->configuration->getWeightClasses());
$candidates[] = $ancestor;
@@ -800,6 +877,8 @@ class Readability
$currentScore = $ancestor->contentScore;
$ancestor->contentScore = $currentScore + ($contentScore / $scoreDivider);
+
+ $this->logger->debug(sprintf('[Rating] Ancestor score %s, value: \'%s\'', $ancestor->contentScore, substr($ancestor->nodeValue, 0, 128)));
}
}
@@ -841,6 +920,8 @@ class Readability
*/
if ($topCandidate === null || $topCandidate->nodeName === 'body') {
+ $this->logger->info('[Rating] No top candidate found or top candidate is the body tag. Moving all child nodes to a new DIV node.');
+
// Move all of the page's children into topCandidate
$topCandidate = new DOMDocument('1.0', 'utf-8');
$topCandidate->encoding = 'UTF-8';
@@ -856,6 +937,7 @@ class Readability
// Candidate must be created using firstChild to grab the DOMElement instead of the DOMDocument.
$topCandidate = $topCandidate->firstChild;
} elseif ($topCandidate) {
+ $this->logger->info(sprintf('[Rating] Found top candidate, score: %s', $topCandidate->contentScore));
// Find a better top candidate node if it contains (at least three) nodes which belong to `topCandidates` array
// and whose scores are quite closed with current `topCandidate` node.
$alternativeCandidateAncestors = [];
@@ -908,6 +990,7 @@ class Readability
if ($parentScore > $lastScore) {
// Alright! We found a better parent to use.
$topCandidate = $parentOfTopCandidate;
+ $this->logger->info('[Rating] Found a better top candidate.');
break;
}
$lastScore = $parentOfTopCandidate->contentScore;
@@ -929,6 +1012,8 @@ class Readability
* that we removed, etc.
*/
+ $this->logger->info('[Rating] Creating final article content document...');
+
$articleContent = new DOMDocument('1.0', 'utf-8');
$articleContent->createElement('div');
@@ -939,11 +1024,15 @@ class Readability
$hasContent = false;
+ $this->logger->info('[Rating] Adding top candidate siblings...');
+
/** @var DOMElement $sibling */
foreach ($siblings as $sibling) {
$append = false;
if ($sibling === $topCandidate) {
+ $this->logger->debug('[Rating] Sibling is equal to the top candidate, adding to the final article...');
+
$append = true;
} else {
$contentBonus = 0;
@@ -967,6 +1056,8 @@ class Readability
}
if ($append) {
+ $this->logger->debug(sprintf('[Rating] Appending sibling to final article, content is: \'%s\'', substr($sibling->nodeValue, 0, 128)));
+
$hasContent = true;
if (!in_array(strtolower($sibling->nodeName), $this->alterToDIVExceptions)) {
@@ -999,6 +1090,7 @@ class Readability
$articleDir = $ancestor->getAttribute('dir');
if ($articleDir) {
$this->setDirection($articleDir);
+ $this->logger->debug(sprintf('[Rating] Found article direction: %s', $articleDir));
break;
}
}
@@ -1018,6 +1110,8 @@ class Readability
*/
public function prepArticle(DOMDocument $article)
{
+ $this->logger->info('[PrepArticle] Preparing final article...');
+
$this->_cleanStyles($article);
$this->_clean($article, 'style');
@@ -1056,6 +1150,7 @@ class Readability
$titlesMatch = strpos($this->getTitle(), $h2->item(0)->textContent) !== false;
}
if ($titlesMatch) {
+ $this->logger->info('[PrepArticle] Found title repeated in an H2 node, removing...');
$this->_clean($article, 'h2');
}
}
@@ -1079,6 +1174,7 @@ class Readability
foreach (iterator_to_array($article->getElementsByTagName('br')) as $br) {
$next = $br->nextSibling;
if ($next && $next->nodeName === 'p') {
+ $this->logger->debug('[PrepArticle] Removing br node next to a p node.');
$br->parentNode->removeChild($br);
}
}
@@ -1193,6 +1289,7 @@ class Readability
$next = NodeUtility::getNextNode($node);
while ($next && $next !== $endOfSearchMarkerNode) {
if (preg_match($regex, sprintf('%s %s', $next->getAttribute('class'), $next->getAttribute('id')))) {
+ $this->logger->debug(sprintf('Removing matched node with regex: \'%s\', node class was: \'%s\', id: \'%s\'', $regex, $next->getAttribute('class'), $next->getAttribute('id')));
$next = NodeUtility::removeAndGetNext($next);
} else {
$next = NodeUtility::getNextNode($next);
@@ -1221,6 +1318,7 @@ class Readability
$totalCount = $imgCount + $embedCount + $objectCount + $iframeCount;
if ($totalCount === 0 && !preg_replace(NodeUtility::$regexps['onlyWhitespace'], '', $paragraph->textContent)) {
+ $this->logger->debug(sprintf('[PrepArticle] Removing extra paragraph. Text content was: \'%s\'', substr($paragraph->textContent, 0, 128)));
$paragraph->parentNode->removeChild($paragraph);
}
}
@@ -1262,6 +1360,8 @@ class Readability
}
if ($weight < 0) {
+ $this->logger->debug(sprintf('[PrepArticle] Removing tag \'%s\' with 0 or less weight', $tag));
+
NodeUtility::removeNode($node);
continue;
}
@@ -1300,6 +1400,8 @@ class Readability
(($embedCount === 1 && $contentLength < 75) || $embedCount > 1);
if ($haveToRemove) {
+ $this->logger->debug(sprintf('[PrepArticle] Removing tag \'%s\'.', $tag));
+
NodeUtility::removeNode($node);
}
}
@@ -1342,6 +1444,8 @@ class Readability
continue;
}
}
+ $this->logger->debug(sprintf('[PrepArticle] Removing node \'%s\'.', $item->tagName));
+
NodeUtility::removeNode($item);
}
}
@@ -1365,6 +1469,8 @@ class Readability
}
if ($weight < 0) {
+ $this->logger->debug(sprintf('[PrepArticle] Removing H node with 0 or less weight. Content was: \'%s\'', substr($header->nodeValue, 0, 128)));
+
NodeUtility::removeNode($header);
}
}
@@ -1378,6 +1484,8 @@ class Readability
*/
public function postProcessContent(DOMDocument $article)
{
+ $this->logger->info('[PostProcess] PostProcessing content...');
+
// Readability cannot open relative uris so we convert them to absolute uris.
if ($this->configuration->getFixRelativeURLs()) {
foreach (iterator_to_array($article->getElementsByTagName('a')) as $link) {
@@ -1387,9 +1495,13 @@ class Readability
// Replace links with javascript: URIs with text content, since
// they won't work after scripts have been removed from the page.
if (strpos($href, 'javascript:') === 0) {
+ $this->logger->debug(sprintf('[PostProcess] Removing \'javascript:\' link. Content is: \'%s\'', substr($link->textContent, 0, 128)));
+
$text = $article->createTextNode($link->textContent);
$link->parentNode->replaceChild($text, $link);
} else {
+ $this->logger->debug(sprintf('[PostProcess] Converting link to absolute URI: \'%s\'', substr($href, 0, 128)));
+
$link->setAttribute('href', $this->toAbsoluteURI($href));
}
}
@@ -1410,6 +1522,8 @@ class Readability
$src = array_filter($url);
$src = reset($src);
if ($src) {
+ $this->logger->debug(sprintf('[PostProcess] Converting image URL to absolute URI: \'%s\'', substr($src, 0, 128)));
+
$img->setAttribute('src', $this->toAbsoluteURI($src));
}
}