diff options
author | Andres Rey <[email protected]> | 2017-12-10 20:47:36 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-12-10 20:47:36 +0000 |
commit | 2a2c4129f969cb9f3af83cb5bd6f807d8e2792cb (patch) | |
tree | 884263c3d817b9d9742bac490c49f5b3db89988b /src/Readability.php | |
parent | 8b496d68788694b34c6fe898c380bf181981019d (diff) |
Improve logging messages
Diffstat (limited to 'src/Readability.php')
-rw-r--r-- | src/Readability.php | 140 |
1 files changed, 68 insertions, 72 deletions
diff --git a/src/Readability.php b/src/Readability.php index 8df0189..7160fbe 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -142,7 +142,7 @@ class Readability $root = $root->firstChild; $elementsToScore = $this->getNodes($root); - $this->logger->debug(sprintf('Elements to score: %s', count($elementsToScore)), $elementsToScore); + $this->logger->debug(sprintf('Elements to score: \'%s\'', count($elementsToScore))); $result = $this->rateNodes($elementsToScore); @@ -159,23 +159,23 @@ class Readability $length += mb_strlen($p->textContent); } - $this->logger->info(sprintf('Article parsed. Amount of words: %s. Current threshold is: %s', $length, $this->configuration->getWordThreshold())); + $this->logger->info(sprintf('[Parsing] Article parsed. Amount of words: %s. Current threshold is: %s', $length, $this->configuration->getWordThreshold())); if ($result && mb_strlen(preg_replace('/\s/', '', $result->textContent)) < $this->configuration->getWordThreshold()) { $this->dom = $this->loadHTML($html); $root = $this->dom->getElementsByTagName('body')->item(0); if ($this->configuration->getStripUnlikelyCandidates()) { - $this->logger->debug('Threshold not passed, trying again setting StripUnlikelyCandidates as false'); + $this->logger->debug('[Parsing] Threshold not met, trying again setting StripUnlikelyCandidates as false'); $this->configuration->setStripUnlikelyCandidates(false); } elseif ($this->configuration->getWeightClasses()) { - $this->logger->debug('Threshold not passed, trying again setting WeightClasses as false'); + $this->logger->debug('[Parsing] Threshold not met, trying again setting WeightClasses as false'); $this->configuration->setWeightClasses(false); } elseif ($this->configuration->getCleanConditionally()) { - $this->logger->debug('Threshold not passed, trying again setting CleanConditionally as false'); + $this->logger->debug('[Parsing] Threshold not met, trying again setting CleanConditionally as false'); $this->configuration->setCleanConditionally(false); } else { - $this->logger->emergency('Could not parse text, giving up.'); + $this->logger->emergency('[Parsing] Could not parse text, giving up :('); throw new ParseException('Could not parse text.'); } } else { @@ -189,7 +189,7 @@ class Readability // first paragraph as the excerpt. This can be used for displaying a preview of // the article's content. if (!$this->getExcerpt()) { - $this->logger->debug('No excerpt text found on metadata, extracting first p node and using it as excerpt.'); + $this->logger->debug('[Parsing] No excerpt text found on metadata, extracting first p node and using it as excerpt.'); $paragraphs = $result->getElementsByTagName('p'); if ($paragraphs->length > 0) { $this->setExcerpt(trim($paragraphs->item(0)->textContent)); @@ -217,7 +217,7 @@ class Readability */ private function loadHTML($html) { - $this->logger->debug('Loading HTML...'); + $this->logger->debug('[Loading] Loading HTML...'); // To avoid throwing a gazillion of errors on malformed HTMLs libxml_use_internal_errors(true); @@ -230,13 +230,13 @@ class Readability } if ($this->configuration->getNormalizeEntities()) { - $this->logger->debug('Normalized entities via mb_convert_encoding.'); + $this->logger->debug('[Loading] Normalized entities via mb_convert_encoding.'); // Replace UTF-8 characters with the HTML Entity equivalent. Useful to fix html with mixed content $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'); } if ($this->configuration->getSummonCthulhu()) { - $this->logger->debug('Removed script tags via regex H̶͈̩̟̬̱͠E̡̨̬͔̳̜͢͠ ̡̧̯͉̩͙̩̹̞̠͎͈̹̥̠͞ͅͅC̶͉̞̘̖̝̗͓̬̯͍͉̤̬͢͢͞Ò̟̘͉͖͎͉̱̭̣̕M̴̯͈̻̱̱̣̗͈̠̙̲̥͘͞E̷̛͙̼̲͍͕̹͍͇̗̻̬̮̭̱̥͢Ş̛̟͔̙̜̤͇̮͍̙̝̀͘'); + $this->logger->debug('[Loading] Removed script tags via regex H̶͈̩̟̬̱͠E̡̨̬͔̳̜͢͠ ̡̧̯͉̩͙̩̹̞̠͎͈̹̥̠͞ͅͅC̶͉̞̘̖̝̗͓̬̯͍͉̤̬͢͢͞Ò̟̘͉͖͎͉̱̭̣̕M̴̯͈̻̱̱̣̗͈̠̙̲̥͘͞E̷̛͙̼̲͍͕̹͍͇̗̻̬̮̭̱̥͢Ş̛̟͔̙̜̤͇̮͍̙̝̀͘'); $html = preg_replace('/<script\b[^>]*>([\s\S]*?)<\/script>/', '', $html); } @@ -248,7 +248,7 @@ class Readability $this->prepDocument($dom); - $this->logger->debug('Loaded HTML successfully.'); + $this->logger->debug('[Loading] Loaded HTML successfully.'); return $dom; } @@ -258,7 +258,7 @@ class Readability */ private function getMetadata() { - $this->logger->debug('Retrieving metadata...'); + $this->logger->debug('[Metadata] Retrieving metadata...'); $values = []; // Match "description", or Twitter's "twitter:description" (Cards) @@ -274,7 +274,7 @@ class Readability $elementProperty = $meta->getAttribute('property'); if (in_array('author', [$elementName, $elementProperty])) { - $this->logger->info(sprintf('[Metadata] Found author: %s', $meta->getAttribute('content'))); + $this->logger->info(sprintf('[Metadata] Found author: \'%s\'', $meta->getAttribute('content'))); $this->setAuthor($meta->getAttribute('content')); continue; } @@ -297,15 +297,15 @@ class Readability } } if (array_key_exists('description', $values)) { - $this->logger->info(sprintf('[Metadata] Found excerpt in \'description\' tag: %s', $values['description'])); + $this->logger->info(sprintf('[Metadata] Found excerpt in \'description\' tag: \'%s\'', $values['description'])); $this->setExcerpt($values['description']); } elseif (array_key_exists('og:description', $values)) { // Use facebook open graph description. - $this->logger->info(sprintf('[Metadata] Found excerpt in \'og:description\' tag: %s', $values['og:description'])); + $this->logger->info(sprintf('[Metadata] Found excerpt in \'og:description\' tag: \'%s\'', $values['og:description'])); $this->setExcerpt($values['og:description']); } elseif (array_key_exists('twitter:description', $values)) { // Use twitter cards description. - $this->logger->info(sprintf('[Metadata] Found excerpt in \'twitter:description\' tag: %s', $values['twitter:description'])); + $this->logger->info(sprintf('[Metadata] Found excerpt in \'twitter:description\' tag: \'%s\'', $values['twitter:description'])); $this->setExcerpt($values['twitter:description']); } @@ -314,21 +314,21 @@ class Readability if (!$this->getTitle()) { if (array_key_exists('og:title', $values)) { // Use facebook open graph title. - $this->logger->info(sprintf('[Metadata] Found title in \'og:title\' tag: %s', $values['og:title'])); + $this->logger->info(sprintf('[Metadata] Found title in \'og:title\' tag: \'%s\'', $values['og:title'])); $this->setTitle($values['og:title']); } elseif (array_key_exists('twitter:title', $values)) { // Use twitter cards title. - $this->logger->info(sprintf('[Metadata] Found title in \'twitter:title\' tag: %s', $values['twitter:title'])); + $this->logger->info(sprintf('[Metadata] Found title in \'twitter:title\' tag: \'%s\'', $values['twitter:title'])); $this->setTitle($values['twitter:title']); } } if (array_key_exists('og:image', $values) || array_key_exists('twitter:image', $values)) { if (array_key_exists('og:image', $values)) { - $this->logger->info(sprintf('[Metadata] Found main image in \'og:image\' tag: %s', $values['og:image'])); + $this->logger->info(sprintf('[Metadata] Found main image in \'og:image\' tag: \'%s\'', $values['og:image'])); $this->setImage($values['og:image']); } else { - $this->logger->info(sprintf('[Metadata] Found main image in \'twitter:image\' tag: %s', $values['twitter:image'])); + $this->logger->info(sprintf('[Metadata] Found main image in \'twitter:image\' tag: \'%s\'', $values['twitter:image'])); $this->setImage($values['twitter:image']); } } @@ -410,10 +410,10 @@ class Readability if ($this->getTitle()) { $originalTitle = $this->getTitle(); } else { - $this->logger->debug('Could not find title in metadata, searching for the title tag...'); + $this->logger->debug('[Metadata] Could not find title in metadata, searching for the title tag...'); $titleTag = $this->dom->getElementsByTagName('title'); if ($titleTag->length > 0) { - $this->logger->info(sprintf('Using title tag as article title: %s', $titleTag->item(0)->nodeValue)); + $this->logger->info(sprintf('[Metadata] Using title tag as article title: \'%s\'', $titleTag->item(0)->nodeValue)); $originalTitle = $titleTag->item(0)->nodeValue; } } @@ -435,13 +435,13 @@ class Readability $titleHadHierarchicalSeparators = (bool)preg_match('/ [\\\\\/>»] /', $curTitle); $curTitle = preg_replace('/(.*)[\|\-\\\\\/>»] .*/i', '$1', $originalTitle); - $this->logger->info(sprintf('Found hierarchical separators in title, new title is: %s', $curTitle)); + $this->logger->info(sprintf('[Metadata] Found hierarchical separators in title, new title is: \'%s\'', $curTitle)); // If the resulting title is too short (3 words or fewer), remove // the first part instead: if (count(preg_split('/\s+/', $curTitle)) < 3) { $curTitle = preg_replace('/[^\|\-\\\\\/>»]*[\|\-\\\\\/>»](.*)/i', '$1', $originalTitle); - $this->logger->info(sprintf('Title too short, using the first part of the title instead: %s', $curTitle)); + $this->logger->info(sprintf('[Metadata] Title too short, using the first part of the title instead: \'%s\'', $curTitle)); } } elseif (strpos($curTitle, ': ') !== false) { // Check if we have an heading containing this exact string, so we @@ -460,12 +460,12 @@ class Readability if (!$match) { $curTitle = substr($originalTitle, strrpos($originalTitle, ':') + 1); - $this->logger->info(sprintf('Title has a colon in the middle, new title is: %s', $curTitle)); + $this->logger->info(sprintf('[Metadata] Title has a colon in the middle, new title is: \'%s\'', $curTitle)); // If the title is now too short, try the first colon instead: if (count(preg_split('/\s+/', $curTitle)) < 3) { $curTitle = substr($originalTitle, strpos($originalTitle, ':') + 1); - $this->logger->info(sprintf('Title too short, using the first part of the title instead: %s', $curTitle)); + $this->logger->info(sprintf('[Metadata] Title too short, using the first part of the title instead: \'%s\'', $curTitle)); } } } elseif (mb_strlen($curTitle) > 150 || mb_strlen($curTitle) < 15) { @@ -473,7 +473,7 @@ class Readability if ($hOnes->length === 1) { $curTitle = $hOnes->item(0)->nodeValue; - $this->logger->info(sprintf('Using title from an H1 node: %s', $curTitle)); + $this->logger->info(sprintf('[Metadata] Using title from an H1 node: \'%s\'', $curTitle)); } } @@ -492,7 +492,7 @@ class Readability (!$titleHadHierarchicalSeparators || $curTitleWordCount !== $originalTitleWordCount)) { $curTitle = $originalTitle; - $this->logger->info(sprintf('Using title from an H1 node: %s', $curTitle)); + $this->logger->info(sprintf('Using title from an H1 node: \'%s\'', $curTitle)); } return $curTitle; @@ -563,7 +563,7 @@ class Readability */ private function getNodes($node) { - $this->logger->info('Retrieving nodes...'); + $this->logger->info('[Get Nodes] Retrieving nodes...'); $stripUnlikelyCandidates = $this->configuration->getStripUnlikelyCandidates(); @@ -578,18 +578,16 @@ class Readability while ($node) { $matchString = $node->getAttribute('class') . ' ' . $node->getAttribute('id'); - $this->logger->debug(sprintf('Match string from class and id is: %s', $matchString)); - // Remove DOMComments nodes as we don't need them and mess up children counting if ($node->nodeType === XML_COMMENT_NODE) { - $this->logger->debug(sprintf('Found comment node, removing... Node content was: %s', substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Get Nodes] Found comment node, removing... Node content was: \'%s\'', substr($node->nodeValue, 0, 128))); $node = NodeUtility::removeAndGetNext($node); continue; } // Check to see if this node is a byline, and remove it if it is. if ($this->checkByline($node, $matchString)) { - $this->logger->debug(sprintf('Found byline, removing... Node content was: %s', substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Get Nodes] Found byline, removing... Node content was: \'%s\'', substr($node->nodeValue, 0, 128))); $node = NodeUtility::removeAndGetNext($node); continue; } @@ -602,7 +600,7 @@ class Readability $node->nodeName !== 'body' && $node->nodeName !== 'a' ) { - $this->logger->debug(sprintf('Removing unlikely candidate. Node content was: %s', substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Get Nodes] Removing unlikely candidate. Node content was: \'%s\'', substr($node->nodeValue, 0, 128))); $node = NodeUtility::removeAndGetNext($node); continue; } @@ -614,13 +612,13 @@ class Readability $node->nodeName === 'h4' || $node->nodeName === 'h5' || $node->nodeName === 'h6' || $node->nodeName === 'p') && $node->isElementWithoutContent()) { - $this->logger->debug(sprintf('Removing empty \'%s\' node.', $node->nodeName)); + $this->logger->debug(sprintf('[Get Nodes] Removing empty \'%s\' node.', $node->nodeName)); $node = NodeUtility::removeAndGetNext($node); continue; } if (in_array(strtolower($node->nodeName), $this->defaultTagsToScore)) { - $this->logger->debug(sprintf('Adding node to score list, node content is: %s', substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Get Nodes] Adding node to score list, node content is: \'%s\'', substr($node->nodeValue, 0, 128))); $elementsToScore[] = $node; } @@ -633,13 +631,13 @@ class Readability * algorithm with DIVs with are, in practice, paragraphs. */ if ($node->hasSinglePNode()) { - $this->logger->debug(sprintf('Found DIV with a single P node, removing DIV. Node content is: %s', substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128))); $pNode = $node->getChildren(true)[0]; $node->parentNode->replaceChild($pNode, $node); $node = $pNode; $elementsToScore[] = $node; } elseif (!$node->hasSingleChildBlockElement()) { - $this->logger->debug(sprintf('Found DIV with a single child block element, converting to a P node. Node content is: %s', substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single child block element, converting to a P node. Node content is: \'%s\'', substr($node->nodeValue, 0, 128))); $node = NodeUtility::setNodeTag($node, 'p'); $elementsToScore[] = $node; } else { @@ -647,7 +645,7 @@ class Readability foreach ($node->getChildren() as $child) { /** @var $child DOMNode */ if ($child->nodeType === XML_TEXT_NODE && mb_strlen(trim($child->getTextContent())) > 0) { - $this->logger->debug(sprintf('Found DIV a text node inside, converting to a P node. Node content is: %s', substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Get Nodes] Found DIV a text node inside, converting to a P node. Node content is: \'%s\'', substr($node->nodeValue, 0, 128))); $newNode = $node->createNode($child, 'p'); $child->parentNode->replaceChild($newNode, $child); } @@ -685,7 +683,7 @@ class Readability $rel = $node->getAttribute('rel'); if ($rel === 'author' || preg_match(NodeUtility::$regexps['byline'], $matchString) && $this->isValidByline($node->getTextContent())) { - $this->logger->info(sprintf('Found article author: %s', $node->getTextContent())); + $this->logger->info(sprintf('[Metadata] Found article author: \'%s\'', $node->getTextContent())); $this->setAuthor(trim($node->getTextContent())); return true; @@ -739,7 +737,7 @@ class Readability */ private function prepDocument(DOMDocument $dom) { - $this->logger->info('Preparing document for parsing...'); + $this->logger->info('[PrepDocument] Preparing document for parsing...'); /* * DOMNodeList must be converted to an array before looping over it. @@ -764,7 +762,7 @@ class Readability * (which will be replaced with a <p> later). */ while (($next = NodeUtility::nextElement($next)) && ($next->nodeName === 'br')) { - $this->logger->debug('Removing chain of BR nodes...'); + $this->logger->debug('[PrepDocument] Removing chain of BR nodes...'); $replaced = true; $brSibling = $next->nextSibling; @@ -792,7 +790,7 @@ class Readability } } - $this->logger->debug('Replacing BR with a P node...'); + $this->logger->debug('[PrepDocument] Replacing BR with a P node...'); // Otherwise, make this node a child of the new <p>. $sibling = $next->nextSibling; @@ -806,7 +804,7 @@ class Readability $fonts = $dom->getElementsByTagName('font'); $length = $fonts->length; for ($i = 0; $i < $length; $i++) { - $this->logger->debug('Converting font tag into a span tag.'); + $this->logger->debug('[PrepDocument] Converting font tag into a span tag.'); $font = $fonts->item($length - 1 - $i); NodeUtility::setNodeTag($font, 'span', true); } @@ -821,7 +819,7 @@ class Readability */ private function rateNodes($nodes) { - $this->logger->info('Rating nodes...'); + $this->logger->info('[Rating] Rating nodes...'); $candidates = []; @@ -852,11 +850,11 @@ class Readability // For every 100 characters in this paragraph, add another point. Up to 3 points. $contentScore += min(floor(mb_strlen($node->getTextContent(true)) / 100), 3); - $this->logger->debug(sprintf('Node score %s, content: %s', $contentScore, substr($node->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Rating] Node score %s, content: \'%s\'', $contentScore, substr($node->nodeValue, 0, 128))); /** @var $ancestor DOMElement */ foreach ($ancestors as $level => $ancestor) { - $this->logger->debug('Found ancestor, initializing and adding it as a candidate...'); + $this->logger->debug('[Rating] Found ancestor, initializing and adding it as a candidate...'); if (!$ancestor->isInitialized()) { $ancestor->initializeNode($this->configuration->getWeightClasses()); $candidates[] = $ancestor; @@ -880,7 +878,7 @@ class Readability $currentScore = $ancestor->contentScore; $ancestor->contentScore = $currentScore + ($contentScore / $scoreDivider); - $this->logger->debug(sprintf('Ancestor score %s, value: %s', $ancestor->contentScore, substr($ancestor->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Rating] Ancestor score %s, value: \'%s\'', $ancestor->contentScore, substr($ancestor->nodeValue, 0, 128))); } } @@ -922,7 +920,7 @@ class Readability */ if ($topCandidate === null || $topCandidate->nodeName === 'body') { - $this->logger->info('No top candidate found or top candidate is the body tag. Moving all child nodes to a new DIV node.'); + $this->logger->info('[Rating] No top candidate found or top candidate is the body tag. Moving all child nodes to a new DIV node.'); // Move all of the page's children into topCandidate $topCandidate = new DOMDocument('1.0', 'utf-8'); @@ -939,7 +937,7 @@ class Readability // Candidate must be created using firstChild to grab the DOMElement instead of the DOMDocument. $topCandidate = $topCandidate->firstChild; } elseif ($topCandidate) { - $this->logger->info('Found top candidate'); + $this->logger->info(sprintf('[Rating] Found top candidate, score: %s', $topCandidate->contentScore)); // Find a better top candidate node if it contains (at least three) nodes which belong to `topCandidates` array // and whose scores are quite closed with current `topCandidate` node. $alternativeCandidateAncestors = []; @@ -991,7 +989,7 @@ class Readability if ($parentScore > $lastScore) { // Alright! We found a better parent to use. $topCandidate = $parentOfTopCandidate; - $this->logger->info('Found a better top candidate.'); + $this->logger->info('[Rating] Found a better top candidate.'); break; } $lastScore = $parentOfTopCandidate->contentScore; @@ -1013,7 +1011,7 @@ class Readability * that we removed, etc. */ - $this->logger->info('Creating final article content document...'); + $this->logger->info('[Rating] Creating final article content document...'); $articleContent = new DOMDocument('1.0', 'utf-8'); $articleContent->createElement('div'); @@ -1025,14 +1023,14 @@ class Readability $hasContent = false; + $this->logger->info('[Rating] Adding top candidate siblings...'); + /** @var DOMElement $sibling */ foreach ($siblings as $sibling) { - $this->logger->info('Adding top candidate siblings...'); - $append = false; if ($sibling === $topCandidate) { - $this->logger->debug('Sibling is equal to the top candidate, adding to the final article...'); + $this->logger->debug('[Rating] Sibling is equal to the top candidate, adding to the final article...'); $append = true; } else { @@ -1057,7 +1055,7 @@ class Readability } if ($append) { - $this->logger->debug(sprintf('Appending sibling to final article, content is: %s', substr($sibling->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[Rating] Appending sibling to final article, content is: \'%s\'', substr($sibling->nodeValue, 0, 128))); $hasContent = true; @@ -1091,7 +1089,7 @@ class Readability $articleDir = $ancestor->getAttribute('dir'); if ($articleDir) { $this->setDirection($articleDir); - $this->logger->debug(sprintf('Found article direction: %s', $articleDir)); + $this->logger->debug(sprintf('[Rating] Found article direction: %s', $articleDir)); break; } } @@ -1111,7 +1109,7 @@ class Readability */ public function prepArticle(DOMDocument $article) { - $this->logger->info('Preparing final article...'); + $this->logger->info('[PrepArticle] Preparing final article...'); $this->_cleanStyles($article); $this->_clean($article, 'style'); @@ -1151,7 +1149,7 @@ class Readability $titlesMatch = strpos($this->getTitle(), $h2->item(0)->textContent) !== false; } if ($titlesMatch) { - $this->logger->info('Found title repeated in an H2 node, removing...'); + $this->logger->info('[PrepArticle] Found title repeated in an H2 node, removing...'); $this->_clean($article, 'h2'); } } @@ -1175,7 +1173,7 @@ class Readability foreach (iterator_to_array($article->getElementsByTagName('br')) as $br) { $next = $br->nextSibling; if ($next && $next->nodeName === 'p') { - $this->logger->debug('Removing br node next to a p node.'); + $this->logger->debug('[PrepArticle] Removing br node next to a p node.'); $br->parentNode->removeChild($br); } } @@ -1250,8 +1248,6 @@ class Readability **/ public function _cleanStyles($node) { - $this->logger->info('Cleaning styles...'); - if (property_exists($node, 'tagName') && $node->tagName === 'svg') { return; } @@ -1292,7 +1288,7 @@ class Readability $next = NodeUtility::getNextNode($node); while ($next && $next !== $endOfSearchMarkerNode) { if (preg_match($regex, sprintf('%s %s', $next->getAttribute('class'), $next->getAttribute('id')))) { - $this->logger->debug(sprintf('Removing matched node with regex: %s, node content was: %s', $regex, substr($next->nodeValue, 0, 128))); + $this->logger->debug(sprintf('Removing matched node with regex: \'%s\', node class was: \'%s\', id: \'%s\'', $regex, $next->getAttribute('class'), $next->getAttribute('id'))); $next = NodeUtility::removeAndGetNext($next); } else { $next = NodeUtility::getNextNode($next); @@ -1321,7 +1317,7 @@ class Readability $totalCount = $imgCount + $embedCount + $objectCount + $iframeCount; if ($totalCount === 0 && !preg_replace(NodeUtility::$regexps['onlyWhitespace'], '', $paragraph->textContent)) { - $this->logger->debug(sprintf('Removing extra paragraph. Text content was: %s', substr($paragraph->textContent, 0, 128))); + $this->logger->debug(sprintf('[PrepArticle] Removing extra paragraph. Text content was: \'%s\'', substr($paragraph->textContent, 0, 128))); $paragraph->parentNode->removeChild($paragraph); } } @@ -1363,7 +1359,7 @@ class Readability } if ($weight < 0) { - $this->logger->debug(sprintf('Removing tag %s with 0 or less weight', $tag)); + $this->logger->debug(sprintf('[PrepArticle] Removing tag \'%s\' with 0 or less weight', $tag)); NodeUtility::removeNode($node); continue; @@ -1403,7 +1399,7 @@ class Readability (($embedCount === 1 && $contentLength < 75) || $embedCount > 1); if ($haveToRemove) { - $this->logger->debug(sprintf('Removing tag %s.', $tag)); + $this->logger->debug(sprintf('[PrepArticle] Removing tag \'%s\'.', $tag)); NodeUtility::removeNode($node); } @@ -1447,7 +1443,7 @@ class Readability continue; } } - $this->logger->debug(sprintf('Removing node %s.', $item->tagName)); + $this->logger->debug(sprintf('[PrepArticle] Removing node \'%s\'.', $item->tagName)); NodeUtility::removeNode($item); } @@ -1472,7 +1468,7 @@ class Readability } if ($weight < 0) { - $this->logger->debug(sprintf('Removing H node with 0 or less weight. Content was: %s', substr($header->nodeValue, 0, 128))); + $this->logger->debug(sprintf('[PrepArticle] Removing H node with 0 or less weight. Content was: \'%s\'', substr($header->nodeValue, 0, 128))); NodeUtility::removeNode($header); } @@ -1487,7 +1483,7 @@ class Readability */ public function postProcessContent(DOMDocument $article) { - $this->logger->info('PostProcessing content...'); + $this->logger->info('[PostProcess] PostProcessing content...'); // Readability cannot open relative uris so we convert them to absolute uris. if ($this->configuration->getFixRelativeURLs()) { @@ -1498,12 +1494,12 @@ class Readability // Replace links with javascript: URIs with text content, since // they won't work after scripts have been removed from the page. if (strpos($href, 'javascript:') === 0) { - $this->logger->debug(sprintf('Removing \'javascript:\' link. Content is: %s', substr($link->textContent, 0, 128))); + $this->logger->debug(sprintf('[PostProcess] Removing \'javascript:\' link. Content is: \'%s\'', substr($link->textContent, 0, 128))); $text = $article->createTextNode($link->textContent); $link->parentNode->replaceChild($text, $link); } else { - $this->logger->debug(sprintf('Converting link to absolute URI: %s', substr($href, 0, 128))); + $this->logger->debug(sprintf('[PostProcess] Converting link to absolute URI: \'%s\'', substr($href, 0, 128))); $link->setAttribute('href', $this->toAbsoluteURI($href)); } @@ -1525,7 +1521,7 @@ class Readability $src = array_filter($url); $src = reset($src); if ($src) { - $this->logger->debug(sprintf('Converting image URL to absolute URI: %s', substr($src, 0, 128))); + $this->logger->debug(sprintf('[PostProcess] Converting image URL to absolute URI: \'%s\'', substr($src, 0, 128))); $img->setAttribute('src', $this->toAbsoluteURI($src)); } |