From 995d6fb8045bbbebfe8578f86431aa522985ee6e Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Fri, 1 Dec 2017 17:17:19 +0000 Subject: Apply fixes from StyleCI --- src/Configuration.php | 26 +++++++++++++++++++++++++- src/NodeClass/DOMDocument.php | 2 +- src/NodeClass/DOMNode.php | 3 +-- src/NodeClass/NodeClassTrait.php | 9 ++++----- src/NodeUtility.php | 15 +++++---------- src/Readability.php | 36 ++++++++++-------------------------- test/ReadabilityTest.php | 5 ++--- 7 files changed, 48 insertions(+), 48 deletions(-) diff --git a/src/Configuration.php b/src/Configuration.php index 6ce8b5b..1a405de 100644 --- a/src/Configuration.php +++ b/src/Configuration.php @@ -3,7 +3,7 @@ namespace andreskrey\Readability; /** - * Class Configuration + * Class Configuration. */ class Configuration { @@ -62,11 +62,13 @@ class Configuration /** * @param int $maxTopCandidates + * * @return $this */ public function setMaxTopCandidates($maxTopCandidates) { $this->maxTopCandidates = $maxTopCandidates; + return $this; } @@ -80,11 +82,13 @@ class Configuration /** * @param int $wordThreshold + * * @return $this */ public function setWordThreshold($wordThreshold) { $this->wordThreshold = $wordThreshold; + return $this; } @@ -98,11 +102,13 @@ class Configuration /** * @param bool $articleByLine + * * @return $this */ public function setArticleByLine($articleByLine) { $this->articleByLine = $articleByLine; + return $this; } @@ -116,11 +122,13 @@ class Configuration /** * @param bool $stripUnlikelyCandidates + * * @return $this */ public function setStripUnlikelyCandidates($stripUnlikelyCandidates) { $this->stripUnlikelyCandidates = $stripUnlikelyCandidates; + return $this; } @@ -134,11 +142,13 @@ class Configuration /** * @param bool $cleanConditionally + * * @return $this */ public function setCleanConditionally($cleanConditionally) { $this->cleanConditionally = $cleanConditionally; + return $this; } @@ -152,11 +162,13 @@ class Configuration /** * @param bool $weightClasses + * * @return $this */ public function setWeightClasses($weightClasses) { $this->weightClasses = $weightClasses; + return $this; } @@ -170,11 +182,13 @@ class Configuration /** * @param bool $removeReadabilityTags + * * @return $this */ public function setRemoveReadabilityTags($removeReadabilityTags) { $this->removeReadabilityTags = $removeReadabilityTags; + return $this; } @@ -188,11 +202,13 @@ class Configuration /** * @param bool $fixRelativeURLs + * * @return $this */ public function setFixRelativeURLs($fixRelativeURLs) { $this->fixRelativeURLs = $fixRelativeURLs; + return $this; } @@ -206,11 +222,13 @@ class Configuration /** * @param bool $substituteEntities + * * @return $this */ public function setSubstituteEntities($substituteEntities) { $this->substituteEntities = $substituteEntities; + return $this; } @@ -224,11 +242,13 @@ class Configuration /** * @param bool $normalizeEntities + * * @return $this */ public function setNormalizeEntities($normalizeEntities) { $this->normalizeEntities = $normalizeEntities; + return $this; } @@ -242,11 +262,13 @@ class Configuration /** * @param string $originalURL + * * @return $this */ public function setOriginalURL($originalURL) { $this->originalURL = $originalURL; + return $this; } @@ -260,11 +282,13 @@ class Configuration /** * @param bool $summonCthulhu + * * @return $this */ public function setSummonCthulhu($summonCthulhu) { $this->summonCthulhu = $summonCthulhu; + return $this; } diff --git a/src/NodeClass/DOMDocument.php b/src/NodeClass/DOMDocument.php index f379268..98b1215 100644 --- a/src/NodeClass/DOMDocument.php +++ b/src/NodeClass/DOMDocument.php @@ -14,7 +14,7 @@ class DOMDocument extends \DOMDocument $this->registerNodeClass('DOMCdataSection', DOMCdataSection::class); $this->registerNodeClass('DOMCharacterData', DOMCharacterData::class); $this->registerNodeClass('DOMComment', DOMComment::class); - $this->registerNodeClass('DOMDocument', DOMDocument::class); + $this->registerNodeClass('DOMDocument', self::class); $this->registerNodeClass('DOMDocumentFragment', DOMDocumentFragment::class); $this->registerNodeClass('DOMDocumentType', DOMDocumentType::class); $this->registerNodeClass('DOMElement', DOMElement::class); diff --git a/src/NodeClass/DOMNode.php b/src/NodeClass/DOMNode.php index c9ed1c3..eb5e93e 100644 --- a/src/NodeClass/DOMNode.php +++ b/src/NodeClass/DOMNode.php @@ -3,10 +3,9 @@ namespace andreskrey\Readability\NodeClass; /** - * Class DOMNode + * Class DOMNode. * * @method getAttribute($attribute) - * @package andreskrey\Readability\NodeClass */ class DOMNode extends \DOMNode { diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php index a1382d1..f2df51a 100644 --- a/src/NodeClass/NodeClassTrait.php +++ b/src/NodeClass/NodeClassTrait.php @@ -7,14 +7,14 @@ use andreskrey\Readability\NodeUtility; trait NodeClassTrait { /** - * Content score of the node. Used to determine the value of the content + * Content score of the node. Used to determine the value of the content. * * @var int */ public $contentScore = 0; /** - * Flag for initialized status + * Flag for initialized status. * * @var bool */ @@ -38,7 +38,7 @@ trait NodeClassTrait ]; /** - * initialized getter + * initialized getter. * * @return bool */ @@ -53,6 +53,7 @@ trait NodeClassTrait * @ TODO: I don't like the weightClasses param. How can we get the config here? * * @param $weightClasses bool Weight classes? + * * @return static */ public function initializeNode($weightClasses) @@ -181,7 +182,6 @@ trait NodeClassTrait return $linkLength / $textLength; } - /** * Calculates the weight of the class/id of the current element. * @@ -283,7 +283,6 @@ trait NodeClassTrait return ['rows' => $rows, 'columns' => $columns]; } - /** * Creates a new node based on the text content of the original node. * diff --git a/src/NodeUtility.php b/src/NodeUtility.php index d0796dd..7fbdd45 100644 --- a/src/NodeUtility.php +++ b/src/NodeUtility.php @@ -7,14 +7,12 @@ use andreskrey\Readability\NodeClass\DOMElement; use andreskrey\Readability\NodeClass\DOMNode; /** - * Class NodeUtility - * @package andreskrey\Readability + * Class NodeUtility. */ class NodeUtility { - /** - * Collection of regexps to check the node usability + * Collection of regexps to check the node usability. * * @var array */ @@ -36,12 +34,11 @@ class NodeUtility 'onlyWhitespace' => '/\x{00A0}|\s+/u' ]; - /** - * - * Imported from the Element class on league\html-to-markdown + * Imported from the Element class on league\html-to-markdown. * * @param $node + * * @return DOMElement */ public static function nextElement($node) @@ -56,13 +53,13 @@ class NodeUtility return $next; } - /** * Changes the node tag name. Since tagName on DOMElement is a read only value, this must be done creating a new * element with the new tag name and importing it to the main DOMDocument. * * @param string $value * @param bool $importAttributes + * * @return DOMNode */ public static function setNodeTag($node, $value, $importAttributes = false) @@ -72,7 +69,6 @@ class NodeUtility $children = $node->childNodes; /** @var $children \DOMNodeList $i */ - for ($i = 0; $i < $children->length; $i++) { $import = $new->importNode($children->item($i), true); $new->firstChild->appendChild($import); @@ -122,7 +118,6 @@ class NodeUtility } } - /** * Returns the next node. First checks for children (if the flag allows it), then for siblings, and finally * for parents. diff --git a/src/Readability.php b/src/Readability.php index c42e577..282e983 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -3,21 +3,12 @@ namespace andreskrey\Readability; use andreskrey\Readability\NodeClass\DOMDocument; -use andreskrey\Readability\NodeClass\DOMAttr; -use andreskrey\Readability\NodeClass\DOMCdataSection; -use andreskrey\Readability\NodeClass\DOMCharacterData; -use andreskrey\Readability\NodeClass\DOMComment; -use andreskrey\Readability\NodeClass\DOMDocumentFragment; -use andreskrey\Readability\NodeClass\DOMDocumentType; use andreskrey\Readability\NodeClass\DOMElement; use andreskrey\Readability\NodeClass\DOMNode; -use andreskrey\Readability\NodeClass\DOMNotation; -use andreskrey\Readability\NodeClass\DOMProcessingInstruction; use andreskrey\Readability\NodeClass\DOMText; -use andreskrey\Readability\NodeClass\NodeClassTrait; /** - * Class Readability + * Class Readability. */ class Readability { @@ -29,49 +20,49 @@ class Readability protected $dom; /** - * Title of the article + * Title of the article. * * @var string|null */ protected $title = null; /** - * HTML content article + * HTML content article. * * @var string|null */ protected $content = null; /** - * Excerpt of the article + * Excerpt of the article. * * @var string|null */ protected $excerpt = null; /** - * Main image of the article + * Main image of the article. * * @var string|null */ protected $image = null; /** - * Author of the article. Extracted from the byline tags and other social media properties + * Author of the article. Extracted from the byline tags and other social media properties. * * @var string|null */ protected $author = null; /** - * Direction of the text + * Direction of the text. * * @var string|null */ protected $direction = null; /** - * Configuration object + * Configuration object. * * @var Configuration */ @@ -116,7 +107,7 @@ class Readability } /** - * Main parse function + * Main parse function. * * @param $html * @@ -229,7 +220,7 @@ class Readability } /** - * Tries to guess relevant info from metadata of the html. Sets the results in the Readability properties + * Tries to guess relevant info from metadata of the html. Sets the results in the Readability properties. */ private function getMetadata() { @@ -326,7 +317,6 @@ class Readability return $result; } - /** * Tries to get the main article image. Will only update the metadata if the getMetadata function couldn't * find a correct image. @@ -447,7 +437,6 @@ class Readability return $curTitle; } - private function toAbsoluteURI($uri) { list($pathBase, $scheme, $prePath) = $this->getPathInfo($this->configuration->getOriginalURL()); @@ -495,7 +484,6 @@ class Readability return [$pathBase, $scheme, $prePath]; } - /** * Gets nodes from the root element. * @@ -641,7 +629,6 @@ class Readability return false; } - /** * Removes all the scripts of the html. * @@ -735,7 +722,6 @@ class Readability } } - /** * Assign scores to each node. This function will rate each node and return a DOMElement object for each one. * @@ -1147,7 +1133,6 @@ class Readability } } - /** * Remove the style attribute on every e and under. * TODO: To be moved to Readability. @@ -1417,7 +1402,6 @@ class Readability return $article; } - /** * @return null|string */ diff --git a/test/ReadabilityTest.php b/test/ReadabilityTest.php index a577b3d..9d29ba5 100644 --- a/test/ReadabilityTest.php +++ b/test/ReadabilityTest.php @@ -2,7 +2,6 @@ namespace andreskrey\Readability\Test; - use andreskrey\Readability\Configuration; use andreskrey\Readability\Readability; @@ -24,7 +23,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase $configuration = new Configuration(); - foreach($options as $key => $value){ + foreach ($options as $key => $value) { $name = 'set' . $key; $configuration->$name($value); } @@ -50,7 +49,7 @@ class ReadabilityTest extends \PHPUnit_Framework_TestCase } $configuration = new Configuration(); - foreach($options as $key => $value){ + foreach ($options as $key => $value) { $name = 'set' . $key; $configuration->$name($value); } -- cgit v1.2.3 From e052f33859c9491be413ed7130562b7eb0c4b78f Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Fri, 1 Dec 2017 17:19:42 +0000 Subject: Fix phpdoc on setters --- src/Readability.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Readability.php b/src/Readability.php index 282e983..10235df 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1419,7 +1419,7 @@ class Readability } /** - * @param null $title + * @param string $title */ protected function setTitle($title) { @@ -1435,7 +1435,7 @@ class Readability } /** - * @param null $content + * @param string $content */ protected function setContent($content) { @@ -1467,7 +1467,7 @@ class Readability } /** - * @param null $image + * @param string $image */ protected function setImage($image) { @@ -1483,7 +1483,7 @@ class Readability } /** - * @param null $author + * @param string $author */ protected function setAuthor($author) { -- cgit v1.2.3 From 460e936bff25821ecb2685be54468390c8ddb4de Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Fri, 1 Dec 2017 17:46:38 +0000 Subject: Minor cleanup --- src/NodeClass/NodeClassTrait.php | 2 +- src/Readability.php | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php index f2df51a..4c1442b 100644 --- a/src/NodeClass/NodeClassTrait.php +++ b/src/NodeClass/NodeClassTrait.php @@ -286,7 +286,7 @@ trait NodeClassTrait /** * Creates a new node based on the text content of the original node. * - * @param $originalNode DOMElement + * @param $originalNode DOMNode * @param $tagName string * * @return DOMElement diff --git a/src/Readability.php b/src/Readability.php index 10235df..ea1595a 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -596,14 +596,14 @@ class Readability /* * Check if the byline is already set */ - if (isset($this->metadata['byline'])) { + if ($this->getAuthor()) { return false; } $rel = $node->getAttribute('rel'); if ($rel === 'author' || preg_match(NodeUtility::$regexps['byline'], $matchString) && $this->isValidByline($node->getTextContent())) { - $this->metadata['byline'] = trim($node->getTextContent()); + $this->setAuthor(trim($node->getTextContent())); return true; } @@ -817,7 +817,6 @@ class Readability } $topCandidate = isset($topCandidates[0]) ? $topCandidates[0] : null; - $neededToCreateTopCandidate = false; $parentOfTopCandidate = null; /* @@ -923,7 +922,7 @@ class Readability $hasContent = false; - /** @var Readability $sibling */ + /** @var DOMElement $sibling */ foreach ($siblings as $sibling) { $append = false; @@ -1137,7 +1136,7 @@ class Readability * Remove the style attribute on every e and under. * TODO: To be moved to Readability. * - * @param $node \DOMDocument|\DOMNode + * @param $node DOMDocument|DOMNode **/ public function _cleanStyles($node) { @@ -1213,7 +1212,6 @@ class Readability $totalCount = $imgCount + $embedCount + $objectCount + $iframeCount; if ($totalCount === 0 && !preg_replace(NodeUtility::$regexps['onlyWhitespace'], '', $paragraph->textContent)) { - // TODO must be done via readability $paragraph->parentNode->removeChild($paragraph); } } @@ -1268,7 +1266,6 @@ class Readability * ominous signs, remove the element. */ - // TODO Horrible hack, must be removed once this function is inside Readability $p = $node->getElementsByTagName('p')->length; $img = $node->getElementsByTagName('img')->length; $li = $node->getElementsByTagName('li')->length - 100; -- cgit v1.2.3 From a2eded4e4df7e48a6f20d9e0449c52bf8003c471 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Fri, 1 Dec 2017 17:54:28 +0000 Subject: Add readabilityDataTable param with getters and setters --- src/NodeClass/NodeClassTrait.php | 23 +++++++++++++++++++++++ src/Readability.php | 18 +++++++++--------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/NodeClass/NodeClassTrait.php b/src/NodeClass/NodeClassTrait.php index 4c1442b..ae3eeb1 100644 --- a/src/NodeClass/NodeClassTrait.php +++ b/src/NodeClass/NodeClassTrait.php @@ -20,6 +20,13 @@ trait NodeClassTrait */ private $initialized = false; + /** + * Flag data tables. + * + * @var bool + */ + private $readabilityDataTable = false; + /** * @var array */ @@ -47,6 +54,22 @@ trait NodeClassTrait return $this->initialized; } + /** + * @return bool + */ + public function isReadabilityDataTable() + { + return $this->readabilityDataTable; + } + + /** + * @param bool $param + */ + public function setReadabilityDataTable($param) + { + $this->readabilityDataTable = $param; + } + /** * Initializer. Calculates the current score of the node and returns a full Readability object. * diff --git a/src/Readability.php b/src/Readability.php index ea1595a..db0774a 100644 --- a/src/Readability.php +++ b/src/Readability.php @@ -1088,47 +1088,47 @@ class Readability /** @var DOMElement $table */ $role = $table->getAttribute('role'); if ($role === 'presentation') { - $table->readabilityDataTable = false; + $table->setReadabilityDataTable(false); continue; } $datatable = $table->getAttribute('datatable'); if ($datatable == '0') { - $table->readabilityDataTable = false; + $table->setReadabilityDataTable(false); continue; } $summary = $table->getAttribute('summary'); if ($summary) { - $table->readabilityDataTable = true; + $table->setReadabilityDataTable(true); continue; } $caption = $table->getElementsByTagName('caption'); if ($caption->length > 0 && $caption->item(0)->childNodes->length > 0) { - $table->readabilityDataTable = true; + $table->setReadabilityDataTable(true); continue; } // If the table has a descendant with any of these tags, consider a data table: foreach (['col', 'colgroup', 'tfoot', 'thead', 'th'] as $dataTableDescendants) { if ($table->getElementsByTagName($dataTableDescendants)->length > 0) { - $table->readabilityDataTable = true; + $table->setReadabilityDataTable(true); continue 2; } } // Nested tables indicate a layout table: if ($table->getElementsByTagName('table')->length > 0) { - $table->readabilityDataTable = false; + $table->setReadabilityDataTable(false); continue; } $sizeInfo = $table->getRowAndColumnCount(); if ($sizeInfo['rows'] >= 10 || $sizeInfo['columns'] > 4) { - $table->readabilityDataTable = true; + $table->setReadabilityDataTable(true); continue; } // Now just go by size entirely: - $table->readabilityDataTable = $sizeInfo['rows'] * $sizeInfo['columns'] > 10; + $table->setReadabilityDataTable($sizeInfo['rows'] * $sizeInfo['columns'] > 10); } } @@ -1245,7 +1245,7 @@ class Readability $node = $DOMNodeList->item($length - 1 - $i); // First check if we're in a data table, in which case don't remove us. - if ($node->hasAncestorTag($node, 'table', -1) && isset($node->readabilityDataTable)) { + if ($node->hasAncestorTag($node, 'table', -1) && $node->isReadabilityDataTable()) { continue; } -- cgit v1.2.3