summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndres Rey <[email protected]>2016-10-21 19:04:29 +0100
committerAndres Rey <[email protected]>2016-10-21 19:04:29 +0100
commitfe73c4d7cf71023c4cc5654afb0aea47b179c084 (patch)
tree8f03b7f12986b6330f15ce8b0d88ba94cc23440f
parent8a36452b399a1b7df321a5bb2f1b7aa76cc2f129 (diff)
Added configuration class
-rw-r--r--src/Configuration.php60
-rw-r--r--src/Environment.php36
-rw-r--r--src/HTMLParser.php30
3 files changed, 116 insertions, 10 deletions
diff --git a/src/Configuration.php b/src/Configuration.php
new file mode 100644
index 0000000..1424b14
--- /dev/null
+++ b/src/Configuration.php
@@ -0,0 +1,60 @@
+<?php
+
+namespace andreskrey\Readability;
+
+class Configuration
+{
+ protected $config; // Option values indexed by option key.
+
+ /**
+ * @param array $config Initial options; stored exactly as given.
+ */
+ public function __construct(array $config = array())
+ {
+ $this->config = $config;
+ }
+
+ /**
+ * @param array $config Options merged over the current ones with array_replace_recursive().
+ */
+ public function merge(array $config = array())
+ {
+ $this->config = array_replace_recursive($this->config, $config);
+ }
+
+ /**
+ * @param array $config Options that replace the entire current option set.
+ */
+ public function replace(array $config = array())
+ {
+ $this->config = $config;
+ }
+
+ /**
+ * @param string $key Key of the option to set.
+ * @param mixed $value Value stored under $key, overwriting any existing value.
+ */
+ public function setOption($key, $value)
+ {
+ $this->config[$key] = $value;
+ }
+
+ /**
+ * @param string|null $key Key of the option to read; null returns the whole options array.
+ * @param mixed|null $default Returned when $key fails the isset() check (missing, or stored as null).
+ *
+ * @return mixed|null The whole options array, the stored value, or $default.
+ */
+ public function getOption($key = null, $default = null)
+ {
+ if ($key === null) {
+ return $this->config;
+ }
+
+ if (!isset($this->config[$key])) {
+ return $default;
+ }
+
+ return $this->config[$key];
+ }
+}
diff --git a/src/Environment.php b/src/Environment.php
new file mode 100644
index 0000000..9f3dc86
--- /dev/null
+++ b/src/Environment.php
@@ -0,0 +1,36 @@
+<?php
+
+namespace andreskrey\Readability;
+
+final class Environment
+{
+ /**
+ * @var Configuration Built in the constructor from the array passed to it.
+ */
+ protected $config;
+
+ public function __construct(array $config = array())
+ {
+ $this->config = new Configuration($config);
+ }
+
+ /**
+ * @return Configuration The Configuration instance created in the constructor.
+ */
+ public function getConfig()
+ {
+ return $this->config;
+ }
+
+ /**
+ * @param array $config Options passed straight through to the constructor.
+ *
+ * @return Environment A newly constructed instance; no further setup is applied here.
+ */
+ public static function createDefaultEnvironment(array $config = array())
+ {
+ $environment = new static($config);
+
+ return $environment;
+ }
+}
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 8b898d5..133a513 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -49,17 +49,19 @@ class HTMLParser
];
/**
- * @var int
- *
- * @todo this should be inside a configuration class
- */
- private $maxTopCandidates = 5;
-
- /**
* Constructor.
+ * @param array $options Options to override the default ones
*/
- public function __construct()
+ public function __construct(array $options = [])
{
+ $defaults = array(
+ 'maxTopCandidates' => 5, // Max amount of top level candidates
+ );
+
+ $this->environment = Environment::createDefaultEnvironment($defaults);
+
+ $this->environment->getConfig()->merge($options);
+
$this->dom = new DOMDocument('1.0', 'utf-8');
// To avoid having a gazillion of errors on malformed HTMLs
@@ -104,6 +106,14 @@ class HTMLParser
}
/**
+ * @return Configuration The configuration object obtained from $this->environment.
+ */
+ public function getConfig()
+ {
+ return $this->environment->getConfig();
+ }
+
+ /**
* Removes all the scripts of the html.
*
* @TODO is this really necessary? Readability.js uses it to chop any script that might interfere with their
@@ -313,12 +323,12 @@ class HTMLParser
$candidate->setContentScore($candidate->getContentScore() * (1 - $this->getLinkDensity($candidate)));
- for ($i = 1; $i < $this->maxTopCandidates; $i++) {
+ for ($i = 1; $i < $this->getConfig()->getOption('maxTopCandidates'); $i++) {
$aTopCandidate = isset($topCandidates[$i]) ? $topCandidates[$i] : null;
if (!$aTopCandidate || $candidate->getContentScore() > $aTopCandidate->getContentScore()) {
array_splice($topCandidates, $i, 0, [$candidate]);
- if (count($topCandidates) > $this->maxTopCandidates) {
+ if (count($topCandidates) > $this->getConfig()->getOption('maxTopCandidates')) {
array_pop($topCandidates);
}
break;