summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
author Andres Rey <[email protected]> 2019-05-26 11:56:58 +0100
committer Andres Rey <[email protected]> 2019-05-26 11:56:58 +0100
commit 02b9c23c4062884695c7a0b9fd911c5c756927d4 (patch)
tree 58baf8183a0b447aa39e6a7858ea85c98f0d59cb /test
parent bb5e75ae05238fbe1b06d7e2164de215e2bf4b23 (diff)
Implement TestPage and upgrade the test suite
Diffstat (limited to 'test')
-rw-r--r-- test/ReadabilityTest.php 113
-rw-r--r-- test/TestPage.php 61
2 files changed, 132 insertions, 42 deletions
diff --git a/test/ReadabilityTest.php b/test/ReadabilityTest.php
index 466680e..df2a272 100644
--- a/test/ReadabilityTest.php
+++ b/test/ReadabilityTest.php
@@ -6,12 +6,21 @@ use andreskrey\Readability\Configuration;
use andreskrey\Readability\ParseException;
use andreskrey\Readability\Readability;
+/**
+ * Class ReadabilityTest
+ * @package andreskrey\Readability\Test
+ */
class ReadabilityTest extends \PHPUnit\Framework\TestCase
{
/**
+ * Test that Readability parses the HTML correctly and matches the expected result
+ *
* @dataProvider getSamplePages
+ * @param TestPage $testPage
+ *
+ * @throws ParseException
*/
- public function testReadabilityParsesHTML($html, $expectedResult, $expectedMetadata, $config, $expectedImages)
+ public function testReadabilityParsesHTML(TestPage $testPage)
{
$options = ['OriginalURL' => 'http://fakehost/test/test.html',
'FixRelativeURLs' => true,
@@ -19,28 +28,47 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
'ArticleByLine' => true
];
- if ($config === null || $expectedMetadata === null) {
- $this->markTestSkipped('Wrong test configuration');
- }
+ $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
- if ($config) {
- $options = array_merge($config, $options);
- }
+ $readability = new Readability($configuration);
+ $readability->parse($testPage->getSourceHTML());
- $configuration = new Configuration($options);
+ $this->assertSame($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.');
+ }
- $readability = new Readability($configuration);
- $readability->parse($html);
- $this->assertSame($expectedResult, $readability->getContent());
+ /**
+ * Test that Readability extracts the metadata (author, direction, excerpt, image, title) and matches the expected values
+ *
+ * @dataProvider getSamplePages
+ * @param TestPage $testPage
+ *
+ * @throws ParseException
+ */
+ public function testReadabilityParsesMetadata(TestPage $testPage)
+ {
+ $options = ['OriginalURL' => 'http://fakehost/test/test.html',
+ 'FixRelativeURLs' => true,
+ 'SubstituteEntities' => true,
+ 'ArticleByLine' => true
+ ];
+
+ $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
- foreach ($expectedMetadata as $key => $metadata) {
- $function = 'get' . $key;
- $this->assertEquals($metadata, $readability->$function(), sprintf('Failed asserting %s metadata', $key));
- }
+ $readability = new Readability($configuration);
+ $readability->parse($testPage->getSourceHTML());
+
+ $this->assertSame($testPage->getExpectedMetadata()->Author, $readability->getAuthor(), 'Parsed Author does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Direction, $readability->getDirection(), 'Parsed Direction does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Excerpt, $readability->getExcerpt(), 'Parsed Excerpt does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Image, $readability->getImage(), 'Parsed Image does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Title, $readability->getTitle(), 'Parsed Title does not match expected value.');
}
/**
+ * Test that Readability returns all the expected images from the test page
+ *
+ * @param TestPage $testPage
* @dataProvider getSamplePages
*
* @throws ParseException
@@ -52,49 +80,42 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
'substituteEntities' => true,
];
- if ($config) {
- $options = array_merge($options, $config);
- }
-
- $configuration = new Configuration($options);
+ $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
$readability = new Readability($configuration);
- $readability->parse($html);
+ $readability->parse($testPage->getSourceHTML());
- $this->assertSame(json_decode($expectedImages, true), $readability->getImages());
+ $this->assertSame($testPage->getExpectedImages(), $readability->getImages());
}
+ /**
+ * Main data provider
+ *
+ * @return \Generator
+ */
public function getSamplePages()
{
$path = pathinfo(__FILE__, PATHINFO_DIRNAME) . DIRECTORY_SEPARATOR . 'test-pages';
$testPages = scandir($path);
- if (in_array('.DS_Store', $testPages)) {
- unset($testPages[array_search('.DS_Store', $testPages)]);
- }
-
- $pages = [];
foreach (array_slice($testPages, 2) as $testPage) {
- $source = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'source.html');
- $expectedHTML = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected.html');
- $expectedImages = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-images.json');
-
- $expectedMetadata = json_decode(file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-metadata.json'));
+ $testCasePath = $path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR;
- $config = false;
- if (file_exists($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'config.json')) {
- $config = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'config.json');
- if ($config) {
- $config = json_decode($config, true);
- }
- }
+ $source = file_get_contents($testCasePath . 'source.html');
+ $expectedHTML = file_get_contents($testCasePath . 'expected.html');
+ $expectedImages = json_decode(file_get_contents($testCasePath . 'expected-images.json'), true);
+ $expectedMetadata = json_decode(file_get_contents($testCasePath . 'expected-metadata.json'));
+ $configuration = file_exists($testCasePath . 'config.json') ? json_decode(file_get_contents($testCasePath . 'config.json'), true) : [];
- $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config, $expectedImages];
+ yield $testPage => [new TestPage($configuration, $source, $expectedHTML, $expectedImages, $expectedMetadata)];
}
-
- return $pages;
}
+ /**
+ * Test that Readability throws an exception with malformed HTML
+ *
+ * @throws ParseException
+ */
public function testReadabilityThrowsExceptionWithMalformedHTML()
{
$parser = new Readability(new Configuration());
@@ -103,6 +124,11 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$parser->parse('<html>');
}
+ /**
+ * Test that Readability throws an exception with incomplete or short HTML
+ *
+ * @throws ParseException
+ */
public function testReadabilityThrowsExceptionWithUnparseableHTML()
{
$parser = new Readability(new Configuration());
@@ -111,6 +137,9 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase
$parser->parse('<html><body><p></p></body></html>');
}
+ /**
+ * Test that the Readability object has no content as soon as it is instantiated
+ */
public function testReadabilityCallGetContentWithNoContent()
{
$parser = new Readability(new Configuration());
diff --git a/test/TestPage.php b/test/TestPage.php
new file mode 100644
index 0000000..7859481
--- /dev/null
+++ b/test/TestPage.php
@@ -0,0 +1,61 @@
+<?php
+
+namespace andreskrey\Readability\Test;
+
+class TestPage
+{
+ private $configuration;
+ private $sourceHTML;
+ private $expectedHTML;
+ private $expectedImages;
+ private $expectedMetadata;
+
+ public function __construct( $configuration, $sourceHTML, $expectedHTML, $expectedImages, $expectedMetadata)
+ {
+ $this->configuration = $configuration;
+ $this->sourceHTML = $sourceHTML;
+ $this->expectedHTML = $expectedHTML;
+ $this->expectedImages = $expectedImages;
+ $this->expectedMetadata = $expectedMetadata;
+ }
+
+ /**
+ * @return array
+ */
+ public function getConfiguration()
+ {
+ return $this->configuration;
+ }
+
+ /**
+ * @return string
+ */
+ public function getSourceHTML()
+ {
+ return $this->sourceHTML;
+ }
+
+ /**
+ * @return string
+ */
+ public function getExpectedHTML()
+ {
+ return $this->expectedHTML;
+ }
+
+ /**
+ * @return array
+ */
+ public function getExpectedImages()
+ {
+ return $this->expectedImages;
+ }
+
+ /**
+ * @return \stdClass
+ */
+ public function getExpectedMetadata()
+ {
+ return $this->expectedMetadata;
+ }
+}