diff options
author | Andres Rey <[email protected]> | 2019-05-26 11:56:58 +0100 |
---|---|---|
committer | Andres Rey <[email protected]> | 2019-05-26 11:56:58 +0100 |
commit | 02b9c23c4062884695c7a0b9fd911c5c756927d4 (patch) | |
tree | 58baf8183a0b447aa39e6a7858ea85c98f0d59cb /test | |
parent | bb5e75ae05238fbe1b06d7e2164de215e2bf4b23 (diff) |
Implement TestPage and upgrade the test suite
Diffstat (limited to 'test')
-rw-r--r-- | test/ReadabilityTest.php | 113 | ||||
-rw-r--r-- | test/TestPage.php | 61 |
2 files changed, 132 insertions, 42 deletions
diff --git a/test/ReadabilityTest.php b/test/ReadabilityTest.php index 466680e..df2a272 100644 --- a/test/ReadabilityTest.php +++ b/test/ReadabilityTest.php @@ -6,12 +6,21 @@ use andreskrey\Readability\Configuration; use andreskrey\Readability\ParseException; use andreskrey\Readability\Readability; +/** + * Class ReadabilityTest + * @package andreskrey\Readability\Test + */ class ReadabilityTest extends \PHPUnit\Framework\TestCase { /** + * Test that Readability parses the HTML correctly and matches the expected result + * * @dataProvider getSamplePages + * @param TestPage $testPage + * + * @throws ParseException */ - public function testReadabilityParsesHTML($html, $expectedResult, $expectedMetadata, $config, $expectedImages) + public function testReadabilityParsesHTML(TestPage $testPage) { $options = ['OriginalURL' => 'http://fakehost/test/test.html', 'FixRelativeURLs' => true, @@ -19,28 +28,47 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase 'ArticleByLine' => true ]; - if ($config === null || $expectedMetadata === null) { - $this->markTestSkipped('Wrong test configuration'); - } + $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options)); - if ($config) { - $options = array_merge($config, $options); - } + $readability = new Readability($configuration); + $readability->parse($testPage->getSourceHTML()); - $configuration = new Configuration($options); + $this->assertSame($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.'); + } - $readability = new Readability($configuration); - $readability->parse($html); - $this->assertSame($expectedResult, $readability->getContent()); + /** + * Test that Readability parses the HTML correctly and matches the expected result + * + * @dataProvider getSamplePages + * @param TestPage $testPage + * + * @throws ParseException + */ + public function testReadabilityParsesMetadata(TestPage $testPage) + { + $options = ['OriginalURL' => 'http://fakehost/test/test.html', + 'FixRelativeURLs' => true, + 'SubstituteEntities' => true, + 'ArticleByLine' => true + ]; + + $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options)); - foreach ($expectedMetadata as $key => $metadata) { - $function = 'get' . $key; - $this->assertEquals($metadata, $readability->$function(), sprintf('Failed asserting %s metadata', $key)); - } + $readability = new Readability($configuration); + $readability->parse($testPage->getSourceHTML()); + + $this->assertSame($testPage->getExpectedMetadata()->Author, $readability->getAuthor(), 'Parsed Author does not match expected value.'); + $this->assertSame($testPage->getExpectedMetadata()->Direction, $readability->getDirection(), 'Parsed Direction does not match expected value.'); + $this->assertSame($testPage->getExpectedMetadata()->Excerpt, $readability->getExcerpt(), 'Parsed Excerpt does not match expected value.'); + $this->assertSame($testPage->getExpectedMetadata()->Image, $readability->getImage(), 'Parsed Image does not match expected value.'); + $this->assertSame($testPage->getExpectedMetadata()->Title, $readability->getTitle(), 'Parsed Title does not match expected value.'); } /** + * Test that Readability returns all the expected images from the test page + * + * @param TestPage $testPage * @dataProvider getSamplePages * * @throws ParseException @@ -52,49 +80,42 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase 'substituteEntities' => true, ]; - if ($config) { - $options = array_merge($options, $config); - } - - $configuration = new Configuration($options); + $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options)); $readability = new Readability($configuration); - $readability->parse($html); + $readability->parse($testPage->getSourceHTML()); - $this->assertSame(json_decode($expectedImages, true), $readability->getImages()); + $this->assertSame($testPage->getExpectedImages(), $readability->getImages()); } + /** + * Main data provider + * + * @return \Generator + */ public function getSamplePages() { $path = pathinfo(__FILE__, PATHINFO_DIRNAME) . DIRECTORY_SEPARATOR . 'test-pages'; $testPages = scandir($path); - if (in_array('.DS_Store', $testPages)) { - unset($testPages[array_search('.DS_Store', $testPages)]); - } - - $pages = []; foreach (array_slice($testPages, 2) as $testPage) { - $source = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'source.html'); - $expectedHTML = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected.html'); - $expectedImages = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-images.json'); - - $expectedMetadata = json_decode(file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-metadata.json')); + $testCasePath = $path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR; - $config = false; - if (file_exists($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'config.json')) { - $config = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'config.json'); - if ($config) { - $config = json_decode($config, true); - } - } + $source = file_get_contents($testCasePath . 'source.html'); + $expectedHTML = file_get_contents($testCasePath . 'expected.html'); + $expectedImages = json_decode(file_get_contents($testCasePath . 'expected-images.json'), true); + $expectedMetadata = json_decode(file_get_contents($testCasePath . 'expected-metadata.json')); + $configuration = file_exists($testCasePath . 'config.json') ? json_decode(file_get_contents($testCasePath . 'config.json'), true) : []; - $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config, $expectedImages]; + yield $testPage => [new TestPage($configuration, $source, $expectedHTML, $expectedImages, $expectedMetadata)]; } - - return $pages; } + /** + * Test that Readability throws an exception with malformed HTML + * + * @throws ParseException + */ public function testReadabilityThrowsExceptionWithMalformedHTML() { $parser = new Readability(new Configuration()); @@ -103,6 +124,11 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $parser->parse('<html>'); } + /** + * Test that Readability throws an exception with incomplete or short HTML + * + * @throws ParseException + */ public function testReadabilityThrowsExceptionWithUnparseableHTML() { $parser = new Readability(new Configuration()); @@ -111,6 +137,9 @@ class ReadabilityTest extends \PHPUnit\Framework\TestCase $parser->parse('<html><body><p></p></body></html>'); } + /** + * Test that the Readability object has no content as soon as it is instantiated + */ public function testReadabilityCallGetContentWithNoContent() { $parser = new Readability(new Configuration()); diff --git a/test/TestPage.php b/test/TestPage.php new file mode 100644 index 0000000..7859481 --- /dev/null +++ b/test/TestPage.php @@ -0,0 +1,61 @@ +<?php + +namespace andreskrey\Readability\Test; + +class TestPage +{ + private $configuration; + private $sourceHTML; + private $expectedHTML; + private $expectedImages; + private $expectedMetadata; + + public function __construct( $configuration, $sourceHTML, $expectedHTML, $expectedImages, $expectedMetadata) + { + $this->configuration = $configuration; + $this->sourceHTML = $sourceHTML; + $this->expectedHTML = $expectedHTML; + $this->expectedImages = $expectedImages; + $this->expectedMetadata = $expectedMetadata; + } + + /** + * @return array + */ + public function getConfiguration() + { + return $this->configuration; + } + + /** + * @return null + */ + public function getSourceHTML() + { + return $this->sourceHTML; + } + + /** + * @return null + */ + public function getExpectedHTML() + { + return $this->expectedHTML; + } + + /** + * @return mixed + */ + public function getExpectedImages() + { + return $this->expectedImages; + } + + /** + * @return \stdClass + */ + public function getExpectedMetadata() + { + return $this->expectedMetadata; + } +} |