summaryrefslogtreecommitdiff
path: root/plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php')
-rw-r--r--plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php148
1 files changed, 148 insertions, 0 deletions
diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php b/plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php
new file mode 100644
index 000000000..ca208e894
--- /dev/null
+++ b/plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php
@@ -0,0 +1,148 @@
+<?php
+
+namespace andreskrey\Readability\Test;
+
+use andreskrey\Readability\Configuration;
+use andreskrey\Readability\ParseException;
+use andreskrey\Readability\Readability;
+
+/**
+ * Class ReadabilityTest.
+ */
+class ReadabilityTest extends \PHPUnit\Framework\TestCase
+{
+ /**
+ * Test that Readability parses the HTML correctly and matches the expected result.
+ *
+ * @dataProvider getSamplePages
+ *
+ * @param TestPage $testPage
+ *
+ * @throws ParseException
+ */
+ public function testReadabilityParsesHTML(TestPage $testPage)
+ {
+ $options = ['OriginalURL' => 'http://fakehost/test/test.html',
+ 'FixRelativeURLs' => true,
+ 'SubstituteEntities' => true,
+ 'ArticleByLine' => true
+ ];
+
+ $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
+
+ $readability = new Readability($configuration);
+ $readability->parse($testPage->getSourceHTML());
+
+ $this->assertSame($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.');
+ }
+
+ /**
+ * Test that Readability parses the HTML correctly and matches the expected result.
+ *
+ * @dataProvider getSamplePages
+ *
+ * @param TestPage $testPage
+ *
+ * @throws ParseException
+ */
+ public function testReadabilityParsesMetadata(TestPage $testPage)
+ {
+ $options = ['OriginalURL' => 'http://fakehost/test/test.html',
+ 'FixRelativeURLs' => true,
+ 'SubstituteEntities' => true,
+ 'ArticleByLine' => true
+ ];
+
+ $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
+
+ $readability = new Readability($configuration);
+ $readability->parse($testPage->getSourceHTML());
+
+ $this->assertSame($testPage->getExpectedMetadata()->Author, $readability->getAuthor(), 'Parsed Author does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Direction, $readability->getDirection(), 'Parsed Direction does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Excerpt, $readability->getExcerpt(), 'Parsed Excerpt does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Image, $readability->getImage(), 'Parsed Image does not match expected value.');
+ $this->assertSame($testPage->getExpectedMetadata()->Title, $readability->getTitle(), 'Parsed Title does not match expected value.');
+ }
+
+ /**
+ * Test that Readability returns all the expected images from the test page.
+ *
+ * @param TestPage $testPage
+ * @dataProvider getSamplePages
+ *
+ * @throws ParseException
+ */
+ public function testHTMLParserParsesImages(TestPage $testPage)
+ {
+ $options = ['OriginalURL' => 'http://fakehost/test/test.html',
+ 'fixRelativeURLs' => true,
+ 'substituteEntities' => true,
+ ];
+
+ $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options));
+
+ $readability = new Readability($configuration);
+ $readability->parse($testPage->getSourceHTML());
+
+ $this->assertSame($testPage->getExpectedImages(), $readability->getImages());
+ }
+
+ /**
+ * Main data provider.
+ *
+ * @return \Generator
+ */
+ public function getSamplePages()
+ {
+ $path = pathinfo(__FILE__, PATHINFO_DIRNAME) . DIRECTORY_SEPARATOR . 'test-pages';
+ $testPages = scandir($path);
+
+ foreach (array_slice($testPages, 2) as $testPage) {
+ $testCasePath = $path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR;
+
+ $source = file_get_contents($testCasePath . 'source.html');
+ $expectedHTML = file_get_contents($testCasePath . 'expected.html');
+ $expectedImages = json_decode(file_get_contents($testCasePath . 'expected-images.json'), true);
+ $expectedMetadata = json_decode(file_get_contents($testCasePath . 'expected-metadata.json'));
+ $configuration = file_exists($testCasePath . 'config.json') ? json_decode(file_get_contents($testCasePath . 'config.json'), true) : [];
+
+ yield $testPage => [new TestPage($configuration, $source, $expectedHTML, $expectedImages, $expectedMetadata)];
+ }
+ }
+
+ /**
+ * Test that Readability throws an exception with malformed HTML.
+ *
+ * @throws ParseException
+ */
+ public function testReadabilityThrowsExceptionWithMalformedHTML()
+ {
+ $parser = new Readability(new Configuration());
+ $this->expectException(ParseException::class);
+ $this->expectExceptionMessage('Invalid or incomplete HTML.');
+ $parser->parse('<html>');
+ }
+
+ /**
+ * Test that Readability throws an exception with incomplete or short HTML.
+ *
+ * @throws ParseException
+ */
+ public function testReadabilityThrowsExceptionWithUnparseableHTML()
+ {
+ $parser = new Readability(new Configuration());
+ $this->expectException(ParseException::class);
+ $this->expectExceptionMessage('Could not parse text.');
+ $parser->parse('<html><body><p></p></body></html>');
+ }
+
+ /**
+ * Test that the Readability object has no content as soon as it is instantiated.
+ */
+ public function testReadabilityCallGetContentWithNoContent()
+ {
+ $parser = new Readability(new Configuration());
+ $this->assertNull($parser->getContent());
+ }
+}