From 8ea537123d1cef38f25f9fbe92e3a9c0f89de55a Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 13 Dec 2022 20:08:43 +0300 Subject: move af_readability out of master tree --- .../readability.php/test/ReadabilityTest.php | 203 --------------------- 1 file changed, 203 deletions(-) delete mode 100644 plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php (limited to 'plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php') diff --git a/plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php b/plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php deleted file mode 100644 index dafd54021..000000000 --- a/plugins/af_readability/vendor/fivefilters/readability.php/test/ReadabilityTest.php +++ /dev/null @@ -1,203 +0,0 @@ - 'http://fakehost/test/test.html', - 'FixRelativeURLs' => true, - 'SubstituteEntities' => true, - 'ArticleByLine' => true - ]; - - $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options)); - - $readability = new Readability($configuration); - $readability->parse($testPage->getSourceHTML()); - - // Let's (crudely) remove whitespace between tags here to simplify comparison. - // This isn't used for output. - $from = ['/\>[^\S ]+/s', '/[^\S ]+\ ', '<', '\\1', '><']; - $expected_no_whitespace = preg_replace($from, $to, $testPage->getExpectedHTML()); - $readability_no_whitespace = preg_replace($from, $to, $readability->getContent()); - - if (getenv('output-changes') && $expected_no_whitespace !== $readability_no_whitespace) { - @mkdir(__DIR__.'/changed/'.$testPage->getSlug()); - $new_expected = __DIR__.'/changed/'.$testPage->getSlug().'/expected.html'; - $old_expected = __DIR__.'/test-pages/'.$testPage->getSlug().'/expected.html'; - //file_put_contents(__DIR__.'/changed/'.$testPage->getSlug().'/readability.html', $readability_no_whitespace); - //file_put_contents(__DIR__.'/changed/'.$testPage->getSlug().'/expected-current.html', $expected_no_whitespace); - file_put_contents($new_expected, $readability->getContent()); - if (getenv('output-diff')) { - file_put_contents(__DIR__.'/changed/'.$testPage->getSlug().'/diff-expected.txt', shell_exec(sprintf('diff -u -d %s %s', $old_expected, $new_expected))); - } - - } - - $this->assertSame($expected_no_whitespace, $readability_no_whitespace, 'Parsed text does not match the expected one.'); - - //$this->assertSame($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.'); - //$this->assertXmlStringEqualsXmlString($testPage->getExpectedHTML(), $readability->getContent(), 'Parsed text does not match the expected one.'); - } - - /** - * Test that Readability parses the HTML correctly and matches the expected result. - * - * @dataProvider getSamplePages - * - * @param TestPage $testPage - * - * @throws ParseException - */ - public function testReadabilityParsesMetadata(TestPage $testPage) - { - $options = ['OriginalURL' => 'http://fakehost/test/test.html', - 'FixRelativeURLs' => true, - 'SubstituteEntities' => true, - 'ArticleByLine' => true - ]; - - $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options)); - - $readability = new Readability($configuration); - $readability->parse($testPage->getSourceHTML()); - - $metadata = [ - 'Author' => $readability->getAuthor(), - 'Direction' => $readability->getDirection(), - 'Excerpt' => $readability->getExcerpt(), - 'Image' => $readability->getImage(), - 'Title' => $readability->getTitle(), - 'SiteName' => $readability->getSiteName() - ]; - - if (getenv('output-changes') && (array)$testPage->getExpectedMetadata() !== $metadata) { - @mkdir(__DIR__.'/changed/'.$testPage->getSlug()); - $new_expected = __DIR__.'/changed/'.$testPage->getSlug().'/expected-metadata.json'; - $old_expected = __DIR__.'/test-pages/'.$testPage->getSlug().'/expected-metadata.json'; - //file_put_contents(__DIR__.'/changed/'.$testPage->getSlug().'/expected-metadata-current.json', json_encode($testPage->getExpectedMetadata(), JSON_PRETTY_PRINT)); - file_put_contents($new_expected, json_encode((object)$metadata, JSON_PRETTY_PRINT)); - if (getenv('output-diff')) { - file_put_contents(__DIR__.'/changed/'.$testPage->getSlug().'/diff-expected-metadata.txt', shell_exec(sprintf('diff -u -d %s %s', $old_expected, $new_expected))); - } - } - - $this->assertSame($testPage->getExpectedMetadata()->Author, $readability->getAuthor(), 'Parsed Author does not match expected value.'); - $this->assertSame($testPage->getExpectedMetadata()->Direction, $readability->getDirection(), 'Parsed Direction does not match expected value.'); - $this->assertSame($testPage->getExpectedMetadata()->Excerpt, $readability->getExcerpt(), 'Parsed Excerpt does not match expected value.'); - $this->assertSame($testPage->getExpectedMetadata()->Image, $readability->getImage(), 'Parsed Image does not match expected value.'); - $this->assertSame($testPage->getExpectedMetadata()->Title, $readability->getTitle(), 'Parsed Title does not match expected value.'); - } - - /** - * Test that Readability returns all the expected images from the test page. - * - * @param TestPage $testPage - * @dataProvider getSamplePages - * - * @throws ParseException - */ - public function testHTMLParserParsesImages(TestPage $testPage) - { - $options = ['OriginalURL' => 'http://fakehost/test/test.html', - 'fixRelativeURLs' => true, - 'substituteEntities' => true, - ]; - - $configuration = new Configuration(array_merge($testPage->getConfiguration(), $options)); - - $readability = new Readability($configuration); - $readability->parse($testPage->getSourceHTML()); - - if (getenv('output-changes') && $testPage->getExpectedImages() !== array_values($readability->getImages())) { - @mkdir(__DIR__.'/changed/'.$testPage->getSlug()); - $new_expected = __DIR__.'/changed/'.$testPage->getSlug().'/expected-images.json'; - $old_expected = __DIR__.'/test-pages/'.$testPage->getSlug().'/expected-images.json'; - //file_put_contents(__DIR__.'/changed/'.$testPage->getSlug().'/expected-images-current.json', json_encode($testPage->getExpectedImages(), JSON_PRETTY_PRINT)); - file_put_contents($new_expected, json_encode(array_values($readability->getImages()), JSON_PRETTY_PRINT)); - if (getenv('output-diff')) { - file_put_contents(__DIR__.'/changed/'.$testPage->getSlug().'/diff-expected-images.txt', shell_exec(sprintf('diff -u -d %s %s', $old_expected, $new_expected))); - } - } - - $this->assertSame($testPage->getExpectedImages(), array_values($readability->getImages())); - } - - /** - * Main data provider. - * - * @return \Generator - */ - public function getSamplePages() - { - $path = pathinfo(__FILE__, PATHINFO_DIRNAME) . DIRECTORY_SEPARATOR . 'test-pages'; - $testPages = scandir($path); - - foreach (array_slice($testPages, 2) as $testPage) { - $testCasePath = $path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR; - - $slug = $testPage; - $source = file_get_contents($testCasePath . 'source.html'); - $expectedHTML = file_exists($testCasePath . 'expected.html') ? file_get_contents($testCasePath . 'expected.html') : ''; - $expectedImages = file_exists($testCasePath . 'expected-images.json') ? json_decode(file_get_contents($testCasePath . 'expected-images.json'), true) : []; - $expectedMetadata = file_exists($testCasePath . 'expected-metadata.json') ? json_decode(file_get_contents($testCasePath . 'expected-metadata.json')) : (object)[]; - $configuration = file_exists($testCasePath . 'config.json') ? json_decode(file_get_contents($testCasePath . 'config.json'), true) : []; - - yield $testPage => [new TestPage($slug, $configuration, $source, $expectedHTML, $expectedImages, $expectedMetadata)]; - } - } - - /** - * Test that Readability throws an exception with malformed HTML. - * - * @throws ParseException - */ - public function testReadabilityThrowsExceptionWithMalformedHTML() - { - $parser = new Readability(new Configuration()); - $this->expectException(ParseException::class); - $this->expectExceptionMessage('Invalid or incomplete HTML.'); - $parser->parse(''); - } - - /** - * Test that Readability throws an exception with incomplete or short HTML. - * - * @throws ParseException - */ - public function testReadabilityThrowsExceptionWithUnparseableHTML() - { - $parser = new Readability(new Configuration()); - $this->expectException(ParseException::class); - $this->expectExceptionMessage('Could not parse text.'); - $parser->parse('

'); - } - - /** - * Test that the Readability object has no content as soon as it is instantiated. - */ - public function testReadabilityCallGetContentWithNoContent() - { - $parser = new Readability(new Configuration()); - $this->assertNull($parser->getContent()); - } -} -- cgit v1.2.3