summaryrefslogtreecommitdiff
path: root/test/ReadabilityTest.php
blob: c20574eee12f04c85f1b675000c5c43c70d87d00 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
<?php

namespace andreskrey\Readability\Test;

use andreskrey\Readability\Configuration;
use andreskrey\Readability\ParseException;
use andreskrey\Readability\Readability;

/**
 * Tests for Readability, driven mostly by the fixture directories found under
 * "test-pages" next to this file.
 */
class ReadabilityTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Parses a sample page and checks both the extracted content and every
     * metadata value listed in the page's expected-metadata.json.
     *
     * @dataProvider getSamplePages
     *
     * @param mixed $html             Contents of the page's source.html
     * @param mixed $expectedResult   Contents of the page's expected.html
     * @param mixed $expectedMetadata Decoded expected-metadata.json; null when it could not be decoded
     * @param mixed $config           Decoded config.json, or a falsy value when the page has none
     * @param mixed $expectedImages   Contents of expected-images.json (not used by this test)
     */
    public function testReadabilityParsesHTML($html, $expectedResult, $expectedMetadata, $config, $expectedImages)
    {
        // json_decode() yields null for undecodable input, so a null here
        // means the fixture itself is broken rather than the parser.
        if ($config === null || $expectedMetadata === null) {
            $this->markTestSkipped('Wrong test configuration');
        }

        $options = [
            'OriginalURL' => 'http://fakehost/test/test.html',
            'FixRelativeURLs' => true,
            'SubstituteEntities' => true,
            'ArticleByLine' => true,
        ];

        if ($config) {
            // The shared options are merged last, so they win over the
            // per-page config whenever both define the identical key.
            $options = array_merge($config, $options);
        }

        $readability = new Readability(new Configuration($options));
        $readability->parse($html);

        $this->assertSame($expectedResult, $readability->getContent());

        // Each metadata key names a getter on Readability ("Title" => getTitle()).
        foreach ($expectedMetadata as $key => $metadata) {
            $getter = 'get' . $key;
            $this->assertEquals($metadata, $readability->$getter(), sprintf('Failed asserting %s metadata', $key));
        }
    }

    /**
     * Parses a sample page and checks that the JSON-encoded list of images
     * matches the page's expected-images.json byte for byte.
     *
     * @dataProvider getSamplePages
     *
     * @param mixed $html             Contents of the page's source.html
     * @param mixed $expectedResult   Contents of expected.html (not used by this test)
     * @param mixed $expectedMetadata Decoded expected-metadata.json (not used by this test)
     * @param mixed $config           Decoded config.json, or a falsy value when the page has none
     * @param mixed $expectedImages   Contents of the page's expected-images.json
     */
    public function testHTMLParserParsesImages($html, $expectedResult, $expectedMetadata, $config, $expectedImages)
    {
        $options = [
            'OriginalURL' => 'http://fakehost/test/test.html',
            'fixRelativeURLs' => true,
            'substituteEntities' => true,
        ];

        if ($config) {
            // Here the per-page config is merged last, so it overrides the
            // shared options on identical keys.
            $options = array_merge($options, $config);
        }

        $readability = new Readability(new Configuration($options));
        $readability->parse($html);

        $this->assertSame($expectedImages, json_encode($readability->getImages()));
    }

    /**
     * Data provider: builds one data set per fixture directory found in
     * "test-pages", keyed by the directory name.
     *
     * Each data set is
     * [source.html, expected.html, decoded expected-metadata.json,
     *  decoded config.json (or a falsy value), expected-images.json].
     *
     * @return array
     *
     * @throws \RuntimeException When the test-pages directory cannot be read
     */
    public function getSamplePages()
    {
        $path = __DIR__ . DIRECTORY_SEPARATOR . 'test-pages';

        $entries = scandir($path);
        if ($entries === false) {
            throw new \RuntimeException(sprintf('Unable to read test pages directory "%s".', $path));
        }

        // scandir() sorts alphabetically and does not guarantee that '.' and
        // '..' come first (names starting with e.g. '!' or '-' sort before
        // them), so drop the unwanted entries by name instead of by position.
        $testPages = array_diff($entries, ['.', '..', '.DS_Store']);

        $pages = [];

        foreach ($testPages as $testPage) {
            $pageDir = $path . DIRECTORY_SEPARATOR . $testPage;

            // A test page is a directory of fixture files; ignore stray files.
            if (!is_dir($pageDir)) {
                continue;
            }

            $source = file_get_contents($pageDir . DIRECTORY_SEPARATOR . 'source.html');
            $expectedHTML = file_get_contents($pageDir . DIRECTORY_SEPARATOR . 'expected.html');
            $expectedImages = file_get_contents($pageDir . DIRECTORY_SEPARATOR . 'expected-images.json');

            // Decoded without the "assoc" flag; the consumer only iterates
            // over the result as key => value pairs.
            $expectedMetadata = json_decode(file_get_contents($pageDir . DIRECTORY_SEPARATOR . 'expected-metadata.json'));

            // config.json is optional: false means "no config file", while an
            // undecodable file ends up as null after json_decode().
            $config = false;
            $configFile = $pageDir . DIRECTORY_SEPARATOR . 'config.json';
            if (file_exists($configFile)) {
                $config = file_get_contents($configFile);
                if ($config) {
                    $config = json_decode($config, true);
                }
            }

            $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config, $expectedImages];
        }

        return $pages;
    }

    /**
     * Parsing a lone "<html>" tag must raise a ParseException with the
     * "Invalid or incomplete HTML." message.
     */
    public function testReadabilityThrowsExceptionWithMalformedHTML()
    {
        $parser = new Readability(new Configuration());
        $this->expectException(ParseException::class);
        $this->expectExceptionMessage('Invalid or incomplete HTML.');
        $parser->parse('<html>');
    }

    /**
     * Parsing a document whose only content is an empty paragraph must raise
     * a ParseException with the "Could not parse text." message.
     */
    public function testReadabilityThrowsExceptionWithUnparseableHTML()
    {
        $parser = new Readability(new Configuration());
        $this->expectException(ParseException::class);
        $this->expectExceptionMessage('Could not parse text.');
        $parser->parse('<html><body><p></p></body></html>');
    }

    /**
     * getContent() must return null when parse() has not been called.
     */
    public function testReadabilityCallGetContentWithNoContent()
    {
        $parser = new Readability(new Configuration());
        $this->assertNull($parser->getContent());
    }
}