summaryrefslogtreecommitdiff
path: root/test/HTMLParserTest.php
blob: c97745dfaddbb2a7205328a56b343db83bf704c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
<?php

namespace andreskrey\Readability\Test;

use andreskrey\Readability\HTMLParser;

/**
 * Runs HTMLParser over every fixture directory found in the "test-pages"
 * folder next to this file and compares the produced HTML with the stored
 * expectation.
 */
class HTMLParserTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Parses one fixture page and checks the resulting HTML.
     *
     * @dataProvider getSamplePages
     *
     * @param string $html             Contents of the fixture's source.html.
     * @param string $expectedResult   Contents of the fixture's expected.html.
     * @param string $expectedMetadata Raw contents of the fixture's expected-metadata.json.
     *                                 Supplied by the provider but not asserted by this test.
     */
    public function testHTMLParserParsesHTML($html, $expectedResult, $expectedMetadata)
    {
        $readability = new HTMLParser([
            'originalURL' => 'http://fakehost/test/test.html',
            'fixRelativeURLs' => true
        ]);
        $result = $readability->parse($html);

        $this->assertEquals($expectedResult, $result['html']);
    }

    /**
     * Builds one data set per fixture directory inside "test-pages".
     *
     * Each data set is keyed by the directory name and holds, in order, the
     * contents of source.html, expected.html and expected-metadata.json.
     *
     * @return array Map of fixture name => [source, expected HTML, expected metadata].
     *
     * @throws \RuntimeException When the fixtures directory cannot be listed.
     */
    public function getSamplePages()
    {
        $path = pathinfo(__FILE__, PATHINFO_DIRNAME) . DIRECTORY_SEPARATOR . 'test-pages';

        $entries = scandir($path);
        if ($entries === false) {
            throw new \RuntimeException('Unable to read test pages directory: ' . $path);
        }

        $pages = [];

        foreach ($entries as $testPage) {
            $pagePath = $path . DIRECTORY_SEPARATOR . $testPage;

            // Skip the dot entries by name rather than by position: scandir()
            // sorts alphabetically, so names starting with characters such as
            // "-" or "+" come before "." and would break a positional slice.
            // Stray files (e.g. .DS_Store) are ignored because only
            // directories are fixtures.
            if ($testPage === '.' || $testPage === '..' || !is_dir($pagePath)) {
                continue;
            }

            $source = file_get_contents($pagePath . DIRECTORY_SEPARATOR . 'source.html');
            $expectedHTML = file_get_contents($pagePath . DIRECTORY_SEPARATOR . 'expected.html');
            $expectedMetadata = file_get_contents($pagePath . DIRECTORY_SEPARATOR . 'expected-metadata.json');

            $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata];
        }

        return $pages;
    }
}