diff options
author | Andres Rey <[email protected]> | 2017-03-03 16:58:42 +0000 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-03-03 16:58:42 +0000 |
commit | 3b73cde640956aa08cee59a9be44d941a819b5e6 (patch) | |
tree | 4dd3dc6dd52ca7a67b954231cb18ddf9f660aef9 /test/HTMLParserTest.php | |
parent | 0b2e1c28ca9ad54290fd036afbcda7e808becc9e (diff) |
Functions to normalize space and disable substitute entities
Diffstat (limited to 'test/HTMLParserTest.php')
-rw-r--r-- | test/HTMLParserTest.php | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/test/HTMLParserTest.php b/test/HTMLParserTest.php index c97745d..55b82b8 100644 --- a/test/HTMLParserTest.php +++ b/test/HTMLParserTest.php @@ -9,12 +9,19 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase /** * @dataProvider getSamplePages */ - public function testHTMLParserParsesHTML($html, $expectedResult, $expectedMetadata) + public function testHTMLParserParsesHTML($html, $expectedResult, $expectedMetadata, $config) { - $readability = new HTMLParser([ - 'originalURL' => 'http://fakehost/test/test.html', - 'fixRelativeURLs' => true - ]); + $options = ['originalURL' => 'http://fakehost/test/test.html', + 'fixRelativeURLs' => true, + 'normalizeSpaces' => false, + 'substituteEntities' => true + ]; + + if ($config) { + $options = $config; + } + + $readability = new HTMLParser($options); $result = $readability->parse($html); $this->assertEquals($expectedResult, $result['html']); @@ -34,8 +41,12 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase $source = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'source.html'); $expectedHTML = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected.html'); $expectedMetadata = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-metadata.json'); + $config = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'config.json'); + if ($config) { + $config = json_decode($config); + } - $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata]; + $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config]; } return $pages; |