From a9b9bd9aa00322c988847de6de91f37ba5e89034 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Fri, 10 Mar 2017 10:48:44 +0000 Subject: Fixed all test cases and bugs, now 100% of our tests pass. BREAK OUT THE CHAMPAGNE! --- src/HTMLParser.php | 6 +++--- test/HTMLParserTest.php | 2 +- test/test-pages/ars-1/config.json | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 test/test-pages/ars-1/config.json diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 5d02d05..0b407ae 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -626,7 +626,7 @@ class HTMLParser continue; } // Discard nodes with less than 25 characters, without blank space - if (mb_strlen($node->getValue(true)) < 25) { + if (mb_strlen($node->getTextContent(true)) < 25) { continue; } @@ -641,10 +641,10 @@ class HTMLParser $contentScore = 1; // Add points for any commas within this paragraph. - $contentScore += count(explode(',', $node->getValue(true))); + $contentScore += count(explode(',', $node->getTextContent(true))); // For every 100 characters in this paragraph, add another point. Up to 3 points. - $contentScore += min(floor(mb_strlen($node->getValue(true)) / 100), 3); + $contentScore += min(floor(mb_strlen($node->getTextContent(true)) / 100), 3); // Initialize and score ancestors. /** @var Readability $ancestor */ diff --git a/test/HTMLParserTest.php b/test/HTMLParserTest.php index 2db0443..673a1d5 100644 --- a/test/HTMLParserTest.php +++ b/test/HTMLParserTest.php @@ -42,7 +42,7 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase $expectedMetadata = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-metadata.json'); $config = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 
'config.json'); if ($config) { - $config = json_decode($config); + $config = json_decode($config, true); } $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config]; diff --git a/test/test-pages/ars-1/config.json b/test/test-pages/ars-1/config.json new file mode 100644 index 0000000..6441edf --- /dev/null +++ b/test/test-pages/ars-1/config.json @@ -0,0 +1,3 @@ +{ + "articleByLine": true +} \ No newline at end of file -- cgit v1.2.3