summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/HTMLParser.php 6
-rw-r--r-- test/HTMLParserTest.php 2
-rw-r--r-- test/test-pages/ars-1/config.json 3
3 files changed, 7 insertions, 4 deletions
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 5d02d05..0b407ae 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -626,7 +626,7 @@ class HTMLParser
continue;
}
// Discard nodes with less than 25 characters, without blank space
- if (mb_strlen($node->getValue(true)) < 25) {
+ if (mb_strlen($node->getTextContent(true)) < 25) {
continue;
}
@@ -641,10 +641,10 @@ class HTMLParser
$contentScore = 1;
// Add points for any commas within this paragraph.
- $contentScore += count(explode(',', $node->getValue(true)));
+ $contentScore += count(explode(',', $node->getTextContent(true)));
// For every 100 characters in this paragraph, add another point. Up to 3 points.
- $contentScore += min(floor(mb_strlen($node->getValue(true)) / 100), 3);
+ $contentScore += min(floor(mb_strlen($node->getTextContent(true)) / 100), 3);
// Initialize and score ancestors.
/** @var Readability $ancestor */
diff --git a/test/HTMLParserTest.php b/test/HTMLParserTest.php
index 2db0443..673a1d5 100644
--- a/test/HTMLParserTest.php
+++ b/test/HTMLParserTest.php
@@ -42,7 +42,7 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase
$expectedMetadata = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-metadata.json');
$config = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'config.json');
if ($config) {
- $config = json_decode($config);
+ $config = json_decode($config, true);
}
$pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config];
diff --git a/test/test-pages/ars-1/config.json b/test/test-pages/ars-1/config.json
new file mode 100644
index 0000000..6441edf
--- /dev/null
+++ b/test/test-pages/ars-1/config.json
@@ -0,0 +1,3 @@
+{
+ "articleByLine": true
+}
\ No newline at end of file