summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Readability.php19
1 files changed, 18 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php
index 323cdc2..aadd631 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -671,7 +671,7 @@ class Readability
* safely converted into plain P elements to avoid confusing the scoring
* algorithm with DIVs which are, in practice, paragraphs.
*/
- if ($node->hasSinglePNode()) {
+ if ($node->hasSingleTagInsideElement('p')) {
$this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
$pNode = $node->getChildren(true)[0];
$node->parentNode->replaceChild($pNode, $node);
@@ -1237,6 +1237,23 @@ class Readability
}
}
+ // Remove single-cell tables
+ foreach (iterator_to_array($article->getElementsByTagName('table')) as $table) {
+ /** @var DOMNode $table */
+ $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->childNodes[0] : $table;
+ if ($tbody->hasSingleTagInsideElement('tr')) {
+ $row = $tbody->childNodes[0];
+ if ($row->hasSingleTagInsideElement('td')) {
+ $cell = $row->childNodes[0];
+ $cell = NodeUtility::setNodeTag($cell, (array_reduce(iterator_to_array($this->childNodes), function ($carry, $node) {
+ return $carry || $node->isPhrasingContent();
+ })) ? 'p' : 'div');
+ $table->parentNode->replaceChild($cell, $table);
+ }
+ }
+
+ }
+
return $article;
}