diff options
-rw-r--r-- | src/Readability.php | 19 |
1 files changed, 18 insertions, 1 deletions
diff --git a/src/Readability.php b/src/Readability.php
index 323cdc2..aadd631 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -671,7 +671,7 @@ class Readability
                  * safely converted into plain P elements to avoid confusing the scoring
                  * algorithm with DIVs with are, in practice, paragraphs.
                  */
-                if ($node->hasSinglePNode()) {
+                if ($node->hasSingleTagInsideElement('p')) {
                     $this->logger->debug(sprintf('[Get Nodes] Found DIV with a single P node, removing DIV. Node content is: \'%s\'', substr($node->nodeValue, 0, 128)));
                     $pNode = $node->getChildren(true)[0];
                     $node->parentNode->replaceChild($pNode, $node);
@@ -1237,6 +1237,23 @@ class Readability
             }
         }
 
+        // Remove single-cell tables: the lone TD (retagged as P or DIV) replaces the whole TABLE
+        foreach (iterator_to_array($article->getElementsByTagName('table')) as $table) {
+            /** @var DOMNode $table */
+            $tbody = $table->hasSingleTagInsideElement('tbody') ? $table->getChildren(true)[0] : $table;
+            if ($tbody->hasSingleTagInsideElement('tr')) {
+                $row = $tbody->getChildren(true)[0];
+                if ($row->hasSingleTagInsideElement('td')) {
+                    $cell = $row->getChildren(true)[0];
+                    // Use P only when every child of the cell is phrasing content; otherwise fall back to DIV
+                    $cell = NodeUtility::setNodeTag($cell, (array_reduce(iterator_to_array($cell->childNodes), function ($carry, $node) {
+                        return $carry && $node->isPhrasingContent();
+                    }, true)) ? 'p' : 'div');
+                    $table->parentNode->replaceChild($cell, $table);
+                }
+            }
+        }
+
         return $article;
     }
 