summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author FiveFilters.org <[email protected]> 2021-08-19 14:58:04 +0200
committer FiveFilters.org <[email protected]> 2021-08-19 14:58:04 +0200
commit 93a4ada83a3f94645d16386513ddc3d2b47d6b3b (patch)
tree ea946b835e26bbc87db3d9ce6621280fd01c84e1 /src
parent 4499150a8aca89ba019edb28d25a168d38d1ce8a (diff)
preserve children when removing javascript: links
https://github.com/mozilla/readability/commit/7c91bdd2753adabf801beb5063db6693c54e4e33
Diffstat (limited to 'src')
-rw-r--r-- src/Readability.php 18
1 files changed, 14 insertions, 4 deletions
diff --git a/src/Readability.php b/src/Readability.php
index 87af283..0f4fd13 100644
--- a/src/Readability.php
+++ b/src/Readability.php
@@ -1776,13 +1776,23 @@ class Readability
/** @var DOMElement $link */
$href = $link->getAttribute('href');
if ($href) {
- // Replace links with javascript: URIs with text content, since
+ // Remove links with javascript: URIs, since
// they won't work after scripts have been removed from the page.
if (strpos($href, 'javascript:') === 0) {
$this->logger->debug(sprintf('[PostProcess] Removing \'javascript:\' link. Content is: \'%s\'', substr($link->textContent, 0, 128)));
-
- $text = $article->createTextNode($link->textContent);
- $link->parentNode->replaceChild($text, $link);
+
+ // if the link only contains simple text content, it can be converted to a text node
+ if ($link->childNodes->length === 1 && $link->childNodes->item(0)->nodeType === XML_TEXT_NODE) {
+ $text = $article->createTextNode($link->textContent);
+ $link->parentNode->replaceChild($text, $link);
+ } else {
+ // if the link has multiple children, they should all be preserved
+ $container = $article->createElement('span');
+ while ($link->firstChild) {
+ $container->appendChild($link->firstChild);
+ }
+ $link->parentNode->replaceChild($container, $link);
+ }
} else {
$this->logger->debug(sprintf('[PostProcess] Converting link to absolute URI: \'%s\'', substr($href, 0, 128)));