Browse Source

parser: properly support tag subtrees instead of text content for article content

Andrew Dolgov 4 years ago
parent
commit
7d1e15c396
3 changed files with 19 additions and 6 deletions
  1. 2 2
      classes/feeditem/atom.php
  2. 11 0
      classes/feeditem/common.php
  3. 6 4
      classes/feeditem/rss.php

+ 2 - 2
classes/feeditem/atom.php

@@ -75,7 +75,7 @@ class FeedItem_Atom extends FeedItem_Common {
 				}
 			}
 
-			return $content->nodeValue;
+			return $this->subtree_or_text($content);
 		}
 	}
 
@@ -95,7 +95,7 @@ class FeedItem_Atom extends FeedItem_Common {
 				}
 			}
 
-			return $content->nodeValue;
+			return $this->subtree_or_text($content);
 		}
 
 	}

+ 11 - 0
classes/feeditem/common.php

@@ -70,6 +70,17 @@ abstract class FeedItem_Common extends FeedItem {
 		}
 	}
 
+	function count_children($node) { // Returns how many element nodes exist beneath $node.
+		return $node->getElementsByTagName("*")->length; // "*" matches every descendant element, not only direct children
+	}
+
+	function subtree_or_text($node) { // Returns $node's text value, or its serialized markup when it contains elements.
+		if ($this->count_children($node) == 0) { // no element descendants: use the plain text value
+			return $node->nodeValue;
+		} else {
+			return $node->c14n(); // canonical XML serialization — NOTE(review): presumably includes $node's own wrapping tag; confirm callers expect that
+		}
+	}
 
 }
 ?>

+ 6 - 4
classes/feeditem/rss.php

@@ -71,17 +71,19 @@ class FeedItem_RSS extends FeedItem_Common {
 		$contentB = $this->elem->getElementsByTagName("description")->item(0);
 
 		if ($contentA && !$contentB) {
-			return $contentA->nodeValue;
+			return $this->subtree_or_text($contentA);
 		}
 
 
 		if ($contentB && !$contentA) {
-			return $contentB->nodeValue;
+			return $this->subtree_or_text($contentB);
 		}
 
 		if ($contentA && $contentB) {
-			return mb_strlen($contentA->nodeValue) > mb_strlen($contentB->nodeValue) ?
-				$contentA->nodeValue : $contentB->nodeValue;
+			$resultA = $this->subtree_or_text($contentA);
+			$resultB = $this->subtree_or_text($contentB);
+
+			return mb_strlen($resultA) > mb_strlen($resultB) ? $resultA : $resultB;
 		}
 	}