Browse Source

parser: trim some feed-extracted data like titles and links

Andrew Dolgov 6 years ago
parent
commit
31bd6f7643
3 changed files with 17 additions and 13 deletions
  1. 5 5
      classes/feeditem/atom.php
  2. 6 6
      classes/feeditem/rss.php
  3. 6 2
      classes/feedparser.php

+ 5 - 5
classes/feeditem/atom.php

@@ -43,9 +43,9 @@ class FeedItem_Atom extends FeedItem_Common {
 				$base = $this->xpath->evaluate("string(ancestor-or-self::*[@xml:base][1]/@xml:base)", $link);
 
 				if ($base)
-					return rewrite_relative_url($base, $link->getAttribute("href"));
+					return rewrite_relative_url($base, trim($link->getAttribute("href")));
 				else
-					return $link->getAttribute("href");
+					return trim($link->getAttribute("href"));
 
 			}
 		}
@@ -55,7 +55,7 @@ class FeedItem_Atom extends FeedItem_Common {
 		$title = $this->elem->getElementsByTagName("title")->item(0);
 
 		if ($title) {
-			return $title->nodeValue;
+			return trim($title->nodeValue);
 		}
 	}
 
@@ -106,13 +106,13 @@ class FeedItem_Atom extends FeedItem_Common {
 
 		foreach ($categories as $cat) {
 			if ($cat->hasAttribute("term"))
-				array_push($cats, $cat->getAttribute("term"));
+				array_push($cats, trim($cat->getAttribute("term")));
 		}
 
 		$categories = $this->xpath->query("dc:subject", $this->elem);
 
 		foreach ($categories as $cat) {
-			array_push($cats, $cat->nodeValue);
+			array_push($cats, trim($cat->nodeValue));
 		}
 
 		return $cats;

+ 6 - 6
classes/feeditem/rss.php

@@ -33,20 +33,20 @@ class FeedItem_RSS extends FeedItem_Common {
 					|| $link->getAttribute("rel") == "alternate"
 					|| $link->getAttribute("rel") == "standout")) {
 
-				return $link->getAttribute("href");
+				return trim($link->getAttribute("href"));
 			}
 		}
 
 		$link = $this->elem->getElementsByTagName("guid")->item(0);
 
 		if ($link && $link->hasAttributes() && $link->getAttribute("isPermaLink") == "true") {
-			return $link->nodeValue;
+			return trim($link->nodeValue);
 		}
 
 		$link = $this->elem->getElementsByTagName("link")->item(0);
 
 		if ($link) {
-			return $link->nodeValue;
+			return trim($link->nodeValue);
 		}
 	}
 
@@ -54,7 +54,7 @@ class FeedItem_RSS extends FeedItem_Common {
 		$title = $this->elem->getElementsByTagName("title")->item(0);
 
 		if ($title) {
-			return $title->nodeValue;
+			return trim($title->nodeValue);
 		}
 	}
 
@@ -90,13 +90,13 @@ class FeedItem_RSS extends FeedItem_Common {
 		$cats = array();
 
 		foreach ($categories as $cat) {
-			array_push($cats, $cat->nodeValue);
+			array_push($cats, trim($cat->nodeValue));
 		}
 
 		$categories = $this->xpath->query("dc:subject", $this->elem);
 
 		foreach ($categories as $cat) {
-			array_push($cats, $cat->nodeValue);
+			array_push($cats, trim($cat->nodeValue));
 		}
 
 		return $cats;

+ 6 - 2
classes/feedparser.php

@@ -205,6 +205,10 @@ class FeedParser {
 				break;
 
 			}
+
+			if ($this->title) $this->title = trim($this->title);
+			if ($this->link) $this->link = trim($this->link);
+
 		} else {
 			if( !isset($this->error) ){
 				$this->error = "Unknown/unsupported feed type";
@@ -252,7 +256,7 @@ class FeedParser {
 
 			foreach ($links as $link) {
 				if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) {
-					array_push($rv, $link->getAttribute('href'));
+					array_push($rv, trim($link->getAttribute('href')));
 				}
 			}
 			break;
@@ -261,7 +265,7 @@ class FeedParser {
 
 			foreach ($links as $link) {
 				if (!$rel || $link->hasAttribute('rel') && $link->getAttribute('rel') == $rel) {
-					array_push($rv, $link->getAttribute('href'));
+					array_push($rv, trim($link->getAttribute('href')));
 				}
 			}
 			break;