summary | refs | log | tree | commit | diff
path: root/classes/feeditem
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2019-11-20 18:56:34 +0300
committer Andrew Dolgov <[email protected]> 2019-11-20 18:56:34 +0300
commit 304d3a0b88052d16aca0230f67532acd6822e482 (patch)
tree b2d09a9eba0db8f14da859205d84c7c2a4ee8097 /classes/feeditem
parent ffa3f9309f9f87b63dae58235597dbad467ba37d (diff)
tag-related fixes
1. move tag sanitization to feedparser common item class
2. enforce length limit on tags when parsing
3. support multiple tags passed via one dc:subject and other such elements, parse them as a comma-separated list
4. sort resulting tag list to prevent different order between feed updates
5. remove some duplicate code related to tag validation
6. allow + symbol in tags
Diffstat (limited to 'classes/feeditem')
-rwxr-xr-x classes/feeditem/atom.php 8
-rwxr-xr-x classes/feeditem/common.php 31
-rwxr-xr-x classes/feeditem/rss.php 8
3 files changed, 39 insertions, 8 deletions
diff --git a/classes/feeditem/atom.php b/classes/feeditem/atom.php
index a962b59f2..a03080981 100755
--- a/classes/feeditem/atom.php
+++ b/classes/feeditem/atom.php
@@ -103,20 +103,20 @@ class FeedItem_Atom extends FeedItem_Common {
// Collects the category strings of this Atom item: the "term" attribute of
// every <category> element that has one, plus the node value of every
// dc:subject match under the item element.
// In this hunk, "-" lines are the removed code and "+" lines its replacement:
// the per-value trim()/clean() calls are dropped and the collected list is
// returned through normalize_categories() instead of as-is.
function get_categories() {
$categories = $this->elem->getElementsByTagName("category");
- $cats = array();
+ $cats = [];
foreach ($categories as $cat) {
// <category> elements without a "term" attribute are skipped
if ($cat->hasAttribute("term"))
- array_push($cats, trim($cat->getAttribute("term")));
+ array_push($cats, $cat->getAttribute("term"));
}
// second source of categories: dc:subject nodes queried relative to the item element
$categories = $this->xpath->query("dc:subject", $this->elem);
foreach ($categories as $cat) {
- array_push($cats, clean(trim($cat->nodeValue)));
+ array_push($cats, $cat->nodeValue);
}
- return $cats;
+ return $this->normalize_categories($cats);
}
function get_enclosures() {
diff --git a/classes/feeditem/common.php b/classes/feeditem/common.php
index 3193ed273..f208f4a48 100755
--- a/classes/feeditem/common.php
+++ b/classes/feeditem/common.php
@@ -162,4 +162,35 @@ abstract class FeedItem_Common extends FeedItem {
}
}
+ /**
+  * Normalizes raw category/tag strings collected from a feed item.
+  *
+  * Every input string is split on commas, since one element may carry
+  * several tags. Each piece is lowercased, trimmed, passed through clean(),
+  * stripped of commas and quotes, prefixed with "t:" when it is numeric and
+  * cut to at most 250 characters. Empty results are dropped; the remaining
+  * tags are sorted and de-duplicated so their order stays the same between
+  * feed updates.
+  *
+  * @param array $cats raw category strings
+  * @return array normalized tags in sorted order; asort(), array_filter()
+  *               and array_unique() preserve keys, so keys are not sequential
+  */
+ static function normalize_categories($cats) {
+
+     $tmp = [];
+
+     foreach ($cats as $rawcat) {
+         $tmp = array_merge($tmp, explode(",", $rawcat));
+     }
+
+     $tmp = array_map(function($srccat) {
+         $cat = clean(trim(mb_strtolower($srccat)));
+
+         $cat = preg_replace('/[,\'\"]/', "", $cat);
+
+         // we don't support numeric tags; this runs after quote removal so
+         // that a quoted number such as "2019" gets the prefix as well
+         if (is_numeric($cat))
+             $cat = 't:' . $cat;
+
+         if (DB_TYPE == "mysql") {
+             // replace characters in U+10000..U+10FFFF with U+FFFD
+             // (presumably because the MySQL schema cannot store them - confirm)
+             $cat = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $cat);
+         }
+
+         if (mb_strlen($cat) > 250)
+             $cat = mb_substr($cat, 0, 250);
+
+         return $cat;
+     }, $tmp);
+
+     // drop empty tags: "a,,b", a trailing comma or an empty element would
+     // otherwise yield "" as a tag; the string cast also discards a null
+     // returned by preg_replace() on failure
+     $tmp = array_filter($tmp, function($cat) {
+         return (string)$cat !== "";
+     });
+
+     asort($tmp);
+
+     return array_unique($tmp);
+ }
}
diff --git a/classes/feeditem/rss.php b/classes/feeditem/rss.php
index 916c73ec4..1f7953c51 100755
--- a/classes/feeditem/rss.php
+++ b/classes/feeditem/rss.php
@@ -97,19 +97,19 @@ class FeedItem_RSS extends FeedItem_Common {
// Collects the category strings of this RSS item: the node value of every
// <category> element plus the node value of every dc:subject match under the
// item element.
// In this hunk, "-" lines are the removed code and "+" lines its replacement:
// the per-value trim()/clean() calls are dropped and the collected list is
// returned through normalize_categories() instead of as-is.
function get_categories() {
$categories = $this->elem->getElementsByTagName("category");
- $cats = array();
+ $cats = [];
foreach ($categories as $cat) {
- array_push($cats, trim($cat->nodeValue));
+ array_push($cats, $cat->nodeValue);
}
// second source of categories: dc:subject nodes queried relative to the item element
$categories = $this->xpath->query("dc:subject", $this->elem);
foreach ($categories as $cat) {
- array_push($cats, clean(trim($cat->nodeValue)));
+ array_push($cats, $cat->nodeValue);
}
- return $cats;
+ return $this->normalize_categories($cats);
}
function get_enclosures() {