Browse Source

strip utf8mb4 characters in enclosures on mysql

Andrew Dolgov 1 year ago
parent
commit
102a01354b
1 changed files with 17 additions and 3 deletions
  1. 17 3
      classes/rssutils.php

+ 17 - 3
classes/rssutils.php

@@ -16,6 +16,11 @@ class RSSUtils {
 		return sha1(implode(",", $pluginhost->get_plugin_names()) . $tmp);
 	}
 
+	// Strips utf8mb4 characters (i.e. emoji) for mysql
+	static function strip_utf8mb4($str) {
+        return preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $str);
+    }
+
 	static function update_feedbrowser_cache() {
 
 		$pdo = Db::pdo();
@@ -760,10 +765,9 @@ class RSSUtils {
 				// Workaround: 4-byte unicode requires utf8mb4 in MySQL. See https://tt-rss.org/forum/viewtopic.php?f=1&t=3377&p=20077#p20077
 				if (DB_TYPE == "mysql") {
 					foreach ($article as $k => $v) {
-
 						// i guess we'll have to take the risk of 4byte unicode labels & tags here
 						if (is_string($article[$k])) {
-							$article[$k] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $v);
+							$article[$k] = RSSUtils::strip_utf8mb4($v);
 						}
 					}
 				}
@@ -1043,7 +1047,17 @@ class RSSUtils {
 						$e_item = array(
 							rewrite_relative_url($site_url, $e->link),
 							$e->type, $e->length, $e->title, $e->width, $e->height);
-						array_push($enclosures, $e_item);
+
+						// Yet another episode of "mysql utf8_general_ci is gimped"
+						if (DB_TYPE == "mysql") {
+                            for ($i = 0; $i < count($e_item); $i++) {
+                                if (is_string($e_item[$i])) {
+                                    $e_item[$i] = RSSUtils::strip_utf8mb4($e_item[$i]);
+                                }
+                            }
+						}
+
+                        array_push($enclosures, $e_item);
 					}
 				}