summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2013-04-29 16:59:36 +0400
committer Andrew Dolgov <[email protected]> 2013-04-29 16:59:36 +0400
commit 39ede9862f1df94b24cbe476ec66eca99a1d1a2f (patch)
tree 5650e6b8cea69f191476eb339a706dc78227c46a /include
parent 258068b3793ff8b75cc1642d8fd77920ecb47a9c (diff)
experimental: decode numerical utf entities on import in entry title
Diffstat (limited to 'include')
-rw-r--r-- include/rssfuncs.php 12
1 files changed, 12 insertions, 0 deletions
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 0ecab6a25..a5d3898ce 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -553,6 +553,7 @@
_debug("date $entry_timestamp [$entry_timestamp_fmt]", $debug_enabled);
$entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
+ $entry_title = decode_numeric_entities($entry_title);
$entry_link = rewrite_relative_url($site_url, $item->get_link());
@@ -1388,4 +1389,15 @@
_debug("Cleaned $rc cached tags.");
}
+
+ /**
+  * Decode the numeric HTML entities contained in a string into UTF-8
+  * characters using mb_decode_numericentity().
+  *
+  * @param string $entity Text containing numeric entities such as "&#8217;".
+  * @return string The text with every entity inside the conversion range
+  *                replaced by the corresponding UTF-8 character.
+  */
+ function utf8_entity_decode($entity){
+ 	// Convmap layout is (start, end, offset, mask). The range has to reach
+ 	// U+10FFFF, the last Unicode code point; stopping at 0x10000 left every
+ 	// supplementary-plane character (emoji etc.) undecoded. The mask keeps
+ 	// all 21 bits a code point can occupy.
+ 	$convmap = array(0x0, 0x10FFFF, 0, 0x1FFFFF);
+ 	return mb_decode_numericentity($entity, $convmap, 'UTF-8');
+ }
+
+ /**
+  * Replace decimal ("&#8217;") and hexadecimal ("&#x2019;") numeric
+  * entities in $body with the UTF-8 characters they stand for.
+  *
+  * Both notations are handled in a single pass, so text produced by
+  * decoding one entity is never interpreted as a second entity.
+  *
+  * @param string $body Text that may contain numeric entities.
+  * @return string The decoded text, or $body unchanged when the regex
+  *                engine rejects it (e.g. it is not valid UTF-8).
+  */
+ function decode_numeric_entities($body) {
+ 	// A callback replaces the former /e (eval) modifier, which is deprecated
+ 	// since PHP 5.5 and removed in PHP 7. The hex class now includes 8 and 9,
+ 	// and decimal entities may be up to 7 digits long (U+10FFFF = 1114111).
+ 	$decoded = preg_replace_callback(
+ 		'/&#(?:x([a-fA-F0-9]{2,8})|\d{2,7});/u',
+ 		function ($match) {
+ 			// Group 1 is only populated for the hexadecimal notation.
+ 			if (!isset($match[1]) || $match[1] === '') {
+ 				return utf8_entity_decode($match[0]);
+ 			}
+
+ 			// Convert the hex notation to decimal before decoding.
+ 			$entity = '&#' . hexdec($match[1]) . ';';
+ 			$char = utf8_entity_decode($entity);
+
+ 			// If nothing was decoded (value out of range), keep the
+ 			// original text instead of emitting a rewritten entity.
+ 			return $char === $entity ? $match[0] : $char;
+ 		},
+ 		$body
+ 	);
+
+ 	// preg_replace_callback() yields NULL on failure; never lose the input.
+ 	return $decoded === null ? $body : $decoded;
+ }
?>