From 4f00f55ca2ecd2e5a75c2c4ef37ca0e1143a7ac7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Thu, 11 Jul 2013 15:40:09 +0400 Subject: parser: add charset recoding hack for systems where libxml is built without support for iconv (handles libxml error 32) --- classes/feedparser.php | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'classes/feedparser.php') diff --git a/classes/feedparser.php b/classes/feedparser.php index eb8606de9..53f6c52a9 100644 --- a/classes/feedparser.php +++ b/classes/feedparser.php @@ -20,6 +20,24 @@ class FeedParser { $error = libxml_get_last_error(); + // libxml compiled without iconv? + if ($error && $error->code == 32) { + if (preg_match('/^(<\\?xml .*?)encoding="(.+?)"(.*?\\?>)/', $data, $matches) === 1) { + libxml_clear_errors(); + + $enc = $matches[2]; + + $data = iconv($enc, 'UTF-8//IGNORE', $data); + $data = preg_replace('/^<\\?xml .*?\\?>/', $matches[1] . $matches[3] , $data); + + $this->doc = new DOMDocument(); + $this->doc->loadXML($data); + + $error = libxml_get_last_error(); + } + } + + // some terrible invalid unicode entity? if ($error && $error->code == 9) { libxml_clear_errors(); -- cgit v1.2.3