summaryrefslogtreecommitdiff
path: root/include/rssfuncs.php
diff options
context:
space:
mode:
author: Andrew Dolgov <[email protected]> 2013-04-19 13:17:28 +0400
committer: Andrew Dolgov <[email protected]> 2013-04-19 13:17:28 +0400
commit: ebec81a6fb2dff0b2fe6b569b021e057995ee6c7 (patch)
tree: 9acdc3fd78b47d19f98827b76dcf096dec814429 /include/rssfuncs.php
parent: 1367bc3f5e0f99f5b900bcd3ea9e7512b7c84388 (diff)
subscribe: verify XML before adding to the database; fetch: try to work around entity problems if initial parsing fails
Diffstat (limited to 'include/rssfuncs.php')
-rw-r--r-- include/rssfuncs.php 29
1 files changed, 28 insertions, 1 deletions
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 31d35bf8e..47d622169 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -316,6 +316,25 @@
_debug("update_rss_feed: fetch done.");
}
+ $error = verify_feed_xml($feed_data);
+
+ if ($error) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: error verifying XML, code: " . $error->code);
+ }
+
+ if ($error->code == 26) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: got error 26, trying to decode entities...");
+ }
+
+ $feed_data = html_entity_decode($feed_data, ENT_COMPAT, 'UTF-8');
+
+ $error = verify_feed_xml($feed_data);
+
+ if ($error) $feed_data = '';
+ }
+ }
}
if (!$feed_data) {
@@ -559,7 +578,7 @@
_debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]");
}
- $entry_title = html_entity_decode($item->get_title());
+ $entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
$entry_link = rewrite_relative_url($site_url, $item->get_link());
@@ -1421,5 +1440,13 @@
mb_strtolower(strip_tags($title), 'utf-8'));
}
+ /**
+ * Try to parse $feed_data as XML and report the last error libxml recorded.
+ *
+ * @param string $feed_data raw document text to check
+ * @return LibXMLError|false the last libxml error raised while parsing
+ * $feed_data, or false when libxml recorded none
+ */
+ function verify_feed_xml($feed_data) {
+ // DOMDocument::loadXML() rejects an empty string before libxml ever runs
+ // (a PHP warning on older versions, a ValueError on PHP 8), so no libxml
+ // error is recorded for it. Keep reporting "no error" without calling it.
+ if ((string) $feed_data === '') return false;
+
+ // Collect parse errors internally instead of emitting PHP warnings, and
+ // remember the previous setting so it can be put back afterwards.
+ $previous = libxml_use_internal_errors(true);
+
+ // Drop anything left over from an earlier parse; otherwise a stale error
+ // would be reported for a document that is actually fine.
+ libxml_clear_errors();
+
+ $doc = new DOMDocument();
+ $doc->loadXML($feed_data);
+ $error = libxml_get_last_error();
+
+ libxml_clear_errors();
+ libxml_use_internal_errors($previous);
+
+ return $error;
+ }
?>