summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author: Andrew Dolgov <[email protected]> 2013-04-19 13:17:28 +0400
committer: Andrew Dolgov <[email protected]> 2013-04-19 13:17:28 +0400
commit: ebec81a6fb2dff0b2fe6b569b021e057995ee6c7 (patch)
tree: 9acdc3fd78b47d19f98827b76dcf096dec814429 /include
parent: 1367bc3f5e0f99f5b900bcd3ea9e7512b7c84388 (diff)
subscribe: verify XML before adding to the database; fetch: try to work around entity problems if initial parsing fails
Diffstat (limited to 'include')
-rw-r--r-- include/functions.php | 19
-rw-r--r-- include/rssfuncs.php | 29
2 files changed, 47 insertions, 1 deletions
diff --git a/include/functions.php b/include/functions.php
index 4cc8f134d..8ac5753c9 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -1558,6 +1558,7 @@
* Here you should call extractfeedurls in rpc-backend
* to get all possible feeds.
* 5 - Couldn't download the URL content.
+ * 6 - Content is not valid XML.
*/
function subscribe_to_feed($url, $cat_id = 0,
$auth_login = '', $auth_pass = '') {
@@ -1588,6 +1589,18 @@
$url = key($feedUrls);
}
+ libxml_use_internal_errors(true);
+ $doc = new DOMDocument();
+ $doc->loadXML(html_entity_decode($contents));
+ $error = libxml_get_last_error();
+ libxml_clear_errors();
+
+ if ($error) {
+ $error_message = format_libxml_error($error);
+
+ return array("code" => 6, "message" => $error_message);
+ }
+
if ($cat_id == "0" || !$cat_id) {
$cat_qpart = "NULL";
} else {
@@ -4203,4 +4216,10 @@
return LABEL_BASE_INDEX - 1 + abs($feed);
}
+ /**
+  * Builds a one-line, human-readable description of a libxml error.
+  *
+  * The text is produced by T_sprintf() (defined elsewhere; presumably the
+  * project's translating sprintf wrapper -- confirm).
+  *
+  * @param LibXMLError $error error object, e.g. from libxml_get_last_error()
+  * @return string formatted message with the error code, line, column and text
+  */
+ function format_libxml_error($error) {
+     // libxml messages normally end with a newline; strip surrounding
+     // whitespace so the formatted result stays on a single line.
+     return T_sprintf("LibXML error %s at line %d (column %d): %s",
+         $error->code, $error->line, $error->column,
+         trim($error->message));
+ }
+
?>
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 31d35bf8e..47d622169 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -316,6 +316,25 @@
_debug("update_rss_feed: fetch done.");
}
+ $error = verify_feed_xml($feed_data);
+
+ if ($error) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: error verifying XML, code: " . $error->code);
+ }
+
+ if ($error->code == 26) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: got error 26, trying to decode entities...");
+ }
+
+ $feed_data = html_entity_decode($feed_data, ENT_COMPAT, 'UTF-8');
+
+ $error = verify_feed_xml($feed_data);
+
+ if ($error) $feed_data = '';
+ }
+ }
}
if (!$feed_data) {
@@ -559,7 +578,7 @@
_debug("update_rss_feed: date $entry_timestamp [$entry_timestamp_fmt]");
}
- $entry_title = html_entity_decode($item->get_title());
+ $entry_title = html_entity_decode($item->get_title(), ENT_COMPAT, 'UTF-8');
$entry_link = rewrite_relative_url($site_url, $item->get_link());
@@ -1421,5 +1440,13 @@
mb_strtolower(strip_tags($title), 'utf-8'));
}
+ /**
+  * Checks whether the given feed data parses as XML.
+  *
+  * libxml errors are collected internally while parsing; the caller's
+  * libxml_use_internal_errors() setting is restored before returning.
+  *
+  * @param string $feed_data raw feed contents
+  * @return LibXMLError|false last libxml error raised while parsing, or
+  *         false when none was raised (including for empty input)
+  */
+ function verify_feed_xml($feed_data) {
+     // DOMDocument::loadXML() rejects empty input at the PHP level (not as
+     // a libxml error), so there is nothing to verify here: report "no
+     // error" and leave handling of missing data to the caller.
+     if ($feed_data === null || $feed_data === false || $feed_data === '') {
+         return false;
+     }
+
+     $previous = libxml_use_internal_errors(true);
+
+     // Discard errors left behind by earlier, unrelated parses so that
+     // libxml_get_last_error() below can only report on this document.
+     libxml_clear_errors();
+
+     $doc = new DOMDocument();
+     $doc->loadXML($feed_data);
+     $error = libxml_get_last_error();
+
+     libxml_clear_errors();
+     // Put the global error-handling mode back the way the caller had it.
+     libxml_use_internal_errors($previous);
+
+     return $error;
+ }
?>