summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andrew Dolgov <[email protected]> 2012-11-26 14:33:18 +0400
committer: Andrew Dolgov <[email protected]> 2012-11-26 14:33:18 +0400
commit: cc85704f3cc798e9df7813ab9e2de955d7152ada (patch)
tree: ae3c76d63fd8bdb78e082c4be292b9090b55c272
parent: bf2611195f4f2f3e8f8f6c23dd27b024ce3d93d1 (diff)
implement experimental article on-import data filters
-rw-r--r-- classes/filter.php 14
-rw-r--r-- classes/filter/redditimgur.php 47
-rw-r--r-- include/functions.php 8
-rw-r--r-- include/rssfuncs.php 44
4 files changed, 111 insertions, 2 deletions
diff --git a/classes/filter.php b/classes/filter.php
new file mode 100644
index 000000000..8d6bf6f26
--- /dev/null
+++ b/classes/filter.php
@@ -0,0 +1,14 @@
+<?php
+// Base class for article filters; the default implementation changes nothing.
+class Filter {
+	protected $link; // value handed to the constructor, kept for subclasses
+
+	// Stores the supplied $link on the instance.
+	public function __construct($link) {
+		$this->link = $link;
+	}
+	// Returns $article exactly as received; subclasses may override this.
+	public function filter_article($article) {
+		return $article;
+	}
+}
+?>
diff --git a/classes/filter/redditimgur.php b/classes/filter/redditimgur.php
new file mode 100644
index 000000000..6b41015e7
--- /dev/null
+++ b/classes/filter/redditimgur.php
@@ -0,0 +1,47 @@
+<?php
+/**
+ * Article filter for reddit feeds: links to i.imgur.com JPEGs become inline
+ * images, and the parent element of every pre-existing image is removed.
+ */
+class Filter_RedditImgur {
+
+	/**
+	 * @param array $article article data; only "link" and "content" are read
+	 * @return array the article, with "content" re-serialized if it was parsed
+	 */
+	function filter_article($article) {
+		// Cheap substring checks first, so unrelated articles are never parsed.
+		if (strpos($article["link"], "reddit.com/r/") === FALSE ||
+				strpos($article["content"], "i.imgur.com") === FALSE) {
+			return $article;
+		}
+
+		$doc = new DOMDocument();
+		@$doc->loadHTML($article["content"]); // @ hides warnings about malformed markup
+		$xpath = new DOMXPath($doc);
+
+		// Visit every link and every image found in the parsed content.
+		foreach ($xpath->query('(//a[@href]|//img[@src])') as $entry) {
+			// Dots are escaped so only the literal host and ".jpg" can match.
+			if ($entry->hasAttribute("href") &&
+					preg_match("/i\.imgur\.com\/.*?\.jpg/", $entry->getAttribute("href"))) {
+				$img = $doc->createElement('img');
+				$img->setAttribute("src", $entry->getAttribute("href"));
+				$entry->parentNode->replaceChild($img, $entry);
+			}
+
+			// Matched elements with a src attribute: drop their parent element.
+			if ($entry->hasAttribute("src") &&
+					$entry->parentNode && $entry->parentNode->parentNode) {
+				$entry->parentNode->parentNode->removeChild($entry->parentNode);
+			}
+		}
+
+		$node = $doc->getElementsByTagName('body')->item(0);
+
+		if ($node) $article["content"] = $doc->saveXML($node, LIBXML_NOEMPTYTAG);
+
+		return $article;
+	}
+}
+?>
diff --git a/include/functions.php b/include/functions.php
index 55333ccd6..1c3f4cbb9 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -7,11 +7,19 @@
function __autoload($class) {
$class_file = str_replace("_", "/", strtolower(basename($class)));
+ $file = dirname(__FILE__)."/../plugins/$class_file.php"; // plugins/ is probed before classes/
+
+ if (file_exists($file)) {
+ require $file;
+ return; // loaded from plugins/; skip the classes/ lookup below
+ }
+
$file = dirname(__FILE__)."/../classes/$class_file.php";
if (file_exists($file)) {
require $file;
}
+
}
mb_internal_encoding("UTF-8");
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index e12044ea5..be7e19edc 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -398,6 +398,23 @@
_debug("update_rss_feed: " . count($filters) . " filters loaded.");
}
+ $filter_plugins = array();
+
+ if (defined('_ARTICLE_FILTER_PLUGINS')) {
+ foreach (explode(",", _ARTICLE_FILTER_PLUGINS) as $p) {
+ $pclass = "filter_" . trim($p);
+
+ if (class_exists($pclass)) {
+ $plugin = new $pclass($link);
+ array_push($filter_plugins, $plugin);
+ }
+ }
+ }
+
+ if ($debug_enabled) {
+ _debug("update_rss_feed: " . count($filter_plugins) . " filter plugins loaded.");
+ }
+
if ($use_simplepie) {
$iterator = $rss->get_items();
} else {
@@ -651,8 +668,6 @@
$entry_content = db_escape_string($entry_content, false);
- $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
-
$entry_title = db_escape_string($entry_title);
$entry_link = db_escape_string($entry_link);
$entry_comments = mb_substr(db_escape_string($entry_comments), 0, 250);
@@ -757,6 +772,31 @@
_debug("update_rss_feed: done collecting data [TITLE:$entry_title]");
}
+ // TODO: less memory-hungry implementation
+ if (count($filter_plugins) > 0) {
+ if ($debug_enabled) {
+ _debug("update_rss_feed: applying plugin filters...");
+ }
+
+ $article = array("owner_uid" => $owner_uid,
+ "title" => $entry_title,
+ "content" => $entry_content,
+ "link" => $entry_link,
+ "tags" => $entry_tags,
+ "author" => $entry_author);
+
+ foreach ($filter_plugins as $plugin) {
+ $article = $plugin->filter_article($article);
+ }
+
+ $entry_title = $article["title"];
+ $entry_content = $article["content"];
+ $entry_tags = $article["tags"];
+ $entry_author = $article["author"];
+ }
+
+ $content_hash = "SHA1:" . sha1(strip_tags($entry_content));
+
db_query($link, "BEGIN");
if (db_num_rows($result) == 0) {