summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2015-06-19 10:12:47 +0300
committer Andrew Dolgov <[email protected]> 2015-06-19 10:12:47 +0300
commit 3318d324105ee222a54afc94076878c12b588c24 (patch)
tree a12222c9add62f165946cf6b03a6cd7e885350d8
parent 724e08f1c01bb60681616e3b1ddc14bc0648de3a (diff)
move language detection to a plugin, remove config.php constant
-rw-r--r-- config.php-dist 6
-rw-r--r-- include/functions.php 2
-rw-r--r-- include/rssfuncs.php 29
-rw-r--r-- include/sanity_config.php 4
-rw-r--r-- plugins/af_lang_detect/init.php 46
-rw-r--r-- plugins/af_lang_detect/languagedetect/LanguageDetect.php (renamed from lib/languagedetect/LanguageDetect.php) 0
-rw-r--r-- plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php (renamed from lib/languagedetect/Text/LanguageDetect/Exception.php) 0
-rw-r--r-- plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php (renamed from lib/languagedetect/Text/LanguageDetect/ISO639.php) 0
-rw-r--r-- plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php (renamed from lib/languagedetect/Text/LanguageDetect/Parser.php) 0
-rw-r--r-- plugins/af_lang_detect/languagedetect/data/lang.dat (renamed from lib/languagedetect/data/lang.dat) 0
-rw-r--r-- plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat (renamed from lib/languagedetect/data/unicode_blocks.dat) 0
11 files changed, 53 insertions, 34 deletions
diff --git a/config.php-dist b/config.php-dist
index c0729b61b..311b94df0 100644
--- a/config.php-dist
+++ b/config.php-dist
@@ -180,12 +180,6 @@
define('CHECK_FOR_UPDATES', true);
// Check for updates automatically if running Git version
- define('DETECT_ARTICLE_LANGUAGE', false);
- // Detect article language when updating feeds, presently this is only
- // used for hyphenation. This may increase amount of CPU time used by
- // update processes, disable if necessary (i.e. you are being billed
- // for CPU time).
-
define('ENABLE_GZIP_OUTPUT', false);
// Selectively gzip output to improve wire performance. This requires
// PHP Zlib extension on the server.
diff --git a/include/functions.php b/include/functions.php
index edc196f31..5c10ac6ac 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -99,8 +99,6 @@
require_once "lib/accept-to-gettext.php";
require_once "lib/gettext/gettext.inc";
- require_once "lib/languagedetect/LanguageDetect.php";
-
function startup_gettext() {
# Get locale from Accept-Language header
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index 4efc843c4..7a3ea7402 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -443,13 +443,6 @@
$rss->init();
}
- if (DETECT_ARTICLE_LANGUAGE) {
- require_once "lib/languagedetect/LanguageDetect.php";
-
- $lang = new Text_LanguageDetect();
- $lang->setNameMode(2);
- }
-
// print_r($rss);
$feed = db_escape_string($feed);
@@ -645,21 +638,6 @@
print "\n";
}
- $entry_language = "";
-
- if (DETECT_ARTICLE_LANGUAGE) {
- $entry_language = $lang->detect($entry_title . " " . $entry_content, 1);
-
- if (count($entry_language) > 0) {
- $possible = array_keys($entry_language);
- $entry_language = $possible[0];
-
- _debug("detected language: $entry_language", $debug_enabled);
- } else {
- $entry_language = "";
- }
- }
-
$entry_comments = $item->get_comments_url();
$entry_author = $item->get_author();
@@ -695,17 +673,19 @@
_debug("done collecting data.", $debug_enabled);
- $result = db_query("SELECT id, content_hash FROM ttrss_entries
+ $result = db_query("SELECT id, content_hash, lang FROM ttrss_entries
WHERE guid = '".db_escape_string($entry_guid)."' OR guid = '$entry_guid_hashed'");
if (db_num_rows($result) != 0) {
$base_entry_id = db_fetch_result($result, 0, "id");
$entry_stored_hash = db_fetch_result($result, 0, "content_hash");
$article_labels = get_article_labels($base_entry_id, $owner_uid);
+ $entry_language = db_fetch_result($result, 0, "lang");
} else {
$base_entry_id = false;
$entry_stored_hash = "";
$article_labels = array();
+ $entry_language = "";
}
$article = array("owner_uid" => $owner_uid, // read only
@@ -719,7 +699,7 @@
"author" => $entry_author,
"force_catchup" => false, // ugly hack for the time being
"score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
- "language" => $entry_language, // read only
+ "language" => $entry_language,
"feed" => array("id" => $feed,
"fetch_url" => $fetch_url,
"site_url" => $site_url)
@@ -783,6 +763,7 @@
$entry_force_catchup = $article["force_catchup"];
$article_labels = $article["labels"];
$entry_score_modifier = (int) $article["score_modifier"];
+ $entry_language = db_escape_string($article["language"]);
if ($debug_enabled) {
_debug("article labels:", $debug_enabled);
diff --git a/include/sanity_config.php b/include/sanity_config.php
index 6120e5fe0..eaa1668b7 100644
--- a/include/sanity_config.php
+++ b/include/sanity_config.php
@@ -1,3 +1,3 @@
-<?php # This file has been generated at: Tue Feb 3 14:45:46 MSK 2015
+<?php # This file has been generated at: Fri, Jun 19, 2015 10:11:43 AM
define('GENERATED_CONFIG_CHECK', 26);
-$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'FEED_CRYPT_KEY', 'SINGLE_USER_MODE', 'SIMPLE_UPDATE_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'SPHINX_SERVER', 'SPHINX_INDEX', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_SERVER', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'SMTP_SECURE', 'CHECK_FOR_UPDATES', 'DETECT_ARTICLE_LANGUAGE', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'LOG_DESTINATION', 'CONFIG_VERSION'); ?>
+$requred_defines = array( 'DB_TYPE', 'DB_HOST', 'DB_USER', 'DB_NAME', 'DB_PASS', 'MYSQL_CHARSET', 'SELF_URL_PATH', 'FEED_CRYPT_KEY', 'SINGLE_USER_MODE', 'SIMPLE_UPDATE_MODE', 'PHP_EXECUTABLE', 'LOCK_DIRECTORY', 'CACHE_DIR', 'ICONS_DIR', 'ICONS_URL', 'AUTH_AUTO_CREATE', 'AUTH_AUTO_LOGIN', 'FORCE_ARTICLE_PURGE', 'PUBSUBHUBBUB_HUB', 'PUBSUBHUBBUB_ENABLED', 'SPHINX_SERVER', 'SPHINX_INDEX', 'ENABLE_REGISTRATION', 'REG_NOTIFY_ADDRESS', 'REG_MAX_USERS', 'SESSION_COOKIE_LIFETIME', 'SESSION_CHECK_ADDRESS', 'SMTP_FROM_NAME', 'SMTP_FROM_ADDRESS', 'DIGEST_SUBJECT', 'SMTP_SERVER', 'SMTP_LOGIN', 'SMTP_PASSWORD', 'SMTP_SECURE', 'CHECK_FOR_UPDATES', 'ENABLE_GZIP_OUTPUT', 'PLUGINS', 'LOG_DESTINATION', 'CONFIG_VERSION'); ?>
diff --git a/plugins/af_lang_detect/init.php b/plugins/af_lang_detect/init.php
new file mode 100644
index 000000000..3f2eb29f8
--- /dev/null
+++ b/plugins/af_lang_detect/init.php
@@ -0,0 +1,46 @@
+<?php
+class Af_Lang_Detect extends Plugin {
+ private $host;
+ private $lang;
+
+ function about() {
+ return array(1.0,
+ "Detect article language",
+ "fox");
+ }
+
+ function init($host) {
+ $this->host = $host;
+
+ $host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
+
+ require_once __DIR__ . "/languagedetect/LanguageDetect.php";
+
+ $this->lang = new Text_LanguageDetect();
+ $this->lang->setNameMode(2);
+ }
+
+ function hook_article_filter($article) {
+
+ if ($this->lang) {
+ $entry_language = $this->lang->detect($article['title'] . " " . $article['content'], 1);
+
+ if (count($entry_language) > 0) {
+ $possible = array_keys($entry_language);
+ $entry_language = $possible[0];
+
+ _debug("detected language: $entry_language");
+
+ $article["language"] = $entry_language;
+ }
+ }
+
+ return $article;
+ }
+
+ function api_version() {
+ return 2;
+ }
+
+}
+?>
diff --git a/lib/languagedetect/LanguageDetect.php b/plugins/af_lang_detect/languagedetect/LanguageDetect.php
index 36ebacf93..36ebacf93 100644
--- a/lib/languagedetect/LanguageDetect.php
+++ b/plugins/af_lang_detect/languagedetect/LanguageDetect.php
diff --git a/lib/languagedetect/Text/LanguageDetect/Exception.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php
index 196d994f5..196d994f5 100644
--- a/lib/languagedetect/Text/LanguageDetect/Exception.php
+++ b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php
diff --git a/lib/languagedetect/Text/LanguageDetect/ISO639.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php
index c577a2e1a..c577a2e1a 100644
--- a/lib/languagedetect/Text/LanguageDetect/ISO639.php
+++ b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php
diff --git a/lib/languagedetect/Text/LanguageDetect/Parser.php b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php
index 1c20c2657..1c20c2657 100644
--- a/lib/languagedetect/Text/LanguageDetect/Parser.php
+++ b/plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php
diff --git a/lib/languagedetect/data/lang.dat b/plugins/af_lang_detect/languagedetect/data/lang.dat
index c2a44f56e..c2a44f56e 100644
--- a/lib/languagedetect/data/lang.dat
+++ b/plugins/af_lang_detect/languagedetect/data/lang.dat
diff --git a/lib/languagedetect/data/unicode_blocks.dat b/plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat
index 3b24cd2c1..3b24cd2c1 100644
--- a/lib/languagedetect/data/unicode_blocks.dat
+++ b/plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat