Browse Source

move language detection to a plugin, remove config.php constant

Andrew Dolgov 4 years ago
parent
commit
3318d32410

+ 0 - 6
config.php-dist

@@ -180,12 +180,6 @@
 	define('CHECK_FOR_UPDATES', true);
 	// Check for updates automatically if running Git version
  
-	define('DETECT_ARTICLE_LANGUAGE', false);
-	// Detect article language when updating feeds, presently this is only
-	// used for hyphenation. This may increase amount of CPU time used by 
-	// update processes, disable if necessary (i.e. you are being billed
-	// for CPU time).
-
 	define('ENABLE_GZIP_OUTPUT', false);
 	// Selectively gzip output to improve wire performance. This requires
 	// PHP Zlib extension on the server.

+ 0 - 2
include/functions.php

@@ -99,8 +99,6 @@
 	require_once "lib/accept-to-gettext.php";
 	require_once "lib/gettext/gettext.inc";
 
-	require_once "lib/languagedetect/LanguageDetect.php";
-
 	function startup_gettext() {
 
 		# Get locale from Accept-Language header

+ 5 - 24
include/rssfuncs.php

@@ -443,13 +443,6 @@
 			$rss->init();
 		}
 
-		if (DETECT_ARTICLE_LANGUAGE) {
-			require_once "lib/languagedetect/LanguageDetect.php";
-
-			$lang = new Text_LanguageDetect();
-			$lang->setNameMode(2);
-		}
-
 //		print_r($rss);
 
 		$feed = db_escape_string($feed);
@@ -645,21 +638,6 @@
 					print "\n";
 				}
 
-				$entry_language = "";
-
-				if (DETECT_ARTICLE_LANGUAGE) {
-					$entry_language = $lang->detect($entry_title . " " . $entry_content, 1);
-
-					if (count($entry_language) > 0) {
-						$possible = array_keys($entry_language);
-						$entry_language = $possible[0];
-
-						_debug("detected language: $entry_language", $debug_enabled);
-					} else {
-						$entry_language = "";
-					}
-				}
-
 				$entry_comments = $item->get_comments_url();
 				$entry_author = $item->get_author();
 
@@ -695,17 +673,19 @@
 
 				_debug("done collecting data.", $debug_enabled);
 
-				$result = db_query("SELECT id, content_hash FROM ttrss_entries
+				$result = db_query("SELECT id, content_hash, lang FROM ttrss_entries
 					WHERE guid = '".db_escape_string($entry_guid)."' OR guid = '$entry_guid_hashed'");
 
 				if (db_num_rows($result) != 0) {
 					$base_entry_id = db_fetch_result($result, 0, "id");
 					$entry_stored_hash = db_fetch_result($result, 0, "content_hash");
 					$article_labels = get_article_labels($base_entry_id, $owner_uid);
+					$entry_language = db_fetch_result($result, 0, "lang");
 				} else {
 					$base_entry_id = false;
 					$entry_stored_hash = "";
 					$article_labels = array();
+					$entry_language = "";
 				}
 
 				$article = array("owner_uid" => $owner_uid, // read only
@@ -719,7 +699,7 @@
 					"author" => $entry_author,
 					"force_catchup" => false, // ugly hack for the time being
 					"score_modifier" => 0, // no previous value, plugin should recalculate score modifier based on content if needed
-					"language" => $entry_language, // read only
+					"language" => $entry_language,
 					"feed" => array("id" => $feed,
 						"fetch_url" => $fetch_url,
 						"site_url" => $site_url)
@@ -783,6 +763,7 @@
 				$entry_force_catchup = $article["force_catchup"];
 				$article_labels = $article["labels"];
 				$entry_score_modifier = (int) $article["score_modifier"];
+				$entry_language = db_escape_string($article["language"]);
 
 				if ($debug_enabled) {
 					_debug("article labels:", $debug_enabled);

File diff suppressed because it is too large
+ 2 - 2
include/sanity_config.php


+ 46 - 0
plugins/af_lang_detect/init.php

@@ -0,0 +1,46 @@
+<?php
+/**
+ * Article filter plugin that guesses the language of an article from its
+ * title and content and stores the result in $article["language"].
+ *
+ * Detection is delegated to the bundled Text_LanguageDetect library. When
+ * the detector cannot be created or fails on a particular article, the
+ * article is passed through unchanged.
+ */
+class Af_Lang_Detect extends Plugin {
+	// Plugin host handed to init().
+	private $host;
+
+	// Text_LanguageDetect instance, or null if it could not be created.
+	private $lang;
+
+	/**
+	 * Describes the plugin.
+	 *
+	 * @return array 1.0, "Detect article language", "fox" (presumably
+	 *               version, description and author; confirm against Plugin)
+	 */
+	function about() {
+		return array(1.0,
+			"Detect article language",
+			"fox");
+	}
+
+	/**
+	 * Registers the article filter hook and creates the language detector.
+	 *
+	 * @param object $host plugin host providing add_hook() and
+	 *                     HOOK_ARTICLE_FILTER
+	 */
+	function init($host) {
+		$this->host = $host;
+
+		$host->add_hook($host::HOOK_ARTICLE_FILTER, $this);
+
+		require_once __DIR__ . "/languagedetect/LanguageDetect.php";
+
+		// A failing detector must not prevent the remaining plugins from
+		// loading; hook_article_filter() becomes a no-op while $this->lang
+		// is null.
+		try {
+			$this->lang = new Text_LanguageDetect();
+			$this->lang->setNameMode(2);
+		} catch (Exception $e) {
+			$this->lang = null;
+
+			trigger_error("af_lang_detect: unable to initialize language detector: " .
+				$e->getMessage(), E_USER_WARNING);
+		}
+	}
+
+	/**
+	 * Detects the article language and stores it in $article["language"].
+	 *
+	 * The "language" key is only written when the detector returns at
+	 * least one candidate; otherwise its previous value is kept.
+	 *
+	 * @param array $article article data; 'title' and 'content' are read
+	 * @return array the article, possibly with "language" updated
+	 */
+	function hook_article_filter($article) {
+
+		if (!$this->lang) {
+			return $article;
+		}
+
+		$title = isset($article['title']) ? (string) $article['title'] : "";
+		$content = isset($article['content']) ? (string) $article['content'] : "";
+
+		// Detect on text only: markup (tag and attribute names) would
+		// otherwise be counted as article text. A space is inserted before
+		// every tag so that words separated only by markup are not glued
+		// together once the tags are removed.
+		$sample = strip_tags(str_replace("<", " <", $title . " " . $content));
+
+		try {
+			$detected = $this->lang->detect($sample, 1);
+		} catch (Exception $e) {
+			// The language is optional metadata, keep processing the article.
+			_debug("language detection failed: " . $e->getMessage());
+
+			return $article;
+		}
+
+		// count() on a non-array is a warning on PHP 7.2+ and a TypeError on PHP 8.
+		if (is_array($detected) && count($detected) > 0) {
+			$possible = array_keys($detected);
+			$entry_language = $possible[0];
+
+			_debug("detected language: $entry_language");
+
+			$article["language"] = $entry_language;
+		}
+
+		return $article;
+	}
+
+	/**
+	 * @return int plugin API version implemented by this plugin
+	 */
+	function api_version() {
+		return 2;
+	}
+
+}
+?>

lib/languagedetect/LanguageDetect.php → plugins/af_lang_detect/languagedetect/LanguageDetect.php


lib/languagedetect/Text/LanguageDetect/Exception.php → plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Exception.php


lib/languagedetect/Text/LanguageDetect/ISO639.php → plugins/af_lang_detect/languagedetect/Text/LanguageDetect/ISO639.php


lib/languagedetect/Text/LanguageDetect/Parser.php → plugins/af_lang_detect/languagedetect/Text/LanguageDetect/Parser.php


lib/languagedetect/data/lang.dat → plugins/af_lang_detect/languagedetect/data/lang.dat


lib/languagedetect/data/unicode_blocks.dat → plugins/af_lang_detect/languagedetect/data/unicode_blocks.dat