summary refs log tree commit diff
path: root/include
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2013-07-31 10:30:14 +0400
committer Andrew Dolgov <[email protected]> 2013-07-31 10:30:17 +0400
commit 6b4617970f2c25ac852daab873f5374d611d5b7e (patch)
tree e7026b5c9879b7489454626aef00e3c2f04e06db /include
parent f035e6dc822e3b8766d55689abff26a6bf52d404 (diff)
add text_languagedetect to guess article language for better hyphenation
(bump schema)
Diffstat (limited to 'include')
-rw-r--r-- include/functions.php 9
-rw-r--r-- include/rssfuncs.php 16
2 files changed, 22 insertions, 3 deletions
diff --git a/include/functions.php b/include/functions.php
index cef0ea9eb..9d14aae7a 100644
--- a/include/functions.php
+++ b/include/functions.php
@@ -1,6 +1,6 @@
<?php
define('EXPECTED_CONFIG_VERSION', 26);
- define('SCHEMA_VERSION', 121);
+ define('SCHEMA_VERSION', 122);
define('LABEL_BASE_INDEX', -1024);
define('PLUGIN_FEED_BASE_INDEX', -128);
@@ -87,6 +87,7 @@
require_once "lib/accept-to-gettext.php";
require_once "lib/gettext/gettext.inc";
+ require_once "lib/languagedetect/LanguageDetect.php";
function startup_gettext() {
@@ -2650,6 +2651,7 @@
comments,
int_id,
uuid,
+ lang,
hide_images,
unread,feed_id,marked,published,link,last_read,orig_feed_id,
last_marked, last_published,
@@ -2692,6 +2694,7 @@
"tag_cache," .
"label_cache," .
"link," .
+ "lang," .
"uuid," .
"last_read," .
"(SELECT hide_images FROM ttrss_feeds WHERE id = feed_id) AS hide_images," .
@@ -3118,7 +3121,7 @@
ccache_update($feed_id, $owner_uid);
}
- $result = db_query("SELECT id,title,link,content,feed_id,comments,int_id,
+ $result = db_query("SELECT id,title,link,content,feed_id,comments,int_id,lang,
".SUBSTRING_FOR_DATE."(updated,1,16) as updated,
(SELECT site_url FROM ttrss_feeds WHERE id = feed_id) as site_url,
(SELECT hide_images FROM ttrss_feeds WHERE id = feed_id) as hide_images,
@@ -3290,7 +3293,7 @@
}
$rv['content'] .= "</div>";
- $rv['content'] .= "<div class=\"postContent\" lang=\"en\">";
+ $rv['content'] .= "<div class=\"postContent\" lang=\"".$line['lang']."\">";
$rv['content'] .= $line["content"];
$rv['content'] .= format_article_enclosures($id,
diff --git a/include/rssfuncs.php b/include/rssfuncs.php
index cfb0e7a46..756ecbfc1 100644
--- a/include/rssfuncs.php
+++ b/include/rssfuncs.php
@@ -354,6 +354,11 @@
$rss->init();
}
+ require_once "lib/languagedetect/LanguageDetect.php";
+
+ $lang = new Text_LanguageDetect();
+ $lang->setNameMode(2);
+
// print_r($rss);
$feed = db_escape_string($feed);
@@ -565,6 +570,15 @@
print "\n";
}
+ $entry_language = $lang->detect($entry_content, 1);
+
+ if (count($entry_language) > 0) {
+ $entry_language = array_keys($entry_language);
+ $entry_language = db_escape_string($entry_language[0]);
+
+ _debug("detected language: $entry_language", $debug_enabled);
+ }
+
$entry_comments = $item->get_comments_url();
$entry_author = $item->get_author();
@@ -677,6 +691,7 @@
comments,
num_comments,
plugin_data,
+ lang,
author)
VALUES
('$entry_title',
@@ -691,6 +706,7 @@
'$entry_comments',
'$num_comments',
'$entry_plugin_data',
+ '$entry_language',
'$entry_author')");
$article_labels = array();