From 6b4617970f2c25ac852daab873f5374d611d5b7e Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 31 Jul 2013 10:30:14 +0400 Subject: add text_languagedetect to guess article language for better hyphenation (bump schema) --- include/rssfuncs.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/rssfuncs.php') diff --git a/include/rssfuncs.php b/include/rssfuncs.php index cfb0e7a46..756ecbfc1 100644 --- a/include/rssfuncs.php +++ b/include/rssfuncs.php @@ -354,6 +354,11 @@ $rss->init(); } + require_once "lib/languagedetect/LanguageDetect.php"; + + $lang = new Text_LanguageDetect(); + $lang->setNameMode(2); + // print_r($rss); $feed = db_escape_string($feed); @@ -565,6 +570,15 @@ print "\n"; } + $entry_language = $lang->detect($entry_content, 1); + + if (count($entry_language) > 0) { + $entry_language = array_keys($entry_language); + $entry_language = db_escape_string($entry_language[0]); + + _debug("detected language: $entry_language", $debug_enabled); + } + $entry_comments = $item->get_comments_url(); $entry_author = $item->get_author(); @@ -677,6 +691,7 @@ comments, num_comments, plugin_data, + lang, author) VALUES ('$entry_title', @@ -691,6 +706,7 @@ '$entry_comments', '$num_comments', '$entry_plugin_data', + '$entry_language', '$entry_author')"); $article_labels = array(); -- cgit v1.2.3