From 74a752879bcad54daa62994f9eae42fe4afdd299 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Wed, 25 Nov 2015 21:07:24 +0300 Subject: af_sort_bayes: move to -attic repo, not really suitable for production use of any kind --- plugins/af_sort_bayes/init.php | 413 ----------------------------------------- 1 file changed, 413 deletions(-) delete mode 100644 plugins/af_sort_bayes/init.php (limited to 'plugins/af_sort_bayes/init.php') diff --git a/plugins/af_sort_bayes/init.php b/plugins/af_sort_bayes/init.php deleted file mode 100644 index 6599baa0f..000000000 --- a/plugins/af_sort_bayes/init.php +++ /dev/null @@ -1,413 +0,0 @@ -host = $host; - $this->dbh = Db::get(); - - $this->init_database(); - - $host->add_hook($host::HOOK_ARTICLE_FILTER, $this); - $host->add_hook($host::HOOK_PREFS_TAB, $this); - $host->add_hook($host::HOOK_ARTICLE_BUTTON, $this); - - } - - function trainArticle() { - $article_id = (int) $_REQUEST["article_id"]; - $train_up = sql_bool_to_bool($_REQUEST["train_up"]); - - //$category = $train_up ? "GOOD" : "UGLY"; - $dst_category = "UGLY"; - - $nbs = new NaiveBayesianStorage($_SESSION["uid"]); - $nb = new NaiveBayesian($nbs); - - $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " . - $article_id . " AND owner_uid = " . $_SESSION["uid"]); - - if ($this->dbh->num_rows($result) != 0) { - $guid = $this->dbh->fetch_result($result, 0, "guid"); - $title = $this->dbh->fetch_result($result, 0, "title"); - $content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length); - $score = $this->dbh->fetch_result($result, 0, "score"); - - $this->dbh->query("BEGIN"); - - $ref = $nbs->getReference($guid, false); - - if (isset($ref['category_id'])) { - $current_category = $nbs->getCategoryById($ref['category_id']); - } else { - $current_category = "UGLY"; - } - - // set score to fixed value for now - - if ($train_up) { - switch ($current_category) { - case "UGLY": - $dst_category = "GOOD"; - $score = $this->score_modifier; - break; - case "BAD": - $dst_category = "UGLY"; - $score = 0; - break; - case "GOOD": - $dst_category = "GOOD"; - break; - } - } else { - switch ($current_category) { - case "UGLY": - $dst_category = "BAD"; - $score = -$this->score_modifier; - break; - case "BAD": - $dst_category = "BAD"; - break; - case "GOOD": - $dst_category = "UGLY"; - $score = 0; - break; - } - } - - $nb->untrain($guid, $content); - $nb->train($guid, $nbs->getCategoryByName($dst_category), $content); - - $this->dbh->query("UPDATE ttrss_user_entries SET score = '$score' WHERE ref_id = $article_id AND owner_uid = " . $_SESSION["uid"]); - - $nb->updateProbabilities(); - - $this->dbh->query("COMMIT"); - - } - - print "$article_id :: $dst_category :: $score"; - } - - function get_js() { - return file_get_contents(__DIR__ . "/init.js"); - } - - function get_prefs_js() { - return file_get_contents(__DIR__ . "/init.js"); - } - - function hook_article_button($line) { - return "" . - "" . - ""; - - } - - function init_database() { - $prefix = $this->sql_prefix; - - // TODO there probably should be a way for plugins to determine their schema version to upgrade tables - - /*$this->dbh->query("DROP TABLE IF EXISTS ${prefix}_wordfreqs", false); - $this->dbh->query("DROP TABLE IF EXISTS ${prefix}_references", false); - $this->dbh->query("DROP TABLE IF EXISTS ${prefix}_categories", false);*/ - - $this->dbh->query("BEGIN"); - - // PG only for the time being - - if (DB_TYPE == "mysql") { - - $this->dbh->query("CREATE TABLE IF NOT EXISTS ${prefix}_categories ( - id INTEGER NOT NULL PRIMARY KEY auto_increment, - category varchar(100) NOT NULL DEFAULT '', - probability DOUBLE NOT NULL DEFAULT '0', - owner_uid INTEGER NOT NULL, - FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE, - word_count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB"); - - $this->dbh->query("CREATE TABLE IF NOT EXISTS ${prefix}_references ( - id INTEGER NOT NULL PRIMARY KEY auto_increment, - document_id VARCHAR(255) NOT NULL, - category_id INTEGER NOT NULL, - FOREIGN KEY (category_id) REFERENCES ${prefix}_categories(id) ON DELETE CASCADE, - owner_uid INTEGER NOT NULL, - FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE) ENGINE=InnoDB"); - - $this->dbh->query("CREATE TABLE IF NOT EXISTS ${prefix}_wordfreqs ( - word varchar(100) NOT NULL DEFAULT '', - category_id INTEGER NOT NULL, - FOREIGN KEY (category_id) REFERENCES ${prefix}_categories(id) ON DELETE CASCADE, - owner_uid INTEGER NOT NULL, - FOREIGN KEY (owner_uid) REFERENCES ttrss_users(id) ON DELETE CASCADE, - count BIGINT NOT NULL DEFAULT '0') ENGINE=InnoDB"); - - - } else { - $this->dbh->query("CREATE TABLE IF NOT EXISTS ${prefix}_categories ( - id SERIAL NOT NULL PRIMARY KEY, - category varchar(100) NOT NULL DEFAULT '', - probability DOUBLE PRECISION NOT NULL DEFAULT '0', - owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE, - word_count BIGINT NOT NULL DEFAULT '0')"); - - $this->dbh->query("CREATE TABLE IF NOT EXISTS ${prefix}_references ( - id SERIAL NOT NULL PRIMARY KEY, - document_id VARCHAR(255) NOT NULL, - category_id INTEGER NOT NULL REFERENCES ${prefix}_categories(id) ON DELETE CASCADE, - owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE)"); - - $this->dbh->query("CREATE TABLE IF NOT EXISTS ${prefix}_wordfreqs ( - word varchar(100) NOT NULL DEFAULT '', - category_id INTEGER NOT NULL REFERENCES ${prefix}_categories(id) ON DELETE CASCADE, - owner_uid INTEGER NOT NULL REFERENCES ttrss_users(id) ON DELETE CASCADE, - count BIGINT NOT NULL DEFAULT '0')"); - } - - $owner_uid = @$_SESSION["uid"]; - - if ($owner_uid) { - $result = $this->dbh->query("SELECT id FROM ${prefix}_categories WHERE owner_uid = $owner_uid LIMIT 1"); - - if ($this->dbh->num_rows($result) == 0) { - $this->dbh->query("INSERT INTO ${prefix}_categories (category, owner_uid) VALUES ('GOOD', $owner_uid)"); - $this->dbh->query("INSERT INTO ${prefix}_categories (category, owner_uid) VALUES ('BAD', $owner_uid)"); - $this->dbh->query("INSERT INTO ${prefix}_categories (category, owner_uid) VALUES ('UGLY', $owner_uid)"); - } - } - - $this->dbh->query("COMMIT"); - } - - function renderPrefsUI() { - $result = $this->dbh->query("SELECT category, probability, word_count, - (SELECT COUNT(id) FROM {$this->sql_prefix}_references WHERE - category_id = {$this->sql_prefix}_categories.id) as doc_count - FROM {$this->sql_prefix}_categories WHERE owner_uid = " . $_SESSION["uid"]); - - print "

" . __("Statistics") . "

"; - - print "

".T_sprintf("Required UGLY word count for automatic matching: %d", $this->auto_categorize_threshold)."

"; - - print ""; - print ""; - - while ($line = $this->dbh->fetch_assoc($result)) { - print ""; - foreach ($line as $k => $v) { - if ($k == "probability") $v = sprintf("%.3f", $v); - - print ""; - } - print ""; - } - - print "
CategoryProbabilityWordsArticles
$v
"; - - print "

" . __("Last matched articles") . "

"; - - $result = $this->dbh->query("SELECT te.title, category, tf.title AS feed_title - FROM ttrss_entries AS te, ttrss_user_entries AS tu, ttrss_feeds AS tf, {$this->sql_prefix}_references AS tr, {$this->sql_prefix}_categories AS tc - WHERE tf.id = tu.feed_id AND tu.ref_id = te.id AND tc.id = tr.category_id AND tr.document_id = te.guid ORDER BY te.id DESC LIMIT 20"); - - print ""; - - print " "; - - print " "; - - // - } - - function hook_prefs_tab($args) { - if ($args != "prefPrefs") return; - - print "
"; - - $this->renderPrefsUI(); - - print "
"; - } - - function hook_article_filter($article) { - $owner_uid = $article["owner_uid"]; - - // guid already includes owner_uid so we don't need to include it - $result = $this->dbh->query("SELECT id FROM {$this->sql_prefix}_references WHERE - document_id = '" . $this->dbh->escape_string($article['guid_hashed']) . "'"); - - if (db_num_rows($result) != 0) { - _debug("bayes: article already categorized"); - return $article; - } - - $nbs = new NaiveBayesianStorage($owner_uid); - $nb = new NaiveBayesian($nbs); - - $categories = $nbs->getCategories(); - - if (count($categories) > 0) { - - $count_neutral = 0; - - $id_good = 0; - $id_ugly = 0; - $id_bad = 0; - - foreach ($categories as $id => $cat) { - if ($cat["category"] == "GOOD") { - $id_good = $id; - } else if ($cat["category"] == "UGLY") { - $id_ugly = $id; - $count_neutral += $cat["word_count"]; - } else if ($cat["category"] == "BAD") { - $id_bad = $id; - } - } - - $dst_category = $id_ugly; - - $bayes_content = mb_substr(mb_strtolower($article["title"] . " " . strip_tags($article["content"])), 0, $this->max_document_length); - - if ($count_neutral >= $this->auto_categorize_threshold) { - // enable automatic categorization - - $result = $nb->categorize($bayes_content); - - //print_r($result); - - if (count($result) == 3) { - $prob_good = $result[$id_good]; - $prob_bad = $result[$id_bad]; - - if (!is_nan($prob_good) && $prob_good > 0.90) { - $dst_category = $id_good; - $article["score_modifier"] += $this->score_modifier; - } else if (!is_nan($prob_bad) && $prob_bad > 0.90) { - $dst_category = $id_bad; - $article["score_modifier"] -= $this->score_modifier; - } - } - - _debug("bayes, dst category: $dst_category"); - } - - $nb->train($article["guid_hashed"], $dst_category, $bayes_content); - - $nb->updateProbabilities(); - } - - return $article; - - } - - function clearDatabase() { - $prefix = $this->sql_prefix; - - $this->dbh->query("BEGIN"); - $this->dbh->query("DELETE FROM ${prefix}_references WHERE owner_uid = " . $_SESSION["uid"]); - $this->dbh->query("DELETE FROM ${prefix}_wordfreqs WHERE owner_uid = " . $_SESSION["uid"]); - $this->dbh->query("COMMIT"); - - $nbs = new NaiveBayesianStorage($_SESSION["uid"]); - $nb = new NaiveBayesian($nbs); - $nb->updateProbabilities(); - } - - function showArticleStats() { - $article_id = (int) $_REQUEST["article_id"]; - - $result = $this->dbh->query("SELECT score, guid, title, content FROM ttrss_entries, ttrss_user_entries WHERE ref_id = id AND id = " . - $article_id . " AND owner_uid = " . $_SESSION["uid"]); - - if ($this->dbh->num_rows($result) != 0) { - $guid = $this->dbh->fetch_result($result, 0, "guid"); - $title = $this->dbh->fetch_result($result, 0, "title"); - - $content = mb_substr(mb_strtolower($title . " " . strip_tags($this->dbh->fetch_result($result, 0, "content"))), 0, $this->max_document_length); - - print "

" . $title . "

"; - - $nbs = new NaiveBayesianStorage($_SESSION["uid"]); - $nb = new NaiveBayesian($nbs); - - $categories = $nbs->getCategories(); - - $ref = $nbs->getReference($guid, false); - - $current_cat = isset($ref["category_id"]) ? $categories[$ref["category_id"]]["category"] : "N/A"; - - print "

" . T_sprintf("Currently stored as: %s", $current_cat) . "

"; - - $result = $nb->categorize($content); - - print "

" . __("Classifier result") . "

"; - - print ""; - print ""; - - foreach ($result as $k => $v) { - print ""; - print ""; - print ""; - - print ""; - } - - print "
CategoryProbability
" . $categories[$k]["category"] . "" . $v . "
"; - - } else { - print_error("Article not found"); - } - - print "
"; - - print ""; - - print "
"; - - } - - function api_version() { - return 2; - } - -} -?> -- cgit v1.2.3