summaryrefslogtreecommitdiff
path: root/plugins
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2015-06-18 08:42:17 +0300
committerAndrew Dolgov <[email protected]>2015-06-18 08:42:17 +0300
commitef7395f170caa46519e11f77148457f7aa1cee43 (patch)
treea84216f8d3a1548d844ab5114bb348b39252c769 /plugins
parent5613bb3584356e51e5e476484d6e8b38210707d4 (diff)
add more common English words to Bayes ignore list
Diffstat (limited to 'plugins')
-rw-r--r--plugins/af_sort_bayes/lib/class.naivebayesian.php13
1 files changed, 12 insertions, 1 deletions
diff --git a/plugins/af_sort_bayes/lib/class.naivebayesian.php b/plugins/af_sort_bayes/lib/class.naivebayesian.php
index da81f2aab..4a4ffa7eb 100644
--- a/plugins/af_sort_bayes/lib/class.naivebayesian.php
+++ b/plugins/af_sort_bayes/lib/class.naivebayesian.php
@@ -226,7 +226,18 @@
@return array ignore list
*/
function getIgnoreList() {
- return array('the', 'that', 'you', 'for', 'and');
+ //return array('the', 'that', 'you', 'for', 'and');
+
+ // https://en.wikipedia.org/wiki/Most_common_words_in_English
+ return array('the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'I', 'it', 'for', 'not', 'on', 'with',
+ 'he', 'as', 'you', 'do', 'at', 'this', 'but', 'his', 'by', 'from', 'they', 'we', 'say', 'her',
+ 'she', 'or', 'an', 'will', 'my', 'one', 'all', 'would', 'there', 'their', 'what', 'so', 'up',
+ 'out', 'if', 'about', 'who', 'get', 'which', 'go', 'me', 'when', 'make', 'can', 'like', 'time',
+ 'no', 'just', 'him', 'know', 'take', 'people', 'into', 'year', 'your', 'good', 'some', 'could',
+ 'them', 'see', 'other', 'than', 'then', 'now', 'look', 'only', 'come', 'its', 'over', 'think',
+ 'also', 'back', 'after', 'use', 'two', 'how', 'our', 'work', 'first', 'well', 'way', 'even',
+ 'new', 'want', 'because', 'any', 'these', 'give', 'day', 'most', 'us', 'read', 'more');
+
}
/** get the tokens from a string