diff options
author | Andrew Dolgov <[email protected]> | 2015-06-17 10:36:11 +0300 |
---|---|---|
committer | Andrew Dolgov <[email protected]> | 2015-06-17 10:36:11 +0300 |
commit | 853cc128d6e262b4a7a693c6321a406674837d73 (patch) | |
tree | 7d78800e3930d114407118c9658f24326760444f /plugins/af_sort_bayes/lib/class.naivebayesian_ngram.php | |
parent | 35c12dc40a4efb2b3f1ca8a8f822819ce1d34b8a (diff) |
add placeholder stuff for af_sort_bayes
Diffstat (limited to 'plugins/af_sort_bayes/lib/class.naivebayesian_ngram.php')
-rw-r--r-- | plugins/af_sort_bayes/lib/class.naivebayesian_ngram.php | 52 |
1 files changed, 52 insertions, 0 deletions
<?php

/**
 * Naive Bayesian classifier variant that tokenizes text into overlapping
 * character n-grams instead of using the tokenizer inherited from
 * NaiveBayesian.
 */
class NaiveBayesianNgram extends NaiveBayesian {
    /** Number of characters per n-gram (2 = bigrams). */
    var $N = 2;

    /**
     * Set up the classifier and remember the n-gram size.
     *
     * @param NaiveBayesianStorage $nbs storage backend, passed straight to the parent constructor
     * @param int $n number of characters per n-gram
     * @return boolean always true (kept for backward compatibility; ignored by `new`)
     */
    function __construct($nbs, $n = 2) {
        parent::__construct($nbs);

        $this->N = $n;

        return true;
    }

    /**
     * Split a string into overlapping character n-grams and count them.
     *
     * Lengths and offsets are measured in characters via the mb_* functions,
     * using the current mbstring internal encoding.
     *
     * @param string $string text to tokenize
     * @return array map of n-gram => number of occurrences; empty when the
     *               string is shorter than N characters or N is less than 1
     */
    function _getTokens($string) {
        $tokens = array();

        $n = (int) $this->N;

        // An n-gram needs at least one character; anything else yields no tokens.
        if ($n < 1) {
            return $tokens;
        }

        // Measured once: the string does not change while we scan it.
        $length = mb_strlen($string);

        // The last full n-gram starts at offset $length - $n, so the bound is
        // inclusive. Every window taken here is exactly $n characters long.
        for ($i = 0; $i <= $length - $n; $i++) {
            $gram = mb_substr($string, $i, $n);

            if (!array_key_exists($gram, $tokens)) {
                $tokens[$gram] = 0;
            }

            $tokens[$gram]++;
        }

        return $tokens;
    }
}