blob: cee2bb1d7bfeeb4b734be7a883260e63ae3e586a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
<?php
/**
 * Naive Bayesian classifier variant that tokenizes text into character
 * n-grams (multibyte-aware) instead of using the parent's tokenizer.
 */
class NaiveBayesianNgram extends NaiveBayesian {
    /**
     * Length, in characters, of each n-gram token.
     *
     * @var int
     */
    public $N = 2;

    /**
     * Creates the classifier and sets the n-gram length.
     *
     * @param NaiveBayesianStorage $nbs storage object handed to the parent constructor
     * @param int                  $n   number of characters per n-gram (default 2)
     * @return boolean always true (kept for backward compatibility; ignored by `new`)
     */
    public function __construct($nbs, $n = 2) {
        parent::__construct($nbs);
        $this->N = $n;
        return true;
    }

    /**
     * Splits a string into overlapping character n-grams and counts them.
     *
     * Every window of N consecutive characters, from the first character up
     * to and including the window that ends on the last character, becomes a
     * token. Lengths and offsets are measured with the mb_* functions, so
     * multibyte characters count as one character each.
     *
     * @param string $string text to tokenize
     * @return array map of n-gram => number of occurrences; empty when the
     *               string is shorter than N or N is less than 1
     */
    public function _getTokens($string) {
        $tokens = array();

        $size = (int) $this->N;
        if ($size < 1) {
            // A non-positive window has no meaningful n-grams.
            return $tokens;
        }

        $string = (string) $string;

        // Hoisted: the length does not change while scanning.
        $lastStart = mb_strlen($string) - $size;

        // Inclusive bound so the final n-gram (ending on the last character)
        // is counted too; every window taken here is exactly $size long.
        for ($start = 0; $start <= $lastStart; $start++) {
            $gram = mb_substr($string, $start, $size);
            if (isset($tokens[$gram])) {
                $tokens[$gram]++;
            } else {
                $tokens[$gram] = 1;
            }
        }

        return $tokens;
    }
}
|