forked from grafana/grafana
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path: ngram.go
47 lines (38 loc) · 1.23 KB
/
ngram.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
package searchV2
import (
"strings"
"github.com/blugelabs/bluge/analysis"
"github.com/blugelabs/bluge/analysis/token"
"github.com/blugelabs/bluge/analysis/tokenizer"
)
// punctuationReplacer substitutes a single space for each ASCII punctuation
// character. It is assigned in init.
var punctuationReplacer *strings.Replacer

// init builds the old/new argument list for strings.NewReplacer: every
// punctuation character is immediately followed by its replacement, a space.
func init() {
	const symbols = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"

	// Two entries per symbol: the character itself and the space replacing it.
	// Every symbol is a single ASCII byte, so byte-wise slicing is safe here.
	pairs := make([]string, 2*len(symbols))
	for i := 0; i < len(symbols); i++ {
		pairs[2*i] = symbols[i : i+1]
		pairs[2*i+1] = " "
	}
	punctuationReplacer = strings.NewReplacer(pairs...)
}
// punctuationCharFilter is a stateless character filter that runs its input
// through punctuationReplacer.
type punctuationCharFilter struct{}

// Filter returns a new byte slice holding input after punctuationReplacer has
// been applied to it. The input slice itself is not modified.
func (f *punctuationCharFilter) Filter(input []byte) []byte {
	replaced := punctuationReplacer.Replace(string(input))
	return []byte(replaced)
}
// ngramEdgeFilterMaxLength is the last argument passed to
// token.NewEdgeNgramFilter in ngramIndexAnalyzer, i.e. the longest edge
// n-gram produced at index time (the shortest is 1).
const ngramEdgeFilterMaxLength = 7
// ngramIndexAnalyzer is the analysis pipeline that ends in an edge n-gram
// filter (presumably applied to text at index time — confirm at call sites).
// Punctuation is first turned into spaces, the result is split on whitespace,
// and each token then goes through the camel-case, lower-case and front edge
// n-gram filters, in that order.
var ngramIndexAnalyzer = &analysis.Analyzer{
	Tokenizer: tokenizer.NewWhitespaceTokenizer(),
	CharFilters: []analysis.CharFilter{
		&punctuationCharFilter{},
	},
	// Filter order is significant: camel-case splitting has to see the
	// original casing, so it runs before lower-casing.
	TokenFilters: []analysis.TokenFilter{
		token.NewCamelCaseFilter(),
		token.NewLowerCaseFilter(),
		// Front-anchored n-grams of length 1..ngramEdgeFilterMaxLength.
		token.NewEdgeNgramFilter(token.FRONT, 1, ngramEdgeFilterMaxLength),
	},
}
// ngramQueryAnalyzer uses the same char filter, tokenizer, camel-case filter
// and lower-case filter as ngramIndexAnalyzer, but has no edge n-gram filter,
// so tokens are emitted whole (presumably applied to query text — confirm at
// call sites).
var ngramQueryAnalyzer = &analysis.Analyzer{
	Tokenizer: tokenizer.NewWhitespaceTokenizer(),
	CharFilters: []analysis.CharFilter{
		&punctuationCharFilter{},
	},
	// Camel-case splitting runs before lower-casing so it still sees the
	// original casing.
	TokenFilters: []analysis.TokenFilter{
		token.NewCamelCaseFilter(),
		token.NewLowerCaseFilter(),
	},
}