-
-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathnormalize.go
69 lines (63 loc) · 2.04 KB
/
normalize.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package lex
import (
"strings"
"unicode"
"golang.org/x/text/unicode/norm"
)
// normalize is a special-case mapping that keeps case conversion
// locale-invariant for the Turkish/Azeri letters U+0130 (capital I with dot
// above) and U+0131 (small dotless i). Each of the two code points is folded
// onto the plain ASCII letter: 'I' for upper and title case, 'i' for lower
// case. Every other rune falls through to the regular Unicode case tables.
//
// A consequence of this folding is that case-insensitive comparisons built on
// this table are also insensitive to whether an i is dotted or dotless.
var normalize = unicode.SpecialCase{
	// U+0130: LATIN CAPITAL LETTER I WITH DOT ABOVE.
	{
		Lo: '\u0130',
		Hi: '\u0130',
		Delta: [unicode.MaxCase]rune{
			unicode.UpperCase: 'I' - '\u0130',
			unicode.LowerCase: 'i' - '\u0130',
			unicode.TitleCase: 'I' - '\u0130',
		},
	},
	// U+0131: LATIN SMALL LETTER DOTLESS I.
	{
		Lo: '\u0131',
		Hi: '\u0131',
		Delta: [unicode.MaxCase]rune{
			unicode.UpperCase: 'I' - '\u0131',
			unicode.LowerCase: 'i' - '\u0131',
			unicode.TitleCase: 'I' - '\u0131',
		},
	},
}
// NormalizeName returns n with every rune lower-cased through the normalize
// special-case table and, when needed, composed into Unicode Normalization
// Form C (NFC).
func NormalizeName(n string) string {
	folded := strings.Map(normalize.ToLower, n)
	// The NFC pass is skipped whenever isASCII accepts the lower-cased text.
	// NOTE(review): isASCII is defined outside this view; presumably it
	// reports whether the string contains only ASCII bytes, which NFC would
	// leave unchanged anyway.
	if !isASCII(folded) {
		folded = norm.NFC.String(folded)
	}
	return folded
}