Skip to content

Commit ddf590b

Browse files
committed
pycodestyle (PEP 8) cleanup in Python scripts
These are mainly whitespace changes. I didn't fix "E501 line too long", which would require more significant surgery.
1 parent e80a7a1 commit ddf590b

File tree

2 files changed

+40
-24
lines changed

2 files changed

+40
-24
lines changed

contrib/unaccent/generate_unaccent_rules.py

+34-20
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@
3838
# For now we are being conservative by including only Latin and Greek. This
3939
# could be extended in future based on feedback from people with relevant
4040
# language knowledge.
41-
PLAIN_LETTER_RANGES = ((ord('a'), ord('z')), # Latin lower case
42-
(ord('A'), ord('Z')), # Latin upper case
43-
(0x03b1, 0x03c9), # GREEK SMALL LETTER ALPHA, GREEK SMALL LETTER OMEGA
44-
(0x0391, 0x03a9)) # GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA
41+
PLAIN_LETTER_RANGES = ((ord('a'), ord('z')), # Latin lower case
42+
(ord('A'), ord('Z')), # Latin upper case
43+
(0x03b1, 0x03c9), # GREEK SMALL LETTER ALPHA, GREEK SMALL LETTER OMEGA
44+
(0x0391, 0x03a9)) # GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA
4545

4646
# Combining marks follow a "base" character, and result in a composite
4747
# character. Example: "U&'A\0300'" produces "À". There are three types of
@@ -51,9 +51,10 @@
5151
# https://en.wikipedia.org/wiki/Combining_character
5252
# https://www.unicode.org/charts/PDF/U0300.pdf
5353
# https://www.unicode.org/charts/PDF/U20D0.pdf
54-
COMBINING_MARK_RANGES = ((0x0300, 0x0362), # Mn: Accents, IPA
55-
(0x20dd, 0x20E0), # Me: Symbols
56-
(0x20e2, 0x20e4),) # Me: Screen, keycap, triangle
54+
COMBINING_MARK_RANGES = ((0x0300, 0x0362), # Mn: Accents, IPA
55+
(0x20dd, 0x20E0), # Me: Symbols
56+
(0x20e2, 0x20e4),) # Me: Screen, keycap, triangle
57+
5758

5859
def print_record(codepoint, letter):
5960
if letter:
@@ -63,12 +64,14 @@ def print_record(codepoint, letter):
6364

6465
print(output)
6566

67+
6668
class Codepoint:
6769
def __init__(self, id, general_category, combining_ids):
6870
self.id = id
6971
self.general_category = general_category
7072
self.combining_ids = combining_ids
7173

74+
7275
def is_mark_to_remove(codepoint):
7376
"""Return true if this is a combining mark to remove."""
7477
if not is_mark(codepoint):
@@ -79,17 +82,20 @@ def is_mark_to_remove(codepoint):
7982
return True
8083
return False
8184

85+
8286
def is_plain_letter(codepoint):
8387
"""Return true if codepoint represents a "plain letter"."""
8488
for begin, end in PLAIN_LETTER_RANGES:
85-
if codepoint.id >= begin and codepoint.id <= end:
86-
return True
89+
if codepoint.id >= begin and codepoint.id <= end:
90+
return True
8791
return False
8892

93+
8994
def is_mark(codepoint):
9095
"""Returns true for diacritical marks (combining codepoints)."""
9196
return codepoint.general_category in ("Mn", "Me", "Mc")
9297

98+
9399
def is_letter_with_marks(codepoint, table):
94100
"""Returns true for letters combined with one or more marks."""
95101
# See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
@@ -105,16 +111,18 @@ def is_letter_with_marks(codepoint, table):
105111

106112
# Check if the base letter of this letter has marks.
107113
codepoint_base = codepoint.combining_ids[0]
108-
if (is_plain_letter(table[codepoint_base]) is False and \
109-
is_letter_with_marks(table[codepoint_base], table) is False):
114+
if is_plain_letter(table[codepoint_base]) is False and \
115+
is_letter_with_marks(table[codepoint_base], table) is False:
110116
return False
111117

112118
return True
113119

120+
114121
def is_letter(codepoint, table):
115122
"""Return true for letter with or without diacritical marks."""
116123
return is_plain_letter(codepoint) or is_letter_with_marks(codepoint, table)
117124

125+
118126
def get_plain_letter(codepoint, table):
119127
"""Return the base codepoint without marks. If this codepoint has more
120128
than one combining character, do a recursive lookup on the table to
@@ -133,15 +141,18 @@ def get_plain_letter(codepoint, table):
133141
# Should not come here
134142
assert(False)
135143

144+
136145
def is_ligature(codepoint, table):
137146
"""Return true for letters combined with letters."""
138147
return all(is_letter(table[i], table) for i in codepoint.combining_ids)
139148

149+
140150
def get_plain_letters(codepoint, table):
141151
"""Return a list of plain letters from a ligature."""
142152
assert(is_ligature(codepoint, table))
143153
return [get_plain_letter(table[id], table) for id in codepoint.combining_ids]
144154

155+
145156
def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
146157
"""Parse the XML file and return a set of tuples (src, trg), where "src"
147158
is the original character and "trg" the substitute."""
@@ -189,21 +200,23 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
189200

190201
return charactersSet
191202

203+
192204
def special_cases():
193205
"""Returns the special cases which are not handled by other methods"""
194206
charactersSet = set()
195207

196208
# Cyrillic
197-
charactersSet.add((0x0401, "\u0415")) # CYRILLIC CAPITAL LETTER IO
198-
charactersSet.add((0x0451, "\u0435")) # CYRILLIC SMALL LETTER IO
209+
charactersSet.add((0x0401, "\u0415")) # CYRILLIC CAPITAL LETTER IO
210+
charactersSet.add((0x0451, "\u0435")) # CYRILLIC SMALL LETTER IO
199211

200212
# Symbols of "Letterlike Symbols" Unicode Block (U+2100 to U+214F)
201-
charactersSet.add((0x2103, "\xb0C")) # DEGREE CELSIUS
202-
charactersSet.add((0x2109, "\xb0F")) # DEGREE FAHRENHEIT
203-
charactersSet.add((0x2117, "(P)")) # SOUND RECORDING COPYRIGHT
213+
charactersSet.add((0x2103, "\xb0C")) # DEGREE CELSIUS
214+
charactersSet.add((0x2109, "\xb0F")) # DEGREE FAHRENHEIT
215+
charactersSet.add((0x2117, "(P)")) # SOUND RECORDING COPYRIGHT
204216

205217
return charactersSet
206218

219+
207220
def main(args):
208221
# https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
209222
decomposition_type_pattern = re.compile(" *<[^>]*> *")
@@ -238,12 +251,12 @@ def main(args):
238251
len(codepoint.combining_ids) > 1:
239252
if is_letter_with_marks(codepoint, table):
240253
charactersSet.add((codepoint.id,
241-
chr(get_plain_letter(codepoint, table).id)))
254+
chr(get_plain_letter(codepoint, table).id)))
242255
elif args.noLigaturesExpansion is False and is_ligature(codepoint, table):
243256
charactersSet.add((codepoint.id,
244-
"".join(chr(combining_codepoint.id)
245-
for combining_codepoint \
246-
in get_plain_letters(codepoint, table))))
257+
"".join(chr(combining_codepoint.id)
258+
for combining_codepoint
259+
in get_plain_letters(codepoint, table))))
247260
elif is_mark_to_remove(codepoint):
248261
charactersSet.add((codepoint.id, None))
249262

@@ -258,6 +271,7 @@ def main(args):
258271
for characterPair in charactersList:
259272
print_record(characterPair[0], characterPair[1])
260273

274+
261275
if __name__ == "__main__":
262276
parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
263277
parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')

src/test/locale/sort-test.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
#! /usr/bin/env python
22

3-
import sys, string, locale
3+
import locale
4+
import sys
5+
46
locale.setlocale(locale.LC_ALL, "")
57

68
if len(sys.argv) != 2:
7-
sys.stderr.write("Usage: sort.py filename\n")
8-
sys.exit(1)
9+
sys.stderr.write("Usage: sort.py filename\n")
10+
sys.exit(1)
911

1012
infile = open(sys.argv[1], 'r')
1113
list = infile.readlines()
1214
infile.close()
1315

1416
for i in range(0, len(list)):
15-
list[i] = list[i][:-1] # chop!
17+
list[i] = list[i][:-1] # chop!
1618

1719
list.sort(key=locale.strxfrm)
1820
print('\n'.join(list))

0 commit comments

Comments
 (0)