Add type annotations to CldrAccess

Task-number: QTBUG-129613
Pick-to: 6.8
Change-Id: I8a00cca718554909b7ab9dcad15cc9b9ac702e94
Reviewed-by: Edward Welbourne <[email protected]>
author: Mate Barany <[email protected]> 2024-10-10 17:02:56 +0200
committer: Mate Barany <[email protected]> 2024-10-24 11:53:52 +0200
commit: defd1549de9a26607e888fae8d82029633ca6d17 (patch)
tree: a07731eea267ff85f6e2a5980b4150d1982df808
parent: adc4ec9d3911010b2890db351933d49e46504021 (diff)
2 files changed, 94 insertions, 74 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py
index 45db816c5ef..450d868bfc6 100644
--- a/util/locale_database/cldr.py
+++ b/util/locale_database/cldr.py
@@ -10,11 +10,11 @@ The former should normally be all you need to access.
 See individual classes for further detail.
 """
 
-from typing import Iterable, TextIO
+from typing import Callable, Iterable, Iterator, TextIO
 from xml.dom import minidom
 from weakref import WeakValueDictionary as CacheDict
 from pathlib import Path
-from datetime import datetime
+from datetime import datetime, timedelta
 
 from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner
 from localetools import names_clash
@@ -309,7 +309,7 @@ class CldrReader (object):
 # the cache. If a process were to instantiate this class with distinct
 # roots, each cache would be filled by the first to need it !
 class CldrAccess (object):
-    def __init__(self, root: Path):
+    def __init__(self, root: Path) -> None:
         """Set up a master object for accessing CLDR data.
 
         Single parameter, root, is the file-system path to the root of
@@ -317,20 +317,20 @@ class CldrAccess (object):
         contain dtd/, main/ and supplemental/ sub-directories."""
         self.root = root
 
-    def xml(self, relative_path: str):
+    def xml(self, relative_path: str) -> XmlScanner:
         """Load a single XML file and return its root element as an XmlScanner.
 
         The path is interpreted relative to self.root"""
         return XmlScanner(Node(self.__xml(relative_path)))
 
-    def supplement(self, name):
+    def supplement(self, name: str) -> Supplement:
         """Loads supplemental data as a Supplement object.
 
         The name should be that of a file in common/supplemental/, without path.
         """
         return Supplement(Node(self.__xml(f'common/supplemental/{name}')))
 
-    def locale(self, name):
+    def locale(self, name: str) -> LocaleScanner:
         """Loads all data for a locale as a LocaleScanner object.
 
         The name should be a locale name; adding suffix '.xml' to it
@@ -340,7 +340,7 @@ class CldrAccess (object):
         inheritance, where relevant."""
         return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale)
 
-    def englishNaming(self, tag): # see QLocaleXmlWriter.enumData()
+    def englishNaming(self, tag: str) -> Callable[[str], str]: # see QLocaleXmlWriter.enumData()
         return self.__codeMap(tag).get
 
     @property
@@ -354,18 +354,18 @@ class CldrAccess (object):
                 yield path.stem
 
     @property
-    def defaultContentLocales(self):
+    def defaultContentLocales(self) -> Iterator[str]:
         """Generator for the default content locales."""
         for name, attrs in self.supplement('supplementalMetadata.xml').find('metadata/defaultContent'):
             try:
-                locales = attrs['locales']
+                locales: str = attrs['locales']
             except KeyError:
                 pass
             else:
                 for locale in locales.split():
                     yield locale
 
-    def likelySubTags(self):
+    def likelySubTags(self) -> Iterator[tuple[str, str]]:
         for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'):
             yield attrs['from'], attrs['to']
 
@@ -380,7 +380,7 @@ class CldrAccess (object):
         except KeyError:
             raise Error(f'Unsupported number system: {system}')
 
-    def weekData(self, territory):
+    def weekData(self, territory: str) -> tuple[str, str, str]:
         """Data on the weekly cycle.
 
         Returns a triple (W, S, E) of en's short names for week-days;
@@ -393,7 +393,7 @@ class CldrAccess (object):
         except KeyError:
             return self.__weekData['001']
 
-    def currencyData(self, territory):
+    def currencyData(self, territory: str) -> tuple[str, int, int]:
         """Returns currency data for the given territory code.
 
         Return value is a tuple (ISO4217 code, digit count, rounding
@@ -405,7 +405,9 @@ class CldrAccess (object):
         except KeyError:
             return '', 2, 1
 
-    def codesToIdName(self, language, script, territory, variant = ''):
+    def codesToIdName(self, language: str, script: str, territory: str, variant: str = ''
+                     ) -> tuple[tuple[int, str], tuple[int, str],
+                                tuple[int, str], tuple[int, str]]:
         """Maps each code to the appropriate ID and name.
 
         Returns a 4-tuple of (ID, name) pairs corresponding to the
@@ -417,7 +419,7 @@ class CldrAccess (object):
         Until we implement variant support (QTBUG-81051), the fourth
         member of the returned tuple is always 0 paired with a string
         that should not be used."""
-        enum = self.__enumMap
+        enum: Callable[[str], dict[str, tuple[int, str]]] = self.__enumMap
         try:
             return (enum('language')[language],
                     enum('script')[script],
@@ -428,8 +430,9 @@ class CldrAccess (object):
 
         parts, values = [], [language, script, territory, variant]
         for index, key in enumerate(('language', 'script', 'territory', 'variant')):
-            naming, enums = self.__codeMap(key), enum(key)
-            value = values[index]
+            naming: dict[str, str] = self.__codeMap(key)
+            enums: dict[str, tuple[int, str]]  = enum(key)
+            value: str = values[index]
             if value not in enums:
                 text = f'{key} code {value}'
                 name = naming.get(value)
@@ -447,21 +450,22 @@ class CldrAccess (object):
                     language, script, territory, variant)
 
     @staticmethod
-    def __checkEnum(given, proper, scraps):
+    def __checkEnum(given: dict[str, str], proper: dict[str, str], scraps: set[str]
+                    ) -> Iterator[tuple[str, str]]:
         # Each is a { code: full name } mapping
         for code, name in given.items():
-            try: right = proper[code]
+            try: right: str = proper[code]
             except KeyError:
                 # No en.xml name for this code, but supplementalData's
                 # parentLocale may still believe in it:
                 if code not in scraps:
                     yield name, f'[Found no CLDR name for code {code}]'
                 continue
-            cleaned = names_clash(right, name)
+            cleaned: None | str = names_clash(right, name)
             if cleaned:
                 yield name, cleaned
 
-    def checkEnumData(self, grumble):
+    def checkEnumData(self, grumble: Callable[[str], int]) -> None:
         scraps = set()
         for k in self.__parentLocale.keys():
             for f in k.split('_'):
@@ -492,7 +496,7 @@ enumdata.py (keeping the old name as an alias):
                         + '\n')
             grumble('\n')
 
-    def bcp47Aliases(self):
+    def bcp47Aliases(self) -> tuple[dict[str, str], dict[str, str]]:
         """Reads the mapping from CLDR IDs to IANA IDs
 
         CLDR identifies timezones in various ways but its standard
@@ -530,7 +534,8 @@ enumdata.py (keeping the old name as an alias):
 
         # If we ever need a mapping back to CLDR ID, we can make
         # (description, space-joined-list) the naming values.
-        alias, naming = {}, {} # { alias: iana }, { iana: description }
+        alias: dict[str, str] = {} # { alias: iana }
+        naming: dict[str, str] = {} # { iana: description }
         for item, attrs in root.find('keyword/key/type', exclude=('deprecated',)):
             assert 'description' in attrs, item
             assert 'alias' in attrs, item
@@ -545,7 +550,8 @@ enumdata.py (keeping the old name as an alias):
 
         return alias, naming
 
-    def readWindowsTimeZones(self, alias):
+    def readWindowsTimeZones(self, alias: dict[str, str]) -> tuple[dict[str, str],
+                                                                   list[tuple[str, str, str]]]:
         """Digest CLDR's MS-Win time-zone name mapping.
 
         Single argument, alias, should be the first part of the pair
@@ -582,7 +588,8 @@ enumdata.py (keeping the old name as an alias):
         mapZone element and the last is s, its cleaned-up list of IANA
         IDs."""
 
-        defaults, windows = {}, []
+        defaults: dict[str, str] = {}
+        windows: list[tuple[str, str, str]] = []
         zones = self.supplement('windowsZones.xml')
         for name, attrs in zones.find('windowsZones/mapTimezones'):
             if name != 'mapZone':
@@ -602,7 +609,10 @@ enumdata.py (keeping the old name as an alias):
 
         return defaults, windows
 
-    def readMetaZoneMap(self, alias):
+    def readMetaZoneMap(self, alias: dict[str, str]
+                        ) -> tuple[dict[str, dict[str, str]],
+                                   dict[str, tuple[tuple[int, int, str], ...]],
+                                   dict[str, str]]:
         """Digests the metaZones supplemental data.
 
         Required argument, alias, should be the first of
@@ -633,9 +643,9 @@ enumdata.py (keeping the old name as an alias):
         locale."""
         metaZones = self.supplement('metaZones.xml') # Doesn't appear to use draft attribute
         # Map CLDR name to IANA name (or use CLDR name if unknown to alias):
-        zoneName = lambda n, g=alias.get: g(n, n)
+        zoneName: Callable[[str], str] = lambda n, g=alias.get: g(n, n)
 
-        metaMap = {} # { meta: { territory code: zoneId } }
+        metaMap: dict[str, dict[str, str]] = {} # { meta: { territory code: zoneId } }
         # Entry with territory 001 is "golden zone" for the metazone.
         for mapMeta in metaZones.findNodes('metaZones/mapTimezones'):
             attrs = mapMeta.attributes()
@@ -646,13 +656,13 @@ enumdata.py (keeping the old name as an alias):
                 raise Error('Version of metazone map type is not 2018e', attrs)
 
             for node in mapMeta.findAllChildren('mapZone'):
-                attrs = node.attributes()
+                attrs: dict[str, str] = node.attributes()
                 try:
                     meta, code, zone = attrs['other'], attrs['territory'], attrs['type']
                 except KeyError:
                     continue
 
-                bok = metaMap.setdefault(meta, {})
+                bok: dict[str, str] = metaMap.setdefault(meta, {})
                 assert code not in bok, (meta, code)
                 bok[code] = zoneName(zone)
         # Territories not named in a metaMap entry fall back on the
@@ -660,16 +670,16 @@ enumdata.py (keeping the old name as an alias):
         # entry:
         assert all('001' in bok for bok in metaMap.values())
 
-        def scanUses(zone, check=metaMap):
+        def scanUses(zone: Node, check=metaMap) -> Iterator[tuple[str|None, str|None, str]]:
             for node in zone.findAllChildren('usesMetazone'):
-                attrs = node.attributes()
-                mzone = attrs['mzone']
+                attrs: dict[str, str] = node.attributes()
+                mzone: str = attrs['mzone']
                 if mzone not in check:
                     raise Error('Unknown metazone', mzone)
                 # These are UTC date-times.
                 yield attrs.get('from'), attrs.get('to'), mzone
 
-        def sortKey(triple):
+        def sortKey(triple: tuple[str|None, str|None, str]) -> str | None:
             start, stop, mzone = triple
             # The start = None entry should sort first; since its key
             # is its stop, which is likely the next entry's start, we
@@ -680,11 +690,11 @@ enumdata.py (keeping the old name as an alias):
             # in the list, so the sorting is fatuous and the key
             # doesn't matter).
 
-        def timeRep(text, notime, epoch=datetime(1970, 1, 1, 0, 0)):
+        def timeRep(text: str, notime: bool, epoch=datetime(1970, 1, 1, 0, 0)) -> int:
             """Map a 'yyyy-MM-dd HH:mm' string to epoch minutes.
 
             If the HH:mm part is omitted, second parameter notime is true to
-            use the end of the day, false for the start. LDM specifies this
+            use the end of the day, false for the start. LDML specifies this
             reading of the pure-date values for start and stop attributes.  If
             the HH:mm part is 24:00, the end of the day is also used; LDML
             specifies this but python's datetime.fromisoformat() doesn't like
@@ -704,16 +714,20 @@ enumdata.py (keeping the old name as an alias):
                     assert len(text) == 16, text
 
                 # If it's given with HH:mm as 24:00, this throws:
-                diff = datetime.fromisoformat(text) - epoch
+                diff: timedelta = datetime.fromisoformat(text) - epoch
             except ValueError:
                 diff = datetime.fromisoformat(text[:10]) - epoch
                 diff += diff.__class__(days=1)
 
             assert diff.days >= 0 and diff.seconds >= 0, (diff, text)
-            assert diff.seconds % 60 == 0, (diff, text)
-            return diff.days * 1440 + int(diff.seconds / 60)
-
-        def mapTimes(triple, alpha=0, omega=(1<<32)-1, torep=timeRep):
+            mins, secs = divmod(diff.seconds, 60)
+            assert secs == 0, (diff, text)
+            return diff.days * 1440 + mins
+
+        def mapTimes(triple: tuple[str|None, str|None, str],
+                     alpha: int = 0, omega: int = (1<<32) - 1,
+                     torep: Callable[[str, bool, datetime], int] = timeRep
+                     ) -> tuple[int, int, str]:
             start, stop, mzone = triple
             start = alpha if start is None else torep(start, False)
             stop = omega if stop is None else torep(stop, True)
@@ -723,10 +737,11 @@ enumdata.py (keeping the old name as an alias):
                 stop = omega
             return start, stop, mzone
 
-        zones = {} # { ianaId: ( (from, to, meta), ... ) }
+        # zones is { ianaId: ( (from, to, meta), ... ) }
+        zones: dict[str, tuple[tuple[int, int, str], ...]] = {}
         for metaInfo in metaZones.findNodes('metaZones/metazoneInfo'):
             for zone in metaInfo.findAllChildren('timezone'):
-                iana = zoneName(zone.dom.attributes['type'].value)
+                iana: str = zoneName(zone.dom.attributes['type'].value)
                 story = tuple(sorted(scanUses(zone), key=sortKey))
                 # Only {first,last} entry can have None for {from,to}:
                 assert not any(s[0] is None for s in story[1:]), (iana, story)
@@ -743,7 +758,7 @@ enumdata.py (keeping the old name as an alias):
                        for zone in bok.values())
                    for metaz, bok in metaMap.items())
 
-        territorial = {} # { territory code: IANA ID }
+        territorial: dict[str, str] = {} # { territory code: IANA ID }
         for prime in metaZones.findNodes('primaryZones/primaryZone'):
             code = prime.attributes()['iso3166']
             assert code not in territorial, code
@@ -752,36 +767,36 @@ enumdata.py (keeping the old name as an alias):
         return metaMap, zones, territorial
 
     @property
-    def cldrVersion(self):
+    def cldrVersion(self) -> str:
         # Evaluate so as to ensure __cldrVersion is set:
         self.__unDistinguishedAttributes
         return self.__cldrVersion
 
     # Implementation details
-    def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse):
+    def __xml(self, relPath: str, cache = CacheDict(), read = minidom.parse) -> minidom.Element:
         try:
-            doc = cache[relative_path]
+            doc: minidom.Element = cache[relPath]
         except KeyError:
-            cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement
+            cache[relPath] = doc = read(str(self.root.joinpath(relPath))).documentElement
         return doc
 
     def __open(self, relative_path: str) -> TextIO:
         return self.root.joinpath(relative_path).open()
 
     @property
-    def __rootLocale(self, cache = []):
+    def __rootLocale(self, cache: list[XmlScanner] = []) -> XmlScanner:
         if not cache:
             cache.append(self.xml('common/main/root.xml'))
         return cache[0]
 
     @property
-    def __supplementalData(self, cache = []):
+    def __supplementalData(self, cache: list[Supplement] = []) -> Supplement:
         if not cache:
             cache.append(self.supplement('supplementalData.xml'))
         return cache[0]
 
     @property
-    def __numberSystems(self, cache = {}):
+    def __numberSystems(self, cache: dict[str, dict[str, str]] = {}) -> dict[str, dict[str, str]]:
         if not cache:
             for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):
                 cache[attrs['id']] = attrs
@@ -789,20 +804,22 @@ enumdata.py (keeping the old name as an alias):
         return cache
 
     @property
-    def __weekData(self, cache = {}):
+    def __weekData(self, cache: dict[str, tuple[str, str, str]] = {}
+                   ) -> dict[str, tuple[str, str, str]]:
         if not cache:
+            # firstDay, weStart and weEnd are all dict[str, str]
             firstDay, weStart, weEnd = self.__getWeekData()
             # Massage those into an easily-consulted form:
             # World defaults given for code '001':
             mon, sat, sun = firstDay['001'], weStart['001'], weEnd['001']
-            lands = set(firstDay) | set(weStart) | set(weEnd)
+            lands: set[str] = set(firstDay) | set(weStart) | set(weEnd)
             cache.update((land,
                           (firstDay.get(land, mon), weStart.get(land, sat), weEnd.get(land, sun)))
                          for land in lands)
             assert cache
         return cache
 
-    def __getWeekData(self):
+    def __getWeekData(self) -> Iterator[dict[str, str]]:
         """Scan for data on the weekly cycle.
 
         Yields three mappings from locales to en's short names for
@@ -811,12 +828,12 @@ enumdata.py (keeping the old name as an alias):
         gives the day on which the week starts, the second gives the
         day on which the week-end starts, the third gives the last day
         of the week-end."""
-        source = self.__supplementalData
+        source: Supplement = self.__supplementalData
         for key in ('firstDay', 'weekendStart', 'weekendEnd'):
-            result = {}
+            result: dict[str, str] = {}
             for ignore, attrs in source.find(f'weekData/{key}'):
                 assert ignore == key
-                day = attrs['day']
+                day: str = attrs['day']
                 assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day
                 if 'alt' in attrs:
                     continue
@@ -825,7 +842,8 @@ enumdata.py (keeping the old name as an alias):
             yield result
 
     @property
-    def __currencyData(self, cache = {}):
+    def __currencyData(self, cache: dict[str, tuple[str, int, int]] = {}
+                       ) -> dict[str, tuple[str, int, int]]:
         if not cache:
             source = self.__supplementalData
             for elt in source.findNodes('currencyData/region'):
@@ -850,15 +868,16 @@ enumdata.py (keeping the old name as an alias):
                 if iso:
                     for tag, data in source.find(
                         f'currencyData/fractions/info[iso4217={iso}]'):
-                        digits = data['digits']
-                        rounding = data['rounding']
+                        digits = int(data['digits'])
+                        rounding = int(data['rounding'])
                 cache[territory] = iso, digits, rounding
             assert cache
 
         return cache
 
     @property
-    def __unDistinguishedAttributes(self, cache = {}):
+    def __unDistinguishedAttributes(self, cache: dict[str, tuple[str, ...]] = {}
+                                    ) -> dict[str, tuple[str, ...]]:
         """Mapping from tag names to lists of attributes.
 
         LDML defines some attributes as 'distinguishing': if a node
@@ -878,7 +897,7 @@ enumdata.py (keeping the old name as an alias):
 
         return cache
 
-    def __scanLdmlDtd(self):
+    def __scanLdmlDtd(self) -> Iterator[tuple[str, tuple[str, ...]]]:
         """Scan the LDML DTD, record CLDR version
 
         Yields (tag, attrs) pairs: on elements with a given tag,
@@ -920,7 +939,8 @@ enumdata.py (keeping the old name as an alias):
             if tag and ignored:
                 yield tag, tuple(ignored)
 
-    def __enumMap(self, key, cache = {}):
+    def __enumMap(self, key: str, cache: dict[str, dict[str, tuple[int, str]]] = {}
+                  ) -> dict[str, tuple[int, str]]:
         if not cache:
             cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}
             # They're mappings from numeric value to pairs of full
@@ -943,19 +963,19 @@ enumdata.py (keeping the old name as an alias):
 
         return cache[key]
 
-    def __codeMap(self, key, cache = {},
+    def __codeMap(self, key: str, cache: dict[str, dict[str, str]] = {},
                   # Maps our name for it to CLDR's name:
                   naming = {'language': 'languages', 'script': 'scripts',
-                            'territory': 'territories', 'variant': 'variants'}):
+                            'territory': 'territories', 'variant': 'variants'}) -> dict[str, str]:
         if not cache:
-            root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
+            root: Node = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')
             for dst, src in naming.items():
                 cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))
             assert cache
 
         return cache[key]
 
-    def __codeMapScan(self, node):
+    def __codeMapScan(self, node: Node) -> Iterator[tuple[str, str]]:
         """Get mapping from codes to element values.
 
         Passed in node is a <languages>, <scripts>, <territories> or
@@ -986,23 +1006,23 @@ enumdata.py (keeping the old name as an alias):
 
     # CLDR uses inheritance between locales to save repetition:
     @property
-    def __parentLocale(self, cache = {}):
+    def __parentLocale(self, cache: dict[str, str] = {}) -> dict[str, str]:
         # see http://www.unicode.org/reports/tr35/#Parent_Locales
         if not cache:
             for tag, attrs in self.__supplementalData.find('parentLocales',
                                                            ('component',)):
-                parent = attrs.get('parent', '')
+                parent: str = attrs.get('parent', '')
                 for child in attrs['locales'].split():
                     cache[child] = parent
             assert cache
 
         return cache
 
-    def __scanLocaleRoots(self, name: str):
+    def __scanLocaleRoots(self, name: str) -> Iterator[Node]:
         while name and name != 'root':
             path = f'common/main/{name}.xml'
             if self.root.joinpath(path).exists():
-                elt = self.__xml(path) # which has no top-level alias children:
+                elt: minidom.Element = self.__xml(path) # which has no top-level alias children:
                 assert not any(True
                                for child in Node(elt).findAllChildren(
                                        'alias', allDull=True)
@@ -1019,11 +1039,11 @@ enumdata.py (keeping the old name as an alias):
                     break
 
     class __Seq (list): pass # No weakref for tuple and list, but list sub-class is ok.
-    def __localeRoots(self, name, cache = CacheDict()):
+    def __localeRoots(self, name: str, cache = CacheDict()) -> __Seq:
         try:
-            chain = cache[name]
+            chain: CldrAccess.__Seq = cache[name]
         except KeyError:
-            cache[name] = chain = self.__Seq(self.__scanLocaleRoots(name))
+            cache[name] = chain = CldrAccess.__Seq(self.__scanLocaleRoots(name))
         return chain
 
 # Unpolute the namespace: we don't need to export these.
diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py
index 88f152c910a..10937df5485 100644
--- a/util/locale_database/localetools.py
+++ b/util/locale_database/localetools.py
@@ -64,7 +64,7 @@ def wrap_list(lst, perline=20):
             yield head
     return ",\n".join(", ".join(x) for x in split(lst, perline))
 
-def names_clash(cldr, enum):
+def names_clash(cldr: str, enum: str) -> None | str:
     """True if the reader might not recognize cldr as the name of enum
 
     First argument, cldr, is the name CLDR gives for some language,
author	Mate Barany <[email protected]>	2024-10-10 17:02:56 +0200
committer	Mate Barany <[email protected]>	2024-10-24 11:53:52 +0200
commit	defd1549de9a26607e888fae8d82029633ca6d17 (patch)
tree	a07731eea267ff85f6e2a5980b4150d1982df808
parent	adc4ec9d3911010b2890db351933d49e46504021 (diff)