diff options
| author | Mate Barany <[email protected]> | 2024-10-10 17:02:56 +0200 | 
|---|---|---|
| committer | Mate Barany <[email protected]> | 2024-10-24 11:53:52 +0200 | 
| commit | defd1549de9a26607e888fae8d82029633ca6d17 (patch) | |
| tree | a07731eea267ff85f6e2a5980b4150d1982df808 | |
| parent | adc4ec9d3911010b2890db351933d49e46504021 (diff) | |
Add type annotations to CldrAccess
Task-number: QTBUG-129613
Pick-to: 6.8
Change-Id: I8a00cca718554909b7ab9dcad15cc9b9ac702e94
Reviewed-by: Edward Welbourne <[email protected]>
| -rw-r--r-- | util/locale_database/cldr.py | 166 | ||||
| -rw-r--r-- | util/locale_database/localetools.py | 2 | 
2 files changed, 94 insertions, 74 deletions
diff --git a/util/locale_database/cldr.py b/util/locale_database/cldr.py index 45db816c5ef..450d868bfc6 100644 --- a/util/locale_database/cldr.py +++ b/util/locale_database/cldr.py @@ -10,11 +10,11 @@ The former should normally be all you need to access.  See individual classes for further detail.  """ -from typing import Iterable, TextIO +from typing import Callable, Iterable, Iterator, TextIO  from xml.dom import minidom  from weakref import WeakValueDictionary as CacheDict  from pathlib import Path -from datetime import datetime +from datetime import datetime, timedelta  from ldml import Error, Node, XmlScanner, Supplement, LocaleScanner  from localetools import names_clash @@ -309,7 +309,7 @@ class CldrReader (object):  # the cache. If a process were to instantiate this class with distinct  # roots, each cache would be filled by the first to need it !  class CldrAccess (object): -    def __init__(self, root: Path): +    def __init__(self, root: Path) -> None:          """Set up a master object for accessing CLDR data.          Single parameter, root, is the file-system path to the root of @@ -317,20 +317,20 @@ class CldrAccess (object):          contain dtd/, main/ and supplemental/ sub-directories."""          self.root = root -    def xml(self, relative_path: str): +    def xml(self, relative_path: str) -> XmlScanner:          """Load a single XML file and return its root element as an XmlScanner.          The path is interpreted relative to self.root"""          return XmlScanner(Node(self.__xml(relative_path))) -    def supplement(self, name): +    def supplement(self, name: str) -> Supplement:          """Loads supplemental data as a Supplement object.          The name should be that of a file in common/supplemental/, without path.          
"""          return Supplement(Node(self.__xml(f'common/supplemental/{name}'))) -    def locale(self, name): +    def locale(self, name: str) -> LocaleScanner:          """Loads all data for a locale as a LocaleScanner object.          The name should be a locale name; adding suffix '.xml' to it @@ -340,7 +340,7 @@ class CldrAccess (object):          inheritance, where relevant."""          return LocaleScanner(name, self.__localeRoots(name), self.__rootLocale) -    def englishNaming(self, tag): # see QLocaleXmlWriter.enumData() +    def englishNaming(self, tag: str) -> Callable[[str], str]: # see QLocaleXmlWriter.enumData()          return self.__codeMap(tag).get      @property @@ -354,18 +354,18 @@ class CldrAccess (object):                  yield path.stem      @property -    def defaultContentLocales(self): +    def defaultContentLocales(self) -> Iterator[str]:          """Generator for the default content locales."""          for name, attrs in self.supplement('supplementalMetadata.xml').find('metadata/defaultContent'):              try: -                locales = attrs['locales'] +                locales: str = attrs['locales']              except KeyError:                  pass              else:                  for locale in locales.split():                      yield locale -    def likelySubTags(self): +    def likelySubTags(self) -> Iterator[tuple[str, str]]:          for ignore, attrs in self.supplement('likelySubtags.xml').find('likelySubtags'):              yield attrs['from'], attrs['to'] @@ -380,7 +380,7 @@ class CldrAccess (object):          except KeyError:              raise Error(f'Unsupported number system: {system}') -    def weekData(self, territory): +    def weekData(self, territory: str) -> tuple[str, str, str]:          """Data on the weekly cycle.          
Returns a triple (W, S, E) of en's short names for week-days; @@ -393,7 +393,7 @@ class CldrAccess (object):          except KeyError:              return self.__weekData['001'] -    def currencyData(self, territory): +    def currencyData(self, territory: str) -> tuple[str, int, int]:          """Returns currency data for the given territory code.          Return value is a tuple (ISO4217 code, digit count, rounding @@ -405,7 +405,9 @@ class CldrAccess (object):          except KeyError:              return '', 2, 1 -    def codesToIdName(self, language, script, territory, variant = ''): +    def codesToIdName(self, language: str, script: str, territory: str, variant: str = '' +                     ) -> tuple[tuple[int, str], tuple[int, str], +                                tuple[int, str], tuple[int, str]]:          """Maps each code to the appropriate ID and name.          Returns a 4-tuple of (ID, name) pairs corresponding to the @@ -417,7 +419,7 @@ class CldrAccess (object):          Until we implement variant support (QTBUG-81051), the fourth          member of the returned tuple is always 0 paired with a string          that should not be used.""" -        enum = self.__enumMap +        enum: Callable[[str], dict[str, tuple[int, str]]] = self.__enumMap          try:              return (enum('language')[language],                      enum('script')[script], @@ -428,8 +430,9 @@ class CldrAccess (object):          parts, values = [], [language, script, territory, variant]          for index, key in enumerate(('language', 'script', 'territory', 'variant')): -            naming, enums = self.__codeMap(key), enum(key) -            value = values[index] +            naming: dict[str, str] = self.__codeMap(key) +            enums: dict[str, tuple[int, str]]  = enum(key) +            value: str = values[index]              if value not in enums:                  text = f'{key} code {value}'                  name = naming.get(value) @@ -447,21 +450,22 @@ class 
CldrAccess (object):                      language, script, territory, variant)      @staticmethod -    def __checkEnum(given, proper, scraps): +    def __checkEnum(given: dict[str, str], proper: dict[str, str], scraps: set[str] +                    ) -> Iterator[tuple[str, str]]:          # Each is a { code: full name } mapping          for code, name in given.items(): -            try: right = proper[code] +            try: right: str = proper[code]              except KeyError:                  # No en.xml name for this code, but supplementalData's                  # parentLocale may still believe in it:                  if code not in scraps:                      yield name, f'[Found no CLDR name for code {code}]'                  continue -            cleaned = names_clash(right, name) +            cleaned: None | str = names_clash(right, name)              if cleaned:                  yield name, cleaned -    def checkEnumData(self, grumble): +    def checkEnumData(self, grumble: Callable[[str], int]) -> None:          scraps = set()          for k in self.__parentLocale.keys():              for f in k.split('_'): @@ -492,7 +496,7 @@ enumdata.py (keeping the old name as an alias):                          + '\n')              grumble('\n') -    def bcp47Aliases(self): +    def bcp47Aliases(self) -> tuple[dict[str, str], dict[str, str]]:          """Reads the mapping from CLDR IDs to IANA IDs          CLDR identifies timezones in various ways but its standard @@ -530,7 +534,8 @@ enumdata.py (keeping the old name as an alias):          # If we ever need a mapping back to CLDR ID, we can make          # (description, space-joined-list) the naming values. 
-        alias, naming = {}, {} # { alias: iana }, { iana: description } +        alias: dict[str, str] = {} # { alias: iana } +        naming: dict[str, str] = {} # { iana: description }          for item, attrs in root.find('keyword/key/type', exclude=('deprecated',)):              assert 'description' in attrs, item              assert 'alias' in attrs, item @@ -545,7 +550,8 @@ enumdata.py (keeping the old name as an alias):          return alias, naming -    def readWindowsTimeZones(self, alias): +    def readWindowsTimeZones(self, alias: dict[str, str]) -> tuple[dict[str, str], +                                                                   list[tuple[str, str, str]]]:          """Digest CLDR's MS-Win time-zone name mapping.          Single argument, alias, should be the first part of the pair @@ -582,7 +588,8 @@ enumdata.py (keeping the old name as an alias):          mapZone element and the last is s, its cleaned-up list of IANA          IDs.""" -        defaults, windows = {}, [] +        defaults: dict[str, str] = {} +        windows: list[tuple[str, str, str]] = []          zones = self.supplement('windowsZones.xml')          for name, attrs in zones.find('windowsZones/mapTimezones'):              if name != 'mapZone': @@ -602,7 +609,10 @@ enumdata.py (keeping the old name as an alias):          return defaults, windows -    def readMetaZoneMap(self, alias): +    def readMetaZoneMap(self, alias: dict[str, str] +                        ) -> tuple[dict[str, dict[str, str]], +                                   dict[str, tuple[tuple[int, int, str], ...]], +                                   dict[str, str]]:          """Digests the metaZones supplemental data.          
Required argument, alias, should be the first of @@ -633,9 +643,9 @@ enumdata.py (keeping the old name as an alias):          locale."""          metaZones = self.supplement('metaZones.xml') # Doesn't appear to use draft attribute          # Map CLDR name to IANA name (or use CLDR name if unknown to alias): -        zoneName = lambda n, g=alias.get: g(n, n) +        zoneName: Callable[[str], str] = lambda n, g=alias.get: g(n, n) -        metaMap = {} # { meta: { territory code: zoneId } } +        metaMap: dict[str, dict[str, str]] = {} # { meta: { territory code: zoneId } }          # Entry with territory 001 is "golden zone" for the metazone.          for mapMeta in metaZones.findNodes('metaZones/mapTimezones'):              attrs = mapMeta.attributes() @@ -646,13 +656,13 @@ enumdata.py (keeping the old name as an alias):                  raise Error('Version of metazone map type is not 2018e', attrs)              for node in mapMeta.findAllChildren('mapZone'): -                attrs = node.attributes() +                attrs: dict[str, str] = node.attributes()                  try:                      meta, code, zone = attrs['other'], attrs['territory'], attrs['type']                  except KeyError:                      continue -                bok = metaMap.setdefault(meta, {}) +                bok: dict[str, str] = metaMap.setdefault(meta, {})                  assert code not in bok, (meta, code)                  bok[code] = zoneName(zone)          # Territories not named in a metaMap entry fall back on the @@ -660,16 +670,16 @@ enumdata.py (keeping the old name as an alias):          # entry:          assert all('001' in bok for bok in metaMap.values()) -        def scanUses(zone, check=metaMap): +        def scanUses(zone: Node, check=metaMap) -> Iterator[tuple[str|None, str|None, str]]:              for node in zone.findAllChildren('usesMetazone'): -                attrs = node.attributes() -                mzone = attrs['mzone'] +                
attrs: dict[str, str] = node.attributes() +                mzone: str = attrs['mzone']                  if mzone not in check:                      raise Error('Unknown metazone', mzone)                  # These are UTC date-times.                  yield attrs.get('from'), attrs.get('to'), mzone -        def sortKey(triple): +        def sortKey(triple: tuple[str|None, str|None, str]) -> str | None:              start, stop, mzone = triple              # The start = None entry should sort first; since its key              # is its stop, which is likely the next entry's start, we @@ -680,11 +690,11 @@ enumdata.py (keeping the old name as an alias):              # in the list, so the sorting is fatuous and the key              # doesn't matter). -        def timeRep(text, notime, epoch=datetime(1970, 1, 1, 0, 0)): +        def timeRep(text: str, notime: bool, epoch=datetime(1970, 1, 1, 0, 0)) -> int:              """Map a 'yyyy-MM-dd HH:mm' string to epoch minutes.              If the HH:mm part is omitted, second parameter notime is true to -            use the end of the day, false for the start. LDM specifies this +            use the end of the day, false for the start. LDML specifies this              reading of the pure-date values for start and stop attributes.  
If              the HH:mm part is 24:00, the end of the day is also used; LDML              specifies this but python's datetime.fromisoformat() doesn't like @@ -704,16 +714,20 @@ enumdata.py (keeping the old name as an alias):                      assert len(text) == 16, text                  # If it's given with HH:mm as 24:00, this throws: -                diff = datetime.fromisoformat(text) - epoch +                diff: timedelta = datetime.fromisoformat(text) - epoch              except ValueError:                  diff = datetime.fromisoformat(text[:10]) - epoch                  diff += diff.__class__(days=1)              assert diff.days >= 0 and diff.seconds >= 0, (diff, text) -            assert diff.seconds % 60 == 0, (diff, text) -            return diff.days * 1440 + int(diff.seconds / 60) - -        def mapTimes(triple, alpha=0, omega=(1<<32)-1, torep=timeRep): +            mins, secs = divmod(diff.seconds, 60) +            assert secs == 0, (diff, text) +            return diff.days * 1440 + mins + +        def mapTimes(triple: tuple[str|None, str|None, str], +                     alpha: int = 0, omega: int = (1<<32) - 1, +                     torep: Callable[[str, bool, datetime], int] = timeRep +                     ) -> tuple[int, int, str]:              start, stop, mzone = triple              start = alpha if start is None else torep(start, False)              stop = omega if stop is None else torep(stop, True) @@ -723,10 +737,11 @@ enumdata.py (keeping the old name as an alias):                  stop = omega              return start, stop, mzone -        zones = {} # { ianaId: ( (from, to, meta), ... ) } +        # zones is { ianaId: ( (from, to, meta), ... 
) } +        zones: dict[str, tuple[tuple[int, int, str], ...]] = {}          for metaInfo in metaZones.findNodes('metaZones/metazoneInfo'):              for zone in metaInfo.findAllChildren('timezone'): -                iana = zoneName(zone.dom.attributes['type'].value) +                iana: str = zoneName(zone.dom.attributes['type'].value)                  story = tuple(sorted(scanUses(zone), key=sortKey))                  # Only {first,last} entry can have None for {from,to}:                  assert not any(s[0] is None for s in story[1:]), (iana, story) @@ -743,7 +758,7 @@ enumdata.py (keeping the old name as an alias):                         for zone in bok.values())                     for metaz, bok in metaMap.items()) -        territorial = {} # { territory code: IANA ID } +        territorial: dict[str, str] = {} # { territory code: IANA ID }          for prime in metaZones.findNodes('primaryZones/primaryZone'):              code = prime.attributes()['iso3166']              assert code not in territorial, code @@ -752,36 +767,36 @@ enumdata.py (keeping the old name as an alias):          return metaMap, zones, territorial      @property -    def cldrVersion(self): +    def cldrVersion(self) -> str:          # Evaluate so as to ensure __cldrVersion is set:          self.__unDistinguishedAttributes          return self.__cldrVersion      # Implementation details -    def __xml(self, relative_path: str, cache = CacheDict(), read = minidom.parse): +    def __xml(self, relPath: str, cache = CacheDict(), read = minidom.parse) -> minidom.Element:          try: -            doc = cache[relative_path] +            doc: minidom.Element = cache[relPath]          except KeyError: -            cache[relative_path] = doc = read(str(self.root.joinpath(relative_path))).documentElement +            cache[relPath] = doc = read(str(self.root.joinpath(relPath))).documentElement          return doc      def __open(self, relative_path: str) -> TextIO:          return 
self.root.joinpath(relative_path).open()      @property -    def __rootLocale(self, cache = []): +    def __rootLocale(self, cache: list[XmlScanner] = []) -> XmlScanner:          if not cache:              cache.append(self.xml('common/main/root.xml'))          return cache[0]      @property -    def __supplementalData(self, cache = []): +    def __supplementalData(self, cache: list[Supplement] = []) -> Supplement:          if not cache:              cache.append(self.supplement('supplementalData.xml'))          return cache[0]      @property -    def __numberSystems(self, cache = {}): +    def __numberSystems(self, cache: dict[str, dict[str, str]] = {}) -> dict[str, dict[str, str]]:          if not cache:              for ignore, attrs in self.supplement('numberingSystems.xml').find('numberingSystems'):                  cache[attrs['id']] = attrs @@ -789,20 +804,22 @@ enumdata.py (keeping the old name as an alias):          return cache      @property -    def __weekData(self, cache = {}): +    def __weekData(self, cache: dict[str, tuple[str, str, str]] = {} +                   ) -> dict[str, tuple[str, str, str]]:          if not cache: +            # firstDay, weStart and weEnd are all dict[str, str]              firstDay, weStart, weEnd = self.__getWeekData()              # Massage those into an easily-consulted form:              # World defaults given for code '001':              mon, sat, sun = firstDay['001'], weStart['001'], weEnd['001'] -            lands = set(firstDay) | set(weStart) | set(weEnd) +            lands: set[str] = set(firstDay) | set(weStart) | set(weEnd)              cache.update((land,                            (firstDay.get(land, mon), weStart.get(land, sat), weEnd.get(land, sun)))                           for land in lands)              assert cache          return cache -    def __getWeekData(self): +    def __getWeekData(self) -> Iterator[dict[str, str]]:          """Scan for data on the weekly cycle.          
Yields three mappings from locales to en's short names for @@ -811,12 +828,12 @@ enumdata.py (keeping the old name as an alias):          gives the day on which the week starts, the second gives the          day on which the week-end starts, the third gives the last day          of the week-end.""" -        source = self.__supplementalData +        source: Supplement = self.__supplementalData          for key in ('firstDay', 'weekendStart', 'weekendEnd'): -            result = {} +            result: dict[str, str] = {}              for ignore, attrs in source.find(f'weekData/{key}'):                  assert ignore == key -                day = attrs['day'] +                day: str = attrs['day']                  assert day in ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'), day                  if 'alt' in attrs:                      continue @@ -825,7 +842,8 @@ enumdata.py (keeping the old name as an alias):              yield result      @property -    def __currencyData(self, cache = {}): +    def __currencyData(self, cache: dict[str, tuple[str, int, int]] = {} +                       ) -> dict[str, tuple[str, int, int]]:          if not cache:              source = self.__supplementalData              for elt in source.findNodes('currencyData/region'): @@ -850,15 +868,16 @@ enumdata.py (keeping the old name as an alias):                  if iso:                      for tag, data in source.find(                          f'currencyData/fractions/info[iso4217={iso}]'): -                        digits = data['digits'] -                        rounding = data['rounding'] +                        digits = int(data['digits']) +                        rounding = int(data['rounding'])                  cache[territory] = iso, digits, rounding              assert cache          return cache      @property -    def __unDistinguishedAttributes(self, cache = {}): +    def __unDistinguishedAttributes(self, cache: dict[str, tuple[str, ...]] = {} +                          
          ) -> dict[str, tuple[str, ...]]:          """Mapping from tag names to lists of attributes.          LDML defines some attributes as 'distinguishing': if a node @@ -878,7 +897,7 @@ enumdata.py (keeping the old name as an alias):          return cache -    def __scanLdmlDtd(self): +    def __scanLdmlDtd(self) -> Iterator[tuple[str, tuple[str, ...]]]:          """Scan the LDML DTD, record CLDR version          Yields (tag, attrs) pairs: on elements with a given tag, @@ -920,7 +939,8 @@ enumdata.py (keeping the old name as an alias):              if tag and ignored:                  yield tag, tuple(ignored) -    def __enumMap(self, key, cache = {}): +    def __enumMap(self, key: str, cache: dict[str, dict[str, tuple[int, str]]] = {} +                  ) -> dict[str, tuple[int, str]]:          if not cache:              cache['variant'] = {'': (0, 'This should never be seen outside ldml.py')}              # They're mappings from numeric value to pairs of full @@ -943,19 +963,19 @@ enumdata.py (keeping the old name as an alias):          return cache[key] -    def __codeMap(self, key, cache = {}, +    def __codeMap(self, key: str, cache: dict[str, dict[str, str]] = {},                    # Maps our name for it to CLDR's name:                    naming = {'language': 'languages', 'script': 'scripts', -                            'territory': 'territories', 'variant': 'variants'}): +                            'territory': 'territories', 'variant': 'variants'}) -> dict[str, str]:          if not cache: -            root = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames') +            root: Node = self.xml('common/main/en.xml').root.findUniqueChild('localeDisplayNames')              for dst, src in naming.items():                  cache[dst] = dict(self.__codeMapScan(root.findUniqueChild(src)))              assert cache          return cache[key] -    def __codeMapScan(self, node): +    def __codeMapScan(self, node: Node) -> 
Iterator[tuple[str, str]]:          """Get mapping from codes to element values.          Passed in node is a <languages>, <scripts>, <territories> or @@ -986,23 +1006,23 @@ enumdata.py (keeping the old name as an alias):      # CLDR uses inheritance between locales to save repetition:      @property -    def __parentLocale(self, cache = {}): +    def __parentLocale(self, cache: dict[str, str] = {}) -> dict[str, str]:          # see http://www.unicode.org/reports/tr35/#Parent_Locales          if not cache:              for tag, attrs in self.__supplementalData.find('parentLocales',                                                             ('component',)): -                parent = attrs.get('parent', '') +                parent: str = attrs.get('parent', '')                  for child in attrs['locales'].split():                      cache[child] = parent              assert cache          return cache -    def __scanLocaleRoots(self, name: str): +    def __scanLocaleRoots(self, name: str) -> Iterator[Node]:          while name and name != 'root':              path = f'common/main/{name}.xml'              if self.root.joinpath(path).exists(): -                elt = self.__xml(path) # which has no top-level alias children: +                elt: minidom.Element = self.__xml(path) # which has no top-level alias children:                  assert not any(True                                 for child in Node(elt).findAllChildren(                                         'alias', allDull=True) @@ -1019,11 +1039,11 @@ enumdata.py (keeping the old name as an alias):                      break      class __Seq (list): pass # No weakref for tuple and list, but list sub-class is ok. 
-    def __localeRoots(self, name, cache = CacheDict()): +    def __localeRoots(self, name: str, cache = CacheDict()) -> __Seq:          try: -            chain = cache[name] +            chain: CldrAccess.__Seq = cache[name]          except KeyError: -            cache[name] = chain = self.__Seq(self.__scanLocaleRoots(name)) +            cache[name] = chain = CldrAccess.__Seq(self.__scanLocaleRoots(name))          return chain  # Unpolute the namespace: we don't need to export these. diff --git a/util/locale_database/localetools.py b/util/locale_database/localetools.py index 88f152c910a..10937df5485 100644 --- a/util/locale_database/localetools.py +++ b/util/locale_database/localetools.py @@ -64,7 +64,7 @@ def wrap_list(lst, perline=20):              yield head      return ",\n".join(", ".join(x) for x in split(lst, perline)) -def names_clash(cldr, enum): +def names_clash(cldr: str, enum: str) -> None | str:      """True if the reader might not recognize cldr as the name of enum      First argument, cldr, is the name CLDR gives for some language,  | 
