From f337ffdfd8577e88c09aa6cbbc5efe4adf8cca76 Mon Sep 17 00:00:00 2001 From: Rico Hermans Date: Sat, 18 Nov 2023 13:29:27 +0100 Subject: [PATCH] Bump CLDR download script to 43 --- babel/core.py | 2 +- scripts/download_import_cldr.py | 8 ++++---- scripts/import_cldr.py | 20 ++++++++++++++++---- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/babel/core.py b/babel/core.py index f63b97b65..ef0d16a1d 100644 --- a/babel/core.py +++ b/babel/core.py @@ -930,7 +930,7 @@ def interval_formats(self) -> localedata.LocaleDataDict: smallest changing component: >>> Locale('fi_FI').interval_formats['MEd']['d'] - [u'E d. \u2013 ', u'E d.M.'] + [u'E d.\u2009\u2013\u2009', u'E d.M.'] .. seealso:: diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py index c08c7fc08..bcee0ea7c 100755 --- a/scripts/download_import_cldr.py +++ b/scripts/download_import_cldr.py @@ -9,10 +9,10 @@ import zipfile from urllib.request import urlretrieve -URL = 'http://unicode.org/Public/cldr/42/cldr-common-42.0.zip' -FILENAME = 'cldr-common-42.0.zip' -# Via https://unicode.org/Public/cldr/42/hashes/SHASUM512 -FILESUM = '315448fe6a9ac2d5a6a7fd1a27b38c5db30fed053654a803d50e3a8d06aa08ad153e8e57089fa094c561f41a54f37eecda0701b47a1813879902be71945aa38a' +URL = 'http://unicode.org/Public/cldr/43/cldr-common-43.0.zip' +FILENAME = 'cldr-common-43.0.zip' +# Via https://unicode.org/Public/cldr/43/hashes/SHASUM512 +FILESUM = '930c64208d6f680d115bfa74a69445fb614910bb54233227b0b9ae85ddbce4db19e4ec863bf04ae9d4a11b2306aa7394e553384d7537487de8011f0e34877cef' BLKSIZE = 131072 diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index 493787407..96b6c9438 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -301,10 +301,22 @@ def parse_global(srcdir, sup): currency: tuple(sorted(regions)) for currency, regions in all_currencies.items()} # Explicit parent locales - for paternity in sup.findall('.//parentLocales/parentLocale'): - parent = paternity.attrib['parent'] - 
for child in paternity.attrib['locales'].split(): - parent_exceptions[child] = parent + # Since CLDR-43, there are multiple `parentLocales` statements, some of them with a `component="collations"` or + # `component="segmentations"` attribute; these indicate that only some language aspects should be inherited. + # (https://cldr.unicode.org/index/downloads/cldr-43) + # + # Ignore these for now, as one of them even points to a locale that doesn't have a corresponding XML file (sr_ME) + # and we crash trying to load it. + # There is no XPath support to test for an absent attribute, so use Python to filter + for parentBlock in sup.findall('.//parentLocales'): + if parentBlock.attrib.get('component'): + # Consider only unqualified parent declarations + continue + + for paternity in parentBlock.findall('./parentLocale'): + parent = paternity.attrib['parent'] + for child in paternity.attrib['locales'].split(): + parent_exceptions[child] = parent # Currency decimal and rounding digits for fraction in sup.findall('.//currencyData/fractions/info'):