diff --git a/babel/lists.py b/babel/lists.py index 376bc963e..6c34cb099 100644 --- a/babel/lists.py +++ b/babel/lists.py @@ -26,9 +26,11 @@ DEFAULT_LOCALE = default_locale() -def format_list(lst: Sequence[str], - style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard', - locale: Locale | str | None = DEFAULT_LOCALE) -> str: +def format_list( + lst: Sequence[str], + style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard', + locale: Locale | str | None = DEFAULT_LOCALE, +) -> str: """ Format the items in `lst` as a list. @@ -39,7 +41,11 @@ def format_list(lst: Sequence[str], >>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi') u'omena, peruna tai aplari' - These styles are defined, but not all are necessarily available in all locales. + Not all styles are necessarily available in all locales. + The function will attempt to fall back to replacement styles according to the rules + set forth in the CLDR root XML file, and raise a ValueError if no suitable replacement + can be found. + The following text is verbatim from the Unicode TR35-49 spec [1]. * standard: @@ -76,14 +82,9 @@ def format_list(lst: Sequence[str], if len(lst) == 1: return lst[0] - if style not in locale.list_patterns: - raise ValueError( - f'Locale {locale} does not support list formatting style {style!r} ' - f'(supported are {sorted(locale.list_patterns)})', - ) - patterns = locale.list_patterns[style] + patterns = _resolve_list_style(locale, style) - if len(lst) == 2: + if len(lst) == 2 and '2' in patterns: return patterns['2'].format(*lst) result = patterns['start'].format(lst[0], lst[1]) @@ -92,3 +93,31 @@ def format_list(lst: Sequence[str], result = patterns['end'].format(result, lst[-1]) return result + + +# Based on CLDR 45's root.xml file's `<alias>`es. +# The root file defines both `standard` and `or`, +# so they're always available. 
+# TODO: It would likely be better to use the
+# babel.localedata.Alias mechanism for this,
+# but I'm not quite sure how it's supposed to
+# work with inheritance and data in the root.
+_style_fallbacks = {
+    "or-narrow": ["or-short", "or"],
+    "or-short": ["or"],
+    "standard-narrow": ["standard-short", "standard"],
+    "standard-short": ["standard"],
+    "unit": ["unit-short", "standard"],
+    "unit-narrow": ["unit-short", "unit", "standard"],
+    "unit-short": ["standard"],
+}
+
+
+def _resolve_list_style(locale: Locale, style: str):  # returns the pattern dict for `style` or its first available fallback
+    for candidate in (style, *(_style_fallbacks.get(style, []))):  # requested style first, then fallbacks in order
+        if candidate in locale.list_patterns:
+            return locale.list_patterns[candidate]
+    raise ValueError(  # `style` is still the caller's requested style here, not the last fallback tried
+        f"Locale {locale} does not support list formatting style {style!r} "
+        f"(supported are {sorted(locale.list_patterns)})",
+    )
diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py
index 761372ac0..ee481440f 100755
--- a/scripts/import_cldr.py
+++ b/scripts/import_cldr.py
@@ -530,10 +530,11 @@ def parse_locale_display_names(data, tree):
 
 def parse_list_patterns(data, tree):
     list_patterns = data.setdefault('list_patterns', {})
-    for listType in tree.findall('.//listPatterns/listPattern'):
-        by_type = list_patterns.setdefault(listType.attrib.get('type', 'standard'), {})
-        for listPattern in listType.findall('listPatternPart'):
-            by_type[listPattern.attrib['type']] = _text(listPattern)
+    for list_pattern_el in tree.findall('.//listPatterns/listPattern'):
+        pattern_type = list_pattern_el.attrib.get('type', 'standard')
+        for pattern_part_el in list_pattern_el.findall('listPatternPart'):
+            pattern_part_type = pattern_part_el.attrib['type']
+            list_patterns.setdefault(pattern_type, {})[pattern_part_type] = _text(pattern_part_el)
 
 
 def parse_dates(data, tree, sup, regions, territory):
diff --git a/tests/test_lists.py b/tests/test_lists.py
index ca9c6ab41..46ca10d02 100644
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -1,20 +1,32 @@
 import pytest
 
-from babel import lists
+from babel import lists, units
 
 
-def test_format_list():
-    for list, locale, expected in [
-        ([], 'en', ''),
-        (['string'], 'en', 'string'),
-        (['string1', 'string2'], 'en', 'string1 and string2'),
-        (['string1', 'string2', 'string3'], 'en', 'string1, string2, and string3'),
-        (['string1', 'string2', 'string3'], 'zh', 'string1、string2和string3'),
-        (['string1', 'string2', 'string3', 'string4'], 'ne', 'string1,string2, string3 र string4'),
-    ]:
-        assert lists.format_list(list, locale=locale) == expected
+@pytest.mark.parametrize(('items', 'locale', 'expected'), [
+    ([], 'en', ''),
+    (['string'], 'en', 'string'),
+    (['string1', 'string2'], 'en', 'string1 and string2'),
+    (['string1', 'string2', 'string3'], 'en', 'string1, string2, and string3'),
+    (['string1', 'string2', 'string3'], 'zh', 'string1、string2和string3'),
+    (['string1', 'string2', 'string3', 'string4'], 'ne', 'string1,string2, string3 र string4'),
+])
+def test_format_list(items, locale, expected):
+    assert lists.format_list(items, locale=locale) == expected
 
 
 def test_format_list_error():
     with pytest.raises(ValueError):
         lists.format_list(['a', 'b', 'c'], style='orange', locale='en')
+
+
+def test_issue_1098():
+    one_foot = units.format_unit(1, "length-foot", length="short", locale="zh_CN")
+    five_inches = units.format_unit(5, "length-inch", length="short", locale="zh_CN")
+    # zh-CN does not specify the "unit" style, so we fall back to "unit-short" style.
+    assert (
+        lists.format_list([one_foot, five_inches], style="unit", locale="zh_CN") ==
+        lists.format_list([one_foot, five_inches], style="unit-short", locale="zh_CN") ==
+        # Translation verified using Google Translate. It would add more spacing, but the glyphs are correct.
+        "1英尺5英寸"
+    )