Skip to content

Commit

Permalink
Merge pull request #1099 from python-babel/issue-1098
Browse files Browse the repository at this point in the history
Support list format fallbacks
  • Loading branch information
akx authored Jul 15, 2024
2 parents 1834204 + 1a45aba commit 7394c37
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 26 deletions.
51 changes: 40 additions & 11 deletions babel/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@
DEFAULT_LOCALE = default_locale()


def format_list(lst: Sequence[str],
style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard',
locale: Locale | str | None = DEFAULT_LOCALE) -> str:
def format_list(
lst: Sequence[str],
style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard',
locale: Locale | str | None = DEFAULT_LOCALE,
) -> str:
"""
Format the items in `lst` as a list.
Expand All @@ -39,7 +41,11 @@ def format_list(lst: Sequence[str],
>>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi')
u'omena, peruna tai aplari'
These styles are defined, but not all are necessarily available in all locales.
Not all styles are necessarily available in all locales.
The function will attempt to fall back to replacement styles according to the rules
set forth in the CLDR root XML file, and raise a ValueError if no suitable replacement
can be found.
The following text is verbatim from the Unicode TR35-49 spec [1].
* standard:
Expand Down Expand Up @@ -76,14 +82,9 @@ def format_list(lst: Sequence[str],
if len(lst) == 1:
return lst[0]

if style not in locale.list_patterns:
raise ValueError(
f'Locale {locale} does not support list formatting style {style!r} '
f'(supported are {sorted(locale.list_patterns)})',
)
patterns = locale.list_patterns[style]
patterns = _resolve_list_style(locale, style)

if len(lst) == 2:
if len(lst) == 2 and '2' in patterns:
return patterns['2'].format(*lst)

result = patterns['start'].format(lst[0], lst[1])
Expand All @@ -92,3 +93,31 @@ def format_list(lst: Sequence[str],
result = patterns['end'].format(result, lst[-1])

return result


# Based on CLDR 45's root.xml file's `<alias>`es.
# The root file defines both `standard` and `or`,
# so they're always available.
# TODO: It would likely be better to use the
#       babel.localedata.Alias mechanism for this,
#       but I'm not quite sure how it's supposed to
#       work with inheritance and data in the root.
_style_fallbacks = {
    "or-narrow": ["or-short", "or"],
    "or-short": ["or"],
    "standard-narrow": ["standard-short", "standard"],
    "standard-short": ["standard"],
    "unit": ["unit-short", "standard"],
    "unit-narrow": ["unit-short", "unit", "standard"],
    "unit-short": ["standard"],
}


def _resolve_list_style(locale: Locale, style: str):
    """
    Return the list patterns `locale` provides for `style`, honoring fallbacks.

    The requested style is tried first, followed by each replacement style
    listed for it in `_style_fallbacks`, in order. The first one present in
    `locale.list_patterns` wins.

    :param locale: object exposing a `list_patterns` mapping keyed by style name
    :param style: the list formatting style the caller asked for
    :return: the entry of `locale.list_patterns` for the first available style
    :raise ValueError: if neither `style` nor any of its fallbacks is available
    """
    list_patterns = locale.list_patterns
    # Use a dedicated loop variable: rebinding `style` here would make the
    # error below name the last fallback tried, not the requested style.
    for candidate in (style, *_style_fallbacks.get(style, ())):
        if candidate in list_patterns:
            return list_patterns[candidate]
    raise ValueError(
        f"Locale {locale} does not support list formatting style {style!r} "
        f"(supported are {sorted(list_patterns)})",
    )
9 changes: 5 additions & 4 deletions scripts/import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,10 +530,11 @@ def parse_locale_display_names(data, tree):

def parse_list_patterns(data, tree):
    """
    Collect the list patterns found in `tree` into ``data['list_patterns']``.

    The result maps each list pattern type to a dict of
    ``{pattern part type: pattern text}``. A `listPattern` element without a
    `type` attribute is stored under ``'standard'``.

    :param data: dict that receives (or already holds) the ``'list_patterns'`` mapping
    :param tree: element tree searched for ``listPatterns/listPattern`` elements
    """
    list_patterns = data.setdefault('list_patterns', {})
    for list_pattern_el in tree.findall('.//listPatterns/listPattern'):
        pattern_type = list_pattern_el.attrib.get('type', 'standard')
        for pattern_part_el in list_pattern_el.findall('listPatternPart'):
            pattern_part_type = pattern_part_el.attrib['type']
            # The per-type dict is created only once a part is actually seen, so a
            # `listPattern` with no `listPatternPart` children leaves no empty entry
            # behind (an empty entry would still count as "present" in a membership test).
            list_patterns.setdefault(pattern_type, {})[pattern_part_type] = _text(pattern_part_el)


def parse_dates(data, tree, sup, regions, territory):
Expand Down
34 changes: 23 additions & 11 deletions tests/test_lists.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,32 @@
import pytest

from babel import lists
from babel import lists, units


# One parametrized case per (items, locale, expected) triple, so each input
# is reported as its own test instead of stopping at the first failing one.
@pytest.mark.parametrize(('items', 'locale', 'expected'), [
    ([], 'en', ''),
    (['string'], 'en', 'string'),
    (['string1', 'string2'], 'en', 'string1 and string2'),
    (['string1', 'string2', 'string3'], 'en', 'string1, string2, and string3'),
    (['string1', 'string2', 'string3'], 'zh', 'string1、string2和string3'),
    (['string1', 'string2', 'string3', 'string4'], 'ne', 'string1,string2, string3 र string4'),
])
def test_format_list(items, locale, expected):
    """Formatting `items` with the default style in `locale` yields `expected`."""
    assert lists.format_list(items, locale=locale) == expected


def test_format_list_error():
    """A style name that is neither known nor has fallbacks raises ValueError."""
    unknown_style = 'orange'
    with pytest.raises(ValueError):
        lists.format_list(['a', 'b', 'c'], locale='en', style=unknown_style)


def test_issue_1098():
    """Formatting with the "unit" style in zh_CN matches the "unit-short" result."""
    measurements = [
        units.format_unit(1, "length-foot", length="short", locale="zh_CN"),
        units.format_unit(5, "length-inch", length="short", locale="zh_CN"),
    ]
    via_unit = lists.format_list(measurements, style="unit", locale="zh_CN")
    via_unit_short = lists.format_list(measurements, style="unit-short", locale="zh_CN")

    # zh-CN does not specify the "unit" style, so we fall back to "unit-short" style.
    assert via_unit == via_unit_short
    # Translation verified using Google Translate. It would add more spacing, but the glyphs are correct.
    assert via_unit_short == "1英尺5英寸"

0 comments on commit 7394c37

Please sign in to comment.