Skip to content

Commit

Permalink
Merge pull request #49 from HansBug/dev/collection
Browse files Browse the repository at this point in the history
dev(hansbug): add group_by function
  • Loading branch information
HansBug authored May 24, 2022
2 parents c4a2404 + aeee0d4 commit 9d0be26
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/source/api_doc/collection/sequence.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,9 @@ unique
.. autofunction:: unique


group_by
---------------------------

.. autofunction:: group_by


52 changes: 51 additions & 1 deletion hbutils/collection/sequence.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import Union, TypeVar, Sequence
from typing import Union, TypeVar, Sequence, Callable, Optional, Dict, List, Iterable

__all__ = [
'unique',
'group_by',
]

_ElementType = TypeVar('_ElementType')
Expand Down Expand Up @@ -32,3 +33,52 @@ def unique(s: Union[Sequence[_ElementType]]) -> Sequence[_ElementType]:
_set.add(element)

return type(s)(_result)


# Type of the group key returned by the ``key`` callable of ``group_by``.
_GroupType = TypeVar('_GroupType')
# Type of the per-group value returned by the ``gfunc`` callable of ``group_by``.
_ResultType = TypeVar('_ResultType')


def group_by(s: Iterable[_ElementType],
             key: Callable[[_ElementType], _GroupType],
             gfunc: Optional[Callable[[List[_ElementType]], _ResultType]] = None) -> Dict[_GroupType, _ResultType]:
    """
    Overview:
        Divide the elements into groups.

        Elements are consumed in a single pass. Groups appear in the result in the order in which \
        their key is first produced, and elements keep their original relative order inside each group.

    :param s: Elements. Any iterable is accepted, it is traversed exactly once.
    :param key: Group key, should be a callable object. Its return value is used as a dictionary key, \
        so it must be hashable.
    :param gfunc: Post-process function for groups, should be a callable object. It receives the list of \
        elements of one group and its return value is stored for that group. Default is ``None`` which \
        means no post-processing will be performed and the lists themselves are returned.
    :return: Grouping result, a dictionary mapping each group key to its (post-processed) group.

    Examples::
        >>> from hbutils.collection import group_by
        >>>
        >>> foods = [
        ...     'apple', 'orange', 'pear',
        ...     'banana', 'fish', 'pork', 'milk',
        ... ]
        >>> group_by(foods, len)  # group by length
        {5: ['apple'], 6: ['orange', 'banana'], 4: ['pear', 'fish', 'pork', 'milk']}
        >>> group_by(foods, len, len)  # group and get length
        {5: 1, 6: 2, 4: 4}
        >>> group_by(foods, lambda x: x[0])  # group by first letter
        {'a': ['apple'], 'o': ['orange'], 'p': ['pear', 'pork'], 'b': ['banana'], 'f': ['fish'], 'm': ['milk']}
        >>> group_by(foods, lambda x: x[0], len)  # group and get length
        {'a': 1, 'o': 1, 'p': 2, 'b': 1, 'f': 1, 'm': 1}
    """
    _groups: Dict[_GroupType, List[_ElementType]] = {}
    for _item in s:
        # setdefault creates the bucket on first sight of a key and returns it otherwise.
        _groups.setdefault(key(_item), []).append(_item)

    # Compare against None explicitly: a callable object may legitimately be
    # falsy (e.g. it defines __bool__ or __len__) and must still be applied.
    if gfunc is None:
        return _groups

    # Distinct loop names so the ``key`` parameter is not shadowed.
    return {
        _group_key: gfunc(_members)
        for _group_key, _members in _groups.items()
    }
32 changes: 31 additions & 1 deletion test/collection/test_sequence.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from hbutils.collection import unique
from hbutils.collection import unique, group_by


@pytest.mark.unittest
Expand All @@ -24,3 +24,33 @@ class MyList(list):
r4 = unique(MyList([3, 1, 2, 1, 4, 3]))
assert type(r4) == MyList
assert r4 == MyList([3, 1, 2, 4])

def test_group_by(self):
    """Check ``group_by`` with and without a post-process function."""
    foods = ['apple', 'orange', 'pear', 'banana', 'fish', 'pork', 'milk']

    # Grouping by word length: plain groups first, then group sizes.
    grouped_by_length = group_by(foods, len)
    expected_by_length = {
        4: ['pear', 'fish', 'pork', 'milk'],
        5: ['apple'],
        6: ['orange', 'banana'],
    }
    assert grouped_by_length == expected_by_length

    sizes_by_length = group_by(foods, len, len)
    assert sizes_by_length == {4: 4, 5: 1, 6: 2}

    # Grouping by first letter: plain groups first, then group sizes.
    grouped_by_initial = group_by(foods, lambda x: x[0])
    expected_by_initial = {
        'a': ['apple'],
        'b': ['banana'],
        'f': ['fish'],
        'm': ['milk'],
        'o': ['orange'],
        'p': ['pear', 'pork'],
    }
    assert grouped_by_initial == expected_by_initial

    sizes_by_initial = group_by(foods, lambda x: x[0], len)
    assert sizes_by_initial == {
        'a': 1,
        'b': 1,
        'f': 1,
        'm': 1,
        'o': 1,
        'p': 2,
    }

0 comments on commit 9d0be26

Please sign in to comment.