Skip to content

Commit

Permalink
apacheGH-39515: [Python] Pass in type to MapType.from_arrays (apach…
Browse files Browse the repository at this point in the history
…e#39516)

### Rationale for this change

For Iceberg we want to add metadata to the type (the field-id); therefore we need to be able to pass in the type, analogous to what we do for `ListArray.from_arrays(self, offsets, values, DataType type=None, MemoryPool pool=None, mask=None)`.

### What changes are included in this PR?

Added a keyword argument for the `type`, and made sure that the static method to create the MapType is exposed from the C++ side.

### Are these changes tested?

I've added a simple test.

### Are there any user-facing changes?

* Closes: apache#39515

Authored-by: Fokko Driesprong <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
  • Loading branch information
Fokko authored Jan 10, 2024
1 parent e7ab540 commit 07a4655
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
21 changes: 15 additions & 6 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2467,7 +2467,7 @@ cdef class MapArray(ListArray):
"""

@staticmethod
def from_arrays(offsets, keys, items, MemoryPool pool=None):
def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None):
"""
Construct MapArray from arrays of int32 offsets and key, item arrays.
Expand All @@ -2476,6 +2476,8 @@ cdef class MapArray(ListArray):
offsets : array-like or sequence (int32 type)
keys : array-like or sequence (any type)
items : array-like or sequence (any type)
type : DataType, optional
If not specified, a default MapArray with the keys' and items' type is used.
pool : MemoryPool
Returns
Expand Down Expand Up @@ -2564,11 +2566,18 @@ cdef class MapArray(ListArray):
_keys = asarray(keys)
_items = asarray(items)

with nogil:
out = GetResultValue(
CMapArray.FromArrays(_offsets.sp_array,
_keys.sp_array,
_items.sp_array, cpool))
if type is not None:
with nogil:
out = GetResultValue(
CMapArray.FromArraysAndType(
type.sp_type, _offsets.sp_array,
_keys.sp_array, _items.sp_array, cpool))
else:
with nogil:
out = GetResultValue(
CMapArray.FromArrays(_offsets.sp_array,
_keys.sp_array,
_items.sp_array, cpool))
cdef Array result = pyarrow_wrap_array(out)
result.validate()
return result
Expand Down
8 changes: 8 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
const shared_ptr[CArray]& items,
CMemoryPool* pool)

@staticmethod
CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"(
shared_ptr[CDataType],
const shared_ptr[CArray]& offsets,
const shared_ptr[CArray]& keys,
const shared_ptr[CArray]& items,
CMemoryPool* pool)

shared_ptr[CArray] keys()
shared_ptr[CArray] items()
CMapType* map_type()
Expand Down
19 changes: 18 additions & 1 deletion python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1057,8 +1057,25 @@ def test_map_from_arrays():

assert result.equals(expected)

# check invalid usage
# pass in the type explicitly
result = pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
keys.type,
items.type
))
assert result.equals(expected)

# pass in invalid types
with pytest.raises(pa.ArrowTypeError, match='Expected map type, got string'):
pa.MapArray.from_arrays(offsets, keys, items, pa.string())

with pytest.raises(pa.ArrowTypeError, match='Mismatching map items type'):
pa.MapArray.from_arrays(offsets, keys, items, pa.map_(
keys.type,
# Larger than the original i4
pa.int64()
))

# check invalid usage
offsets = [0, 1, 3, 5]
keys = np.arange(5)
items = np.arange(5)
Expand Down

0 comments on commit 07a4655

Please sign in to comment.