Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2 Object Encoding in 3.x.x python package #2500

Closed
ilan-gold opened this issue Nov 18, 2024 · 1 comment
Closed

v2 Object Encoding in 3.x.x python package #2500

ilan-gold opened this issue Nov 18, 2024 · 1 comment
Labels
bug Potential issues with the zarr-python library

Comments

@ilan-gold
Copy link

Zarr version

3.0.0b2

Numcodecs version

0.14.0

Python Version

3.12.7

Operating System

Mac

Installation

uv

Description

Writing object-dtype arrays to a Zarr v2 array (v2 object encoding) seems to be missing from the 3.x Python package, judging by the source code, but I'm not sure.

Steps to reproduce

import zarr
import numpy as np

k = "x"
arr = np.array(["a", "b"], dtype=object)
f = zarr.open_group("foo.zarr", mode="w", zarr_format=2)
f.create_array(k, shape=arr.shape, dtype=object)
f[k][:] = arr

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[20], line 1
----> 1 f["x"][:] = arr

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/array.py:2052, in Array.__setitem__(self, selection, value)
   2050     self.vindex[cast(CoordinateSelection | MaskSelection, selection)] = value
   2051 elif is_pure_orthogonal_indexing(pure_selection, self.ndim):
-> 2052     self.set_orthogonal_selection(pure_selection, value, fields=fields)
   2053 else:
   2054     self.set_basic_selection(cast(BasicSelection, pure_selection), value, fields=fields)

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/_compat.py:43, in _deprecate_positional_args.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
     41 extra_args = len(args) - len(all_args)
     42 if extra_args <= 0:
---> 43     return f(*args, **kwargs)
     45 # extra_args > 0
     46 args_msg = [
     47     f"{name}={arg}"
     48     for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:], strict=False)
     49 ]

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/array.py:2508, in Array.set_orthogonal_selection(self, selection, value, fields, prototype)
   2506     prototype = default_buffer_prototype()
   2507 indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid)
-> 2508 return sync(
   2509     self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)
   2510 )

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/sync.py:141, in sync(coro, loop, timeout)
    138 return_result = next(iter(finished)).result()
    140 if isinstance(return_result, BaseException):
--> 141     raise return_result
    142 else:
    143     return return_result

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/sync.py:100, in _runner(coro)
     95 """
     96 Await a coroutine and return the result of running it. If awaiting the coroutine raises an
     97 exception, the exception will be returned.
     98 """
     99 try:
--> 100     return await coro
    101 except Exception as ex:
    102     return ex

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/array.py:1131, in AsyncArray._set_selection(self, indexer, value, prototype, fields)
   1128 value_buffer = prototype.nd_buffer.from_ndarray_like(value)
   1130 # merging with existing data and encoding chunks
-> 1131 await self.codec_pipeline.write(
   1132     [
   1133         (
   1134             self.store_path / self.metadata.encode_chunk_key(chunk_coords),
   1135             self.metadata.get_chunk_spec(chunk_coords, self.order, prototype),
   1136             chunk_selection,
   1137             out_selection,
   1138         )
   1139         for chunk_coords, chunk_selection, out_selection in indexer
   1140     ],
   1141     value_buffer,
   1142     drop_axes=indexer.drop_axes,
   1143 )

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/codec_pipeline.py:455, in BatchedCodecPipeline.write(self, batch_info, value, drop_axes)
    449 async def write(
    450     self,
    451     batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
    452     value: NDBuffer,
    453     drop_axes: tuple[int, ...] = (),
    454 ) -> None:
--> 455     await concurrent_map(
    456         [
    457             (single_batch_info, value, drop_axes)
    458             for single_batch_info in batched(batch_info, self.batch_size)
    459         ],
    460         self.write_batch,
    461         config.get("async.concurrency"),
    462     )

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/common.py:67, in concurrent_map(items, func, limit)
     64     async with sem:
     65         return await func(*item)
---> 67 return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/common.py:65, in concurrent_map.<locals>.run(item)
     63 async def run(item: tuple[Any]) -> V:
     64     async with sem:
---> 65         return await func(*item)

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/codec_pipeline.py:390, in BatchedCodecPipeline.write_batch(self, batch_info, value, drop_axes)
    372 chunk_array_batch = [
    373     self._merge_chunk_array(
    374         chunk_array, value, out_selection, chunk_spec, chunk_selection, drop_axes
   (...)
    378     )
    379 ]
    381 chunk_array_batch = [
    382     None
    383     if chunk_array is None or chunk_array.all_equal(chunk_spec.fill_value)
   (...)
    387     )
    388 ]
--> 390 chunk_bytes_batch = await self.encode_batch(
    391     [
    392         (chunk_array, chunk_spec)
    393         for chunk_array, (_, chunk_spec, _, _) in zip(
    394             chunk_array_batch, batch_info, strict=False
    395         )
    396     ],
    397 )
    399 async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> None:
    400     if chunk_bytes is None:

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/codec_pipeline.py:210, in BatchedCodecPipeline.encode_batch(self, chunk_arrays_and_specs)
    205     chunk_array_batch = await aa_codec.encode(
    206         zip(chunk_array_batch, chunk_specs, strict=False)
    207     )
    208     chunk_specs = resolve_batched(aa_codec, chunk_specs)
--> 210 chunk_bytes_batch = await self.array_bytes_codec.encode(
    211     zip(chunk_array_batch, chunk_specs, strict=False)
    212 )
    213 chunk_specs = resolve_batched(self.array_bytes_codec, chunk_specs)
    215 for bb_codec in self.bytes_bytes_codecs:

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/abc/codec.py:152, in BaseCodec.encode(self, chunks_and_specs)
    136 async def encode(
    137     self,
    138     chunks_and_specs: Iterable[tuple[CodecInput | None, ArraySpec]],
    139 ) -> Iterable[CodecOutput | None]:
    140     """Encodes a batch of chunks.
    141     Chunks can be None in which case they are ignored by the codec.
    142 
   (...)
    150     Iterable[CodecOutput | None]
    151     """
--> 152     return await _batching_helper(self._encode_single, chunks_and_specs)

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/abc/codec.py:407, in _batching_helper(func, batch_info)
    403 async def _batching_helper(
    404     func: Callable[[CodecInput, ArraySpec], Awaitable[CodecOutput | None]],
    405     batch_info: Iterable[tuple[CodecInput | None, ArraySpec]],
    406 ) -> list[CodecOutput | None]:
--> 407     return await concurrent_map(
    408         list(batch_info),
    409         _noop_for_none(func),
    410         config.get("async.concurrency"),
    411     )

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/common.py:67, in concurrent_map(items, func, limit)
     64     async with sem:
     65         return await func(*item)
---> 67 return await asyncio.gather(*[asyncio.ensure_future(run(item)) for item in items])

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/core/common.py:65, in concurrent_map.<locals>.run(item)
     63 async def run(item: tuple[Any]) -> V:
     64     async with sem:
---> 65         return await func(*item)

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/abc/codec.py:420, in _noop_for_none.<locals>.wrap(chunk, chunk_spec)
    418 if chunk is None:
    419     return None
--> 420 return await func(chunk, chunk_spec)

File ~/Projects/Theis/anndata/venv/lib/python3.12/site-packages/zarr/codecs/_v2.py:78, in V2Codec._encode_single(self, chunk_array, chunk_spec)
     76 # check object encoding
     77 if ensure_ndarray_like(chunk).dtype == object:
---> 78     raise RuntimeError("cannot write object array without object codec")
     80 # compress
     81 if self.compressor:

RuntimeError: cannot write object array without object codec

Additional output

No response
@ilan-gold
Copy link
Author

ilan-gold commented Dec 5, 2024

from numcodecs import VLenUTF8

f.create_array(
    k,
    shape=arr.shape,
    dtype=object,
    filters=[VLenUTF8()],
)
f[k][:] = arr

Passing an object codec via `filters` (here `VLenUTF8`) seems to work for v2, assuming `f` is a v2 group.

I would be happy to contribute this example to a migration guide if I understood it better.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Potential issues with the zarr-python library
Projects
None yet
Development

No branches or pull requests

1 participant