diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 1908a1a11e..54fefaa041 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -108,6 +108,19 @@ def _infer_overwrite(mode: AccessModeLiteral) -> bool: return mode in _OVERWRITE_MODES +def _warn_unimplemented_kwargs(kwargs: dict[str, Any]) -> None: + """ + Emit a "not yet implemented" warning for each provided keyword argument that is not None. + + ``kwargs`` maps a keyword argument name to its supplied value. The ``stacklevel`` is chosen + so the warning points at the caller of the public API function (the same location as an + inline ``warnings.warn(..., stacklevel=2)`` would). + """ + for name, value in kwargs.items(): + if value is not None: + warnings.warn(f"{name} is not yet implemented", ZarrRuntimeWarning, stacklevel=3) + + def _get_shape_chunks(a: ArrayLike | Any) -> tuple[tuple[int, ...] | None, tuple[int, ...] | None]: """Helper function to get the shape and chunks from an array-like object""" shape = None @@ -134,6 +147,7 @@ class _LikeArgs(TypedDict): filters: NotRequired[tuple[Numcodec, ...] | None] compressor: NotRequired[CompressorLikev2] codecs: NotRequired[tuple[Codec, ...]] + fill_value: NotRequired[Any] def _like_args(a: ArrayLike) -> _LikeArgs: @@ -151,6 +165,7 @@ def _like_args(a: ArrayLike) -> _LikeArgs: new["dtype"] = a.dtype if isinstance(a, AsyncArray | Array): + new["fill_value"] = a.metadata.fill_value if isinstance(a.metadata, ArrayV2Metadata): new["order"] = a.order new["compressor"] = a.metadata.compressor @@ -813,14 +828,14 @@ async def open_group( The new group. """ - if cache_attrs is not None: - warnings.warn("cache_attrs is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if synchronizer is not None: - warnings.warn("synchronizer is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if meta_array is not None: - warnings.warn("meta_array is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if chunk_store is not None: - warnings.warn("chunk_store is not yet implemented", ZarrRuntimeWarning, stacklevel=2) + _warn_unimplemented_kwargs( + { + "cache_attrs": cache_attrs, + "synchronizer": synchronizer, + "meta_array": meta_array, + "chunk_store": chunk_store, + } + ) store_path = await make_store_path(store, mode=mode, storage_options=storage_options, path=path) if attributes is None: @@ -1010,20 +1025,17 @@ async def create( if zarr_format is None: zarr_format = _default_zarr_format() - if synchronizer is not None: - warnings.warn("synchronizer is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if chunk_store is not None: - warnings.warn("chunk_store is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if cache_metadata is not None: - warnings.warn("cache_metadata is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if cache_attrs is not None: - warnings.warn("cache_attrs is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if object_codec is not None: - warnings.warn("object_codec is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if read_only is not None: - warnings.warn("read_only is not yet implemented", ZarrRuntimeWarning, stacklevel=2) - if meta_array is not None: - warnings.warn("meta_array is not yet implemented", ZarrRuntimeWarning, stacklevel=2) + _warn_unimplemented_kwargs( + { + "synchronizer": synchronizer, + "chunk_store": chunk_store, + "cache_metadata": cache_metadata, + "cache_attrs": cache_attrs, + "object_codec": object_codec, + "read_only": read_only, + "meta_array": meta_array, + } + ) if write_empty_chunks is not None: _warn_write_empty_chunks_kwarg() @@ -1111,8 +1123,6 @@ async def empty_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: and these are not guaranteed to be stable from one access to the next. """ like_kwargs = _like_args(a) | kwargs - if isinstance(a, (AsyncArray | Array)): - like_kwargs.setdefault("fill_value", a.metadata.fill_value) return await empty(**like_kwargs) # type: ignore[arg-type] @@ -1155,8 +1165,6 @@ async def full_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: The new array. """ like_kwargs = _like_args(a) | kwargs - if isinstance(a, (AsyncArray | Array)): - like_kwargs.setdefault("fill_value", a.metadata.fill_value) return await full(**like_kwargs) # type: ignore[arg-type] @@ -1194,7 +1202,10 @@ async def ones_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: Array The new array. """ - like_kwargs = _like_args(a) | kwargs + like_args = _like_args(a) + # `ones` supplies its own fill_value, so drop any inherited from `a`. + like_args.pop("fill_value", None) + like_kwargs = like_args | kwargs return await ones(**like_kwargs) # type: ignore[arg-type] @@ -1270,8 +1281,6 @@ async def open_like(a: ArrayLike, path: str, **kwargs: Any) -> AnyAsyncArray: The opened array. """ like_kwargs = _like_args(a) | kwargs - if isinstance(a, (AsyncArray | Array)): - like_kwargs.setdefault("fill_value", a.metadata.fill_value) return await open_array(path=path, **like_kwargs) # type: ignore[arg-type] @@ -1309,5 +1318,8 @@ async def zeros_like(a: ArrayLike, **kwargs: Any) -> AnyAsyncArray: Array The new array. """ - like_kwargs = _like_args(a) | kwargs + like_args = _like_args(a) + # `zeros` supplies its own fill_value, so drop any inherited from `a`. + like_args.pop("fill_value", None) + like_kwargs = like_args | kwargs return await zeros(**like_kwargs) # type: ignore[arg-type] diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 977520b12e..ea7d125b10 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -299,6 +299,22 @@ async def get_array_metadata( return metadata_dict +async def _prepare_overwrite( + store_path: StorePath, *, zarr_format: ZarrFormat, overwrite: bool +) -> None: + """ + Prepare a store path for writing a new node. + + If ``overwrite`` is true and the store supports deletes, any existing node at + ``store_path`` is deleted. Otherwise, the absence of an existing node is enforced + (raising if one is present). + """ + if overwrite and store_path.store.supports_deletes: + await store_path.delete_dir() + else: + await ensure_no_existing_node(store_path, zarr_format=zarr_format) + + @dataclass(frozen=True) class AsyncArray[T_ArrayMetadata: (ArrayV2Metadata, ArrayV3Metadata)]: """ @@ -524,7 +540,8 @@ def _create_metadata_v3( shape = parse_shapelike(shape) if codecs is None: - filters = default_filters_v3(dtype) + # no data types have default filters + filters = () serializer = default_serializer_v3(dtype) compressors = default_compressors_v3(dtype) @@ -577,13 +594,7 @@ async def _create_v3( attributes: dict[str, JSON] | None = None, overwrite: bool = False, ) -> AsyncArrayV3: - if overwrite: - if store_path.store.supports_deletes: - await store_path.delete_dir() - else: - await ensure_no_existing_node(store_path, zarr_format=3) - else: - await ensure_no_existing_node(store_path, zarr_format=3) + await _prepare_overwrite(store_path, zarr_format=3, overwrite=overwrite) if isinstance(chunk_key_encoding, tuple): chunk_key_encoding = ( @@ -658,13 +669,7 @@ async def _create_v2( attributes: dict[str, JSON] | None = None, overwrite: bool = False, ) -> AsyncArrayV2: - if overwrite: - if store_path.store.supports_deletes: - await store_path.delete_dir() - else: - await ensure_no_existing_node(store_path, zarr_format=2) - else: - await ensure_no_existing_node(store_path, zarr_format=2) + await _prepare_overwrite(store_path, zarr_format=2, overwrite=overwrite) compressor_parsed: CompressorLikev2 if compressor == "auto": @@ -970,10 +975,9 @@ def _zdtype(self) -> ZDType[TBaseDType, TBaseScalar]: """ The zarr-specific representation of the array data type """ - if self.metadata.zarr_format == 2: - return self.metadata.dtype - else: - return self.metadata.data_type + # `dtype` returns the zarr dtype object for both v2 and v3 metadata + # (on v3 it is an alias for `data_type`). + return self.metadata.dtype @property def dtype(self) -> TBaseDType: @@ -1490,13 +1494,16 @@ async def get_orthogonal_selection( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: - return await _get_orthogonal_selection( + if prototype is None: + prototype = default_buffer_prototype() + indexer = OrthogonalIndexer(selection, self.metadata.shape, self._chunk_grid) + return await _get_selection( self.store_path, self.metadata, self.codec_pipeline, self.config, self._chunk_grid, - selection, + indexer=indexer, out=out, fields=fields, prototype=prototype, @@ -1510,13 +1517,16 @@ async def get_mask_selection( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: - return await _get_mask_selection( + if prototype is None: + prototype = default_buffer_prototype() + indexer = MaskIndexer(mask, self.metadata.shape, self._chunk_grid) + return await _get_selection( self.store_path, self.metadata, self.codec_pipeline, self.config, self._chunk_grid, - mask, + indexer=indexer, out=out, fields=fields, prototype=prototype, @@ -1530,17 +1540,24 @@ async def get_coordinate_selection( fields: Fields | None = None, prototype: BufferPrototype | None = None, ) -> NDArrayLikeOrScalar: - return await _get_coordinate_selection( + if prototype is None: + prototype = default_buffer_prototype() + indexer = CoordinateIndexer(selection, self.metadata.shape, self._chunk_grid) + out_array = await _get_selection( self.store_path, self.metadata, self.codec_pipeline, self.config, self._chunk_grid, - selection, + indexer=indexer, out=out, fields=fields, prototype=prototype, ) + if hasattr(out_array, "shape"): + # restore shape + out_array = cast("NDArrayLikeOrScalar", np.array(out_array).reshape(indexer.sel_shape)) + return out_array async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = False) -> None: """ @@ -2166,7 +2183,7 @@ def cdata_shape(self) -> tuple[int, ...]: When sharding is used, this counts inner chunks (not shards) per dimension. """ - return self.async_array._chunk_grid_shape + return self._chunk_grid_shape @property def _chunk_grid_shape(self) -> tuple[int, ...]: @@ -4402,10 +4419,7 @@ async def init_array( chunk_key_encoding, zarr_format=zarr_format ) - if overwrite and store_path.store.supports_deletes: - await store_path.delete_dir() - else: - await ensure_no_existing_node(store_path, zarr_format=zarr_format) + await _prepare_overwrite(store_path, zarr_format=zarr_format, overwrite=overwrite) # Validate rectilinear chunks constraints if _is_rectilinear_chunks(chunks): @@ -4832,15 +4846,6 @@ def _parse_chunk_key_encoding( return result -def default_filters_v3(dtype: ZDType[Any, Any]) -> tuple[ArrayArrayCodec, ...]: - """ - Given a data type, return the default filters for that data type. - - This is an empty tuple. No data types have default filters. - """ - return () - - def default_compressors_v3(dtype: ZDType[Any, Any]) -> tuple[BytesBytesCodec, ...]: """ Given a data type, return the default compressors for that data type. @@ -4993,7 +4998,8 @@ def _parse_chunk_encoding_v3( if filters is None: out_array_array: tuple[ArrayArrayCodec, ...] = () elif filters == "auto": - out_array_array = default_filters_v3(dtype) + # no data types have default filters + out_array_array = () else: maybe_array_array: Iterable[Codec | dict[str, JSON]] if isinstance(filters, dict | Codec): @@ -5388,11 +5394,8 @@ async def _get_selection( NDArrayLikeOrScalar The selected data. """ - # Get dtype from metadata - if metadata.zarr_format == 2: - zdtype = metadata.dtype - else: - zdtype = metadata.data_type + # `dtype` returns the zarr dtype object for both v2 and v3 metadata. + zdtype = metadata.dtype dtype = zdtype.to_native_dtype() # Determine memory order @@ -5522,182 +5525,6 @@ async def _getitem( ) -async def _get_orthogonal_selection( - store_path: StorePath, - metadata: ArrayMetadata, - codec_pipeline: CodecPipeline, - config: ArrayConfig, - chunk_grid: ChunkGrid, - selection: OrthogonalSelection, - *, - out: NDBuffer | None = None, - fields: Fields | None = None, - prototype: BufferPrototype | None = None, -) -> NDArrayLikeOrScalar: - """ - Get an orthogonal selection from the array. - - Parameters - ---------- - store_path : StorePath - The store path of the array. - metadata : ArrayMetadata - The array metadata. - codec_pipeline : CodecPipeline - The codec pipeline for encoding/decoding. - config : ArrayConfig - The array configuration. - chunk_grid : ChunkGrid - The chunk grid. - selection : OrthogonalSelection - The orthogonal selection specification. - out : NDBuffer | None, optional - An output buffer to write the data to. - fields : Fields | None, optional - Fields to select from structured arrays. - prototype : BufferPrototype | None, optional - A buffer prototype to use for the retrieved data. - - Returns - ------- - NDArrayLikeOrScalar - The selected data. - """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = OrthogonalIndexer(selection, metadata.shape, chunk_grid) - return await _get_selection( - store_path, - metadata, - codec_pipeline, - config, - chunk_grid, - indexer=indexer, - out=out, - fields=fields, - prototype=prototype, - ) - - -async def _get_mask_selection( - store_path: StorePath, - metadata: ArrayMetadata, - codec_pipeline: CodecPipeline, - config: ArrayConfig, - chunk_grid: ChunkGrid, - mask: MaskSelection, - *, - out: NDBuffer | None = None, - fields: Fields | None = None, - prototype: BufferPrototype | None = None, -) -> NDArrayLikeOrScalar: - """ - Get a mask selection from the array. - - Parameters - ---------- - store_path : StorePath - The store path of the array. - metadata : ArrayMetadata - The array metadata. - codec_pipeline : CodecPipeline - The codec pipeline for encoding/decoding. - config : ArrayConfig - The array configuration. - chunk_grid : ChunkGrid - The chunk grid. - mask : MaskSelection - The boolean mask specifying the selection. - out : NDBuffer | None, optional - An output buffer to write the data to. - fields : Fields | None, optional - Fields to select from structured arrays. - prototype : BufferPrototype | None, optional - A buffer prototype to use for the retrieved data. - - Returns - ------- - NDArrayLikeOrScalar - The selected data. - """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = MaskIndexer(mask, metadata.shape, chunk_grid) - return await _get_selection( - store_path, - metadata, - codec_pipeline, - config, - chunk_grid, - indexer=indexer, - out=out, - fields=fields, - prototype=prototype, - ) - - -async def _get_coordinate_selection( - store_path: StorePath, - metadata: ArrayMetadata, - codec_pipeline: CodecPipeline, - config: ArrayConfig, - chunk_grid: ChunkGrid, - selection: CoordinateSelection, - *, - out: NDBuffer | None = None, - fields: Fields | None = None, - prototype: BufferPrototype | None = None, -) -> NDArrayLikeOrScalar: - """ - Get a coordinate selection from the array. - - Parameters - ---------- - store_path : StorePath - The store path of the array. - metadata : ArrayMetadata - The array metadata. - codec_pipeline : CodecPipeline - The codec pipeline for encoding/decoding. - config : ArrayConfig - The array configuration. - chunk_grid : ChunkGrid - The chunk grid. - selection : CoordinateSelection - The coordinate selection specification. - out : NDBuffer | None, optional - An output buffer to write the data to. - fields : Fields | None, optional - Fields to select from structured arrays. - prototype : BufferPrototype | None, optional - A buffer prototype to use for the retrieved data. - - Returns - ------- - NDArrayLikeOrScalar - The selected data. - """ - if prototype is None: - prototype = default_buffer_prototype() - indexer = CoordinateIndexer(selection, metadata.shape, chunk_grid) - out_array = await _get_selection( - store_path, - metadata, - codec_pipeline, - config, - chunk_grid, - indexer=indexer, - out=out, - fields=fields, - prototype=prototype, - ) - - if hasattr(out_array, "shape"): - # restore shape - out_array = cast("NDArrayLikeOrScalar", np.array(out_array).reshape(indexer.sel_shape)) - return out_array - - async def _set_selection( store_path: StorePath, metadata: ArrayMetadata, @@ -5734,11 +5561,8 @@ async def _set_selection( fields : Fields | None, optional Fields to select from structured arrays. """ - # Get dtype from metadata - if metadata.zarr_format == 2: - zdtype = metadata.dtype - else: - zdtype = metadata.data_type + # `dtype` returns the zarr dtype object for both v2 and v3 metadata. + zdtype = metadata.dtype dtype = zdtype.to_native_dtype() # check fields are sensible diff --git a/src/zarr/storage/_common.py b/src/zarr/storage/_common.py index 1e13a9ac3f..86b6c41573 100644 --- a/src/zarr/storage/_common.py +++ b/src/zarr/storage/_common.py @@ -643,18 +643,7 @@ async def contains_group(store_path: StorePath, zarr_format: ZarrFormat) -> bool """ if zarr_format == 3: - extant_meta_bytes = await (store_path / ZARR_JSON).get() - if extant_meta_bytes is None: - return False - else: - try: - extant_meta_json = buffer_to_json_object(extant_meta_bytes) - # we avoid constructing a full metadata document here in the name of speed. - result: bool = extant_meta_json["node_type"] == "group" - except (ValueError, KeyError, TypeError): - return False - else: - return result + return (await _contains_node_v3(store_path)) == "group" elif zarr_format == 2: return await (store_path / ZGROUP_JSON).exists() msg = f"Invalid zarr_format provided. Got {zarr_format}, expected 2 or 3" # type: ignore[unreachable] diff --git a/tests/test_api/test_asynchronous.py b/tests/test_api/test_asynchronous.py index 362195e858..6ebec36bbd 100644 --- a/tests/test_api/test_asynchronous.py +++ b/tests/test_api/test_asynchronous.py @@ -75,6 +75,7 @@ def test_get_shape_chunks( "chunks": (10,), "shape": (100,), "dtype": np.dtype("f8"), + "fill_value": np.float64(0.0), "compressor": None, "filters": None, "order": "C",