Skip to content

Commit

Permalink
Zarr v3: support root path (#1085)
Browse files Browse the repository at this point in the history
* support path='/' for zarr v3 to create a root array or group

v3 spec states path = '/' for arrays gives /meta/root.array.json
               path = '/' for groups gives /meta/root.array.json

In this implementation path = None or path = '' will also result in
a root array. Creation routines default to path=None, so this makes
it so that the path argument does not have to be manually specified.

* revert change to normalize_storage_path

update additional tests

* fix

* update TestArrayV3 to inherit all tests from TestArray

* remove test bypass

* fix nbytes_stored for v3 array without path

update test_nbytes_stored to handle both v3 and v2 cases properly

* pep8

* fix incorrect default value for at_root in _init_creation_kwargs

previous behavior corresponded to at_root=True by default

* flake8
  • Loading branch information
grlee77 authored Sep 8, 2022
1 parent 43266ee commit 2cfee9c
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 136 deletions.
10 changes: 5 additions & 5 deletions zarr/_storage/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,23 +420,23 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str

def _prefix_to_array_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
sfx = _get_metadata_suffix(store) # type: ignore
if prefix:
sfx = _get_metadata_suffix(store) # type: ignore
key = meta_root + prefix.rstrip("/") + ".array" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 array key")
key = meta_root[:-1] + '.array' + sfx
else:
key = prefix + array_meta_key
return key


def _prefix_to_group_key(store: StoreLike, prefix: str) -> str:
if getattr(store, "_store_version", 2) == 3:
sfx = _get_metadata_suffix(store) # type: ignore
if prefix:
sfx = _get_metadata_suffix(store) # type: ignore
key = meta_root + prefix.rstrip('/') + ".group" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 group key")
key = meta_root[:-1] + '.group' + sfx
else:
key = prefix + group_meta_key
return key
Expand All @@ -449,7 +449,7 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str:
if prefix:
key = meta_root + prefix.rstrip('/') + ".array" + sfx
else:
raise ValueError("prefix must be supplied to get a v3 array key")
key = meta_root[:-1] + '.array' + sfx
else:
key = prefix + attrs_key
return key
2 changes: 1 addition & 1 deletion zarr/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
dimension_separator = normalize_dimension_separator(dimension_separator)

if zarr_version > 2 and path is None:
raise ValueError("path must be supplied to initialize a zarr v3 array")
path = '/'

# initialize array metadata
init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,
Expand Down
6 changes: 0 additions & 6 deletions zarr/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1276,8 +1276,6 @@ def group(store=None, overwrite=False, chunk_store=None,
if zarr_version != 2:
assert_zarr_v3_api_available()

if zarr_version == 3 and path is None:
raise ValueError(f"path must be provided for a v{zarr_version} group")
path = normalize_storage_path(path)

if zarr_version == 2:
Expand Down Expand Up @@ -1366,10 +1364,6 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N
"zarr_version of store and chunk_store must match"
)

store_version = getattr(store, '_store_version', 2)
if store_version == 3 and path is None:
raise ValueError("path must be supplied to initialize a zarr v3 group")

path = normalize_storage_path(path)

# ensure store is initialized
Expand Down
10 changes: 8 additions & 2 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,14 @@ def _getsize(store: BaseStore, path: Path = None) -> int:
size = 0
store_version = getattr(store, '_store_version', 2)
if store_version == 3:
members = store.list_prefix(data_root + path) # type: ignore
members += store.list_prefix(meta_root + path) # type: ignore
if path == '':
# have to list the root folders without trailing / in this case
members = store.list_prefix(data_root.rstrip('/')) # type: ignore
members += store.list_prefix(meta_root.rstrip('/')) # type: ignore
else:
members = store.list_prefix(data_root + path) # type: ignore
members += store.list_prefix(meta_root + path) # type: ignore
# also include zarr.json?
# members += ['zarr.json']
else:
members = listdir(store, path)
Expand Down
10 changes: 0 additions & 10 deletions zarr/tests/test_convenience.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,6 @@ def test_open_array(path_type, zarr_version):
assert isinstance(z, Array)
assert z.shape == (200,)

if zarr_version == 3:
# cannot open a v3 array without path
with pytest.raises(ValueError):
open(store, mode='w', shape=200, zarr_version=3)

# open array, read-only
z = open(store, mode='r', **kwargs)
assert isinstance(z, Array)
Expand Down Expand Up @@ -108,11 +103,6 @@ def test_open_group(path_type, zarr_version):
assert isinstance(g, Group)
assert 'foo' not in g

if zarr_version == 3:
# cannot open a v3 group without path
with pytest.raises(ValueError):
open(store, mode='w', zarr_version=3)

# open group, read-only
g = open(store, mode='r', **kwargs)
assert isinstance(g, Group)
Expand Down
117 changes: 64 additions & 53 deletions zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@

from zarr._storage.store import (
v3_api_available,
_prefix_to_array_key,
_prefix_to_attrs_key,
_prefix_to_group_key
)
from zarr.core import Array
from zarr.errors import ArrayNotFoundError, ContainsGroupError
Expand Down Expand Up @@ -64,56 +61,64 @@
class TestArray(unittest.TestCase):

version = 2
root = ''
KVStoreClass = KVStore

def test_array_init(self):

# normal initialization
store = KVStore(dict())
store = self.KVStoreClass(dict())
init_array(store, shape=100, chunks=10, dtype="<f8")
a = Array(store)
a = Array(store, zarr_version=self.version)
assert isinstance(a, Array)
assert (100,) == a.shape
assert (10,) == a.chunks
assert '' == a.path
assert a.name is None
assert a.basename is None
assert store is a.store
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
if self.version == 2:
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
else:
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest()
store.close()

# initialize at path
store = KVStore(dict())
store = self.KVStoreClass(dict())
init_array(store, shape=100, chunks=10, path='foo/bar', dtype='<f8')
a = Array(store, path='foo/bar')
a = Array(store, path='foo/bar', zarr_version=self.version)
assert isinstance(a, Array)
assert (100,) == a.shape
assert (10,) == a.chunks
assert 'foo/bar' == a.path
assert '/foo/bar' == a.name
assert 'bar' == a.basename
assert store is a.store
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()

if self.version == 2:
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
else:
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest()
# store not initialized
store = KVStore(dict())
store = self.KVStoreClass(dict())
with pytest.raises(ValueError):
Array(store)
Array(store, zarr_version=self.version)

# group is in the way
store = KVStore(dict())
store = self.KVStoreClass(dict())
init_group(store, path='baz')
with pytest.raises(ValueError):
Array(store, path='baz')
Array(store, path='baz', zarr_version=self.version)

def create_array(self, read_only=False, **kwargs):
store = KVStore(dict())
store = self.KVStoreClass(dict())
kwargs.setdefault('compressor', Zlib(level=1))
cache_metadata = kwargs.pop('cache_metadata', True)
cache_attrs = kwargs.pop('cache_attrs', True)
write_empty_chunks = kwargs.pop('write_empty_chunks', True)
init_array(store, **kwargs)
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks)
cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks,
zarr_version=self.version)

def test_store_has_text_keys(self):
# Initialize array
Expand Down Expand Up @@ -162,15 +167,28 @@ def test_nbytes_stored(self):

# dict as store
z = self.create_array(shape=1000, chunks=100)
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
if self.version == 3:
expect_nbytes_stored = sum(
buffer_size(v) for k, v in z.store.items() if k != 'zarr.json'
)
else:
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
assert expect_nbytes_stored == z.nbytes_stored
z[:] = 42
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
if self.version == 3:
expect_nbytes_stored = sum(
buffer_size(v) for k, v in z.store.items() if k != 'zarr.json'
)
else:
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
assert expect_nbytes_stored == z.nbytes_stored

# mess with store
try:
z.store[z._key_prefix + 'foo'] = list(range(10))
if self.version == 2:
z.store[z._key_prefix + 'foo'] = list(range(10))
else:
z.store['meta/root/foo'] = list(range(10))
assert -1 == z.nbytes_stored
except TypeError:
pass
Expand Down Expand Up @@ -1003,7 +1021,7 @@ def test_nchunks_initialized(self):

assert 0 == z.nchunks_initialized
# manually put something into the store to confuse matters
z.store['foo'] = b'bar'
z.store[self.root + 'foo'] = b'bar'
assert 0 == z.nchunks_initialized
z[:] = 42
assert 10 == z.nchunks_initialized
Expand Down Expand Up @@ -2703,36 +2721,25 @@ def test_read_from_all_blocks(self):
# StoreV3 test classes inheriting from the above below this point
####

# Start with TestArrayWithPathV3 not TestArrayV3 since path must be supplied

@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
class TestArrayV3(unittest.TestCase):
class TestArrayV3(TestArray):

version = 3
root = meta_root
KVStoreClass = KVStoreV3

def test_array_init(self):

# normal initialization
store = KVStoreV3(dict())
with pytest.raises(ValueError):
# cannot init_array for v3 without a path
init_array(store, shape=100, chunks=10, dtype="<f8")

init_array(store, path='x', shape=100, chunks=10, dtype="<f8")
with pytest.raises(ValueError):
# cannot initialize a v3 array without a path
Array(store)

def test_prefix_exceptions(self):
store = KVStoreV3(dict())
with pytest.raises(ValueError):
_prefix_to_array_key(store, '')

with pytest.raises(ValueError):
_prefix_to_group_key(store, '')
def expected(self):
# tests for array without path will not be run for v3 stores
assert self.version == 3
return [
"73ab8ace56719a5c9308c3754f5e2d57bc73dc20",
"5fb3d02b8f01244721582929b3cad578aec5cea5",
"26b098bedb640846e18dc2fbc1c27684bb02b532",
"799a458c287d431d747bec0728987ca4fe764549",
"c780221df84eb91cb62f633f12d3f1eaa9cee6bd"
]

with pytest.raises(ValueError):
_prefix_to_attrs_key(store, '')
# TODO: fix test_nbytes_stored


@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
Expand All @@ -2754,10 +2761,19 @@ def create_array(array_path='arr1', read_only=False, **kwargs):

def test_array_init(self):

# should not be able to initialize without a path in V3
store = KVStoreV3(dict())
with pytest.raises(ValueError):
init_array(store, shape=100, chunks=10, dtype="<f8")
# can initialize an array without a path
init_array(store, shape=100, chunks=10, dtype="<f8")
b = Array(store)
assert not b.is_view
assert isinstance(b, Array)
assert (100,) == b.shape
assert (10,) == b.chunks
assert '' == b.path
assert b.name is None
assert b.basename is None
assert store is b.store
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == b.hexdigest()

# initialize at path
store = KVStoreV3(dict())
Expand Down Expand Up @@ -2797,11 +2813,6 @@ def test_array_init(self):
assert group_key not in store
assert (meta_root + path + '.array.json') in store

def test_array_no_path(self):
# passing path=None to init_array will raise an exception
with pytest.raises(ValueError):
self.create_array(shape=1000, chunks=100, array_path=None)

def expected(self):
return [
"73ab8ace56719a5c9308c3754f5e2d57bc73dc20",
Expand Down
Loading

0 comments on commit 2cfee9c

Please sign in to comment.