diff --git a/CHANGELOG.md b/CHANGELOG.md index 3731f3082..7783f6748 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ * Changed the metadata schema for `Fluorescence` and `DfOverF` where the traces metadata can be provided as a dict instead of a list of dicts. The name of the plane segmentation is used to determine which traces to add to the `Fluorescence` and `DfOverF` containers. [PR #632](https://github.com/catalystneuro/neuroconv/pull/632) * Modify the filtering of traces to also filter out traces with empty values. [PR #649](https://github.com/catalystneuro/neuroconv/pull/649) +* Added tool function `get_default_dataset_io_configurations` for identifying and collecting all fields of an in-memory `NWBFile` that could become datasets on disk, and returning instances of the Pydantic dataset models filled with default values for chunking/buffering/compression. [PR #569](https://github.com/catalystneuro/neuroconv/pull/569) + ### Fixes * Fixed GenericDataChunkIterator (in hdmf.py) in the case where the number of dimensions is 1 and the size in bytes is greater than the threshold of 1 GB. [PR #638](https://github.com/catalystneuro/neuroconv/pull/638) diff --git a/src/neuroconv/tools/hdmf.py b/src/neuroconv/tools/hdmf.py index 4be1a5dc6..46f0fd865 100644 --- a/src/neuroconv/tools/hdmf.py +++ b/src/neuroconv/tools/hdmf.py @@ -8,77 +8,113 @@ class GenericDataChunkIterator(HDMFGenericDataChunkIterator): def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]: - num_axes = len(self.maxshape) - chunk_bytes = math.prod(self.chunk_shape) * self.dtype.itemsize + return self.estimate_default_buffer_shape( + buffer_gb=buffer_gb, chunk_shape=self.chunk_shape, maxshape=self.maxshape, dtype=self.dtype + ) + + # TODO: move this to the core iterator in HDMF so it can be easily swapped out as well as run on its own + @staticmethod + def estimate_default_chunk_shape(chunk_mb: float, maxshape: Tuple[int, ...], dtype: np.dtype) -> Tuple[int, ...]: + """ + Select chunk shape with size in MB less than the threshold of chunk_mb. + + Keeps the dimensional ratios of the original data. + """ + assert chunk_mb > 0.0, f"chunk_mb ({chunk_mb}) must be greater than zero!" + # Eventually, Pydantic validation can handle this validation for us + + n_dims = len(maxshape) + itemsize = dtype.itemsize + chunk_bytes = chunk_mb * 1e6 + + min_maxshape = min(maxshape) + v = tuple(math.floor(maxshape_axis / min_maxshape) for maxshape_axis in maxshape) + prod_v = math.prod(v) + while prod_v * itemsize > chunk_bytes and prod_v != 1: + non_unit_min_v = min(x for x in v if x != 1) + v = tuple(math.floor(x / non_unit_min_v) if x != 1 else x for x in v) + prod_v = math.prod(v) + k = math.floor((chunk_bytes / (prod_v * itemsize)) ** (1 / n_dims)) + return tuple([min(k * x, maxshape[dim]) for dim, x in enumerate(v)]) + + # TODO: move this to the core iterator in HDMF so it can be easily swapped out as well as run on its own + @staticmethod + def estimate_default_buffer_shape( + buffer_gb: float, chunk_shape: Tuple[int, ...], maxshape: Tuple[int, ...], dtype: np.dtype + ) -> Tuple[int]: + num_axes = len(maxshape) + chunk_bytes = math.prod(chunk_shape) * dtype.itemsize + + assert buffer_gb > 0, f"buffer_gb ({buffer_gb}) must be greater than zero!" assert ( buffer_gb >= chunk_bytes / 1e9 ), f"buffer_gb ({buffer_gb}) must be greater than the chunk size ({chunk_bytes / 1e9})!" - assert all( - np.array(self.chunk_shape) > 0 - ), f"Some dimensions of chunk_shape ({self.chunk_shape}) are less than zero!"
+ assert all(np.array(chunk_shape) > 0), f"Some dimensions of chunk_shape ({chunk_shape}) are less than zero!" - maxshape = np.array(self.maxshape) + maxshape = np.array(maxshape) # Early termination condition - if math.prod(maxshape) * self.dtype.itemsize / 1e9 < buffer_gb: - return tuple(self.maxshape) + if math.prod(maxshape) * dtype.itemsize / 1e9 < buffer_gb: + return tuple(maxshape) buffer_bytes = chunk_bytes - axis_sizes_bytes = maxshape * self.dtype.itemsize + axis_sizes_bytes = maxshape * dtype.itemsize target_buffer_bytes = buffer_gb * 1e9 - if num_axes > 1: - smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(self.chunk_shape) - # If the smallest full axis does not fit within the buffer size, form a square along the two smallest axes - sub_square_buffer_shape = np.array(self.chunk_shape) - if min(axis_sizes_bytes) > target_buffer_bytes: - k1 = math.floor((target_buffer_bytes / chunk_bytes) ** 0.5) - for axis in [smallest_chunk_axis, second_smallest_chunk_axis]: - sub_square_buffer_shape[axis] = k1 * sub_square_buffer_shape[axis] - return tuple(sub_square_buffer_shape) - elif num_axes == 1: - smallest_chunk_axis = 0 - # Handle the case where the single axis is too large to fit in the buffer - if axis_sizes_bytes[0] > target_buffer_bytes: - k1 = math.floor(target_buffer_bytes / chunk_bytes) - return tuple( - [ - k1 * self.chunk_shape[0], - ] - ) - else: - raise ValueError(f"num_axes ({num_axes}) is less than one!") + + if min(axis_sizes_bytes) > target_buffer_bytes: + if num_axes > 1: + smallest_chunk_axis, second_smallest_chunk_axis, *_ = np.argsort(chunk_shape) + # If the smallest full axis does not fit within the buffer size, form a square along the smallest axes + sub_square_buffer_shape = np.array(chunk_shape) + if min(axis_sizes_bytes) > target_buffer_bytes: + k1 = math.floor((target_buffer_bytes / chunk_bytes) ** 0.5) + for axis in [smallest_chunk_axis, second_smallest_chunk_axis]: + sub_square_buffer_shape[axis] = k1 * sub_square_buffer_shape[axis] + return tuple(sub_square_buffer_shape) + elif num_axes == 1: + smallest_chunk_axis = 0 + # Handle the case where the single axis is too large to fit in the buffer + if axis_sizes_bytes[0] > target_buffer_bytes: + k1 = math.floor(target_buffer_bytes / chunk_bytes) + return tuple( + [ + k1 * chunk_shape[0], + ] + ) + else: + raise ValueError(f"num_axes ({num_axes}) is less than one!") # Original one-shot estimation has good performance for certain shapes chunk_to_buffer_ratio = buffer_gb * 1e9 / chunk_bytes chunk_scaling_factor = math.floor(chunk_to_buffer_ratio ** (1 / num_axes)) unpadded_buffer_shape = [ - np.clip(a=int(x), a_min=self.chunk_shape[j], a_max=self.maxshape[j]) - for j, x in enumerate(chunk_scaling_factor * np.array(self.chunk_shape)) + np.clip(a=int(x), a_min=chunk_shape[j], a_max=maxshape[j]) + for j, x in enumerate(chunk_scaling_factor * np.array(chunk_shape)) ] - unpadded_buffer_bytes = math.prod(unpadded_buffer_shape) * self.dtype.itemsize + unpadded_buffer_bytes = math.prod(unpadded_buffer_shape) * dtype.itemsize # Method that starts by filling the smallest axis completely or calculates best partial fill - padded_buffer_shape = np.array(self.chunk_shape) - chunks_per_axis = np.ceil(maxshape / self.chunk_shape) + padded_buffer_shape = np.array(chunk_shape) + chunks_per_axis = np.ceil(maxshape / chunk_shape) small_axis_fill_size = chunk_bytes * min(chunks_per_axis) full_axes_used = np.zeros(shape=num_axes, dtype=bool) if small_axis_fill_size <= target_buffer_bytes: buffer_bytes = 
small_axis_fill_size - padded_buffer_shape[smallest_chunk_axis] = self.maxshape[smallest_chunk_axis] + padded_buffer_shape[smallest_chunk_axis] = maxshape[smallest_chunk_axis] full_axes_used[smallest_chunk_axis] = True for axis, chunks_on_axis in enumerate(chunks_per_axis): if full_axes_used[axis]: # If the smallest axis, skip since already used continue if chunks_on_axis * buffer_bytes <= target_buffer_bytes: # If multiple axes can be used together buffer_bytes *= chunks_on_axis - padded_buffer_shape[axis] = self.maxshape[axis] + padded_buffer_shape[axis] = maxshape[axis] else: # Found an axis that is too large to use with the rest of the buffer; calculate how much can be used k3 = math.floor(target_buffer_bytes / buffer_bytes) padded_buffer_shape[axis] *= k3 break - padded_buffer_bytes = math.prod(padded_buffer_shape) * self.dtype.itemsize + + padded_buffer_bytes = math.prod(padded_buffer_shape) * dtype.itemsize if padded_buffer_bytes >= unpadded_buffer_bytes: return tuple(padded_buffer_shape) @@ -88,7 +124,7 @@ def _get_default_buffer_shape(self, buffer_gb: float = 1.0) -> Tuple[int]: class SliceableDataChunkIterator(GenericDataChunkIterator): """ - Generic data chunk iterator that works for any memory mapped array, such as a np.memmap or an h5py.Dataset + Generic data chunk iterator that works for any memory mapped array, such as a np.memmap or h5py.Dataset object. """ def __init__(self, data, **kwargs): diff --git a/src/neuroconv/tools/nwb_helpers/__init__.py b/src/neuroconv/tools/nwb_helpers/__init__.py index 0982439bb..cb78a67a5 100644 --- a/src/neuroconv/tools/nwb_helpers/__init__.py +++ b/src/neuroconv/tools/nwb_helpers/__init__.py @@ -1,3 +1,4 @@ +from ._dataset_configuration import get_default_dataset_io_configurations from ._metadata_and_file_helpers import ( add_device_from_metadata, get_default_nwbfile_metadata, @@ -5,17 +6,17 @@ make_nwbfile_from_metadata, make_or_load_nwbfile, ) -from ._models._base_models import DatasetConfiguration, DatasetInfo +from ._models._base_models import DatasetInfo from ._models._hdf5_models import ( AVAILABLE_HDF5_COMPRESSION_METHODS, HDF5BackendConfiguration, - HDF5DatasetConfiguration, + HDF5DatasetIOConfiguration, ) from ._models._zarr_models import ( AVAILABLE_ZARR_COMPRESSION_METHODS, ZarrBackendConfiguration, - ZarrDatasetConfiguration, + ZarrDatasetIOConfiguration, ) -BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetConfiguration, zarr=ZarrDatasetConfiguration) -BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) +BACKEND_CONFIGURATIONS = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) +DATASET_IO_CONFIGURATIONS = dict(hdf5=HDF5DatasetIOConfiguration, zarr=ZarrDatasetIOConfiguration) diff --git a/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py new file mode 100644 index 000000000..4e7783aff --- /dev/null +++ b/src/neuroconv/tools/nwb_helpers/_dataset_configuration.py @@ -0,0 +1,208 @@ +"""Collection of helper functions related to configuration of datasets dependent on backend.""" +from typing import Generator, Literal, Union + +import h5py +import numpy as np +import zarr +from hdmf import Container +from hdmf.data_utils import DataChunkIterator, DataIO, GenericDataChunkIterator +from hdmf.utils import get_data_shape +from hdmf_zarr import NWBZarrIO +from pynwb import NWBHDF5IO, NWBFile, TimeSeries +from pynwb.base import DynamicTable + +from ._models._base_models import DatasetInfo, 
DatasetIOConfiguration +from ._models._hdf5_models import HDF5BackendConfiguration, HDF5DatasetIOConfiguration +from ._models._zarr_models import ZarrBackendConfiguration, ZarrDatasetIOConfiguration +from ..hdmf import SliceableDataChunkIterator + +BACKEND_TO_DATASET_CONFIGURATION = dict(hdf5=HDF5DatasetIOConfiguration, zarr=ZarrDatasetIOConfiguration) +BACKEND_TO_CONFIGURATION = dict(hdf5=HDF5BackendConfiguration, zarr=ZarrBackendConfiguration) + + +def _get_io_mode(io: Union[NWBHDF5IO, NWBZarrIO]) -> str: + """NWBHDF5IO and NWBZarrIO have different ways of storing the io mode (e.g. "r", "a", "w") they used on a path.""" + if isinstance(io, NWBHDF5IO): + return io.mode + elif isinstance(io, NWBZarrIO): + return io._ZarrIO__mode + + +def _is_dataset_written_to_file( + candidate_dataset: Union[h5py.Dataset, zarr.Array], + backend: Literal["hdf5", "zarr"], + existing_file: Union[h5py.File, zarr.Group, None], +) -> bool: + """ + Determine if the neurodata object is already written to the file on disk. + + This object should then be skipped by the `get_io_datasets` function when working in append mode. + """ + return ( + isinstance(candidate_dataset, h5py.Dataset) # If the source data is an HDF5 Dataset + and backend == "hdf5" + and candidate_dataset.file == existing_file # If the source HDF5 Dataset is the appending NWBFile + ) or ( + isinstance(candidate_dataset, zarr.Array) # If the source data is an Zarr Array + and backend == "zarr" + and candidate_dataset.store == existing_file # If the source Zarr 'file' is the appending NWBFile + ) + + +def _find_location_in_memory_nwbfile(current_location: str, neurodata_object: Container) -> str: + """ + Method for determining the location of a neurodata object within an in-memory NWBFile object. + + Distinct from methods from other packages, such as the NWB Inspector, which rely on such files being read from disk. + """ + parent = neurodata_object.parent + if isinstance(parent, NWBFile): + # Items in defined top-level places like acquisition, intervals, etc. do not act as 'containers' + # in that they do not set the `.parent` attribute; ask if object is in their in-memory dictionaries instead + for parent_field_name, parent_field_value in parent.fields.items(): + if isinstance(parent_field_value, dict) and neurodata_object.name in parent_field_value: + return parent_field_name + "/" + neurodata_object.name + "/" + current_location + return neurodata_object.name + "/" + current_location + return _find_location_in_memory_nwbfile( + current_location=neurodata_object.name + "/" + current_location, neurodata_object=parent + ) + + +def _infer_dtype_using_data_chunk_iterator(candidate_dataset: Union[h5py.Dataset, zarr.Array]): + """ + The DataChunkIterator has one of the best generic dtype inference, though logic is hard to peel out of it. + + It can fail in rare cases but not essential to our default configuration + """ + try: + return DataChunkIterator(candidate_dataset).dtype + except Exception as exception: + if str(exception) != "Data type could not be determined. 
Please specify dtype in DataChunkIterator init.": + raise exception + else: + return np.dtype("object") + + +def _get_dataset_metadata( + neurodata_object: Union[TimeSeries, DynamicTable], field_name: str, backend: Literal["hdf5", "zarr"] +) -> Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration, None]: + """Fill in the Dataset model with as many values as can be automatically detected or inferred.""" + DatasetIOConfigurationClass = BACKEND_TO_DATASET_CONFIGURATION[backend] + + candidate_dataset = getattr(neurodata_object, field_name) + + # For now, skip over datasets already wrapped in DataIO + # Could maybe eventually support modifying chunks in place + # But setting buffer shape only possible if iterator was wrapped first + if isinstance(candidate_dataset, DataIO): + return None + + dtype = _infer_dtype_using_data_chunk_iterator(candidate_dataset=candidate_dataset) + full_shape = get_data_shape(data=candidate_dataset) + + if isinstance(candidate_dataset, GenericDataChunkIterator): + chunk_shape = candidate_dataset.chunk_shape + buffer_shape = candidate_dataset.buffer_shape + elif dtype != "unknown": + # TODO: eventually replace this with staticmethods on hdmf.data_utils.GenericDataChunkIterator + chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape( + chunk_mb=10.0, maxshape=full_shape, dtype=np.dtype(dtype) + ) + buffer_shape = SliceableDataChunkIterator.estimate_default_buffer_shape( + buffer_gb=0.5, chunk_shape=chunk_shape, maxshape=full_shape, dtype=np.dtype(dtype) + ) + else: + pass # TODO: think on this; perhaps zarr's standalone estimator? + + location = _find_location_in_memory_nwbfile(current_location=field_name, neurodata_object=neurodata_object) + dataset_info = DatasetInfo( + object_id=neurodata_object.object_id, + object_name=neurodata_object.name, + location=location, + full_shape=full_shape, + dtype=dtype, + ) + dataset_configuration = DatasetIOConfigurationClass( + dataset_info=dataset_info, chunk_shape=chunk_shape, buffer_shape=buffer_shape + ) + return dataset_configuration + + +def get_default_dataset_io_configurations( + nwbfile: NWBFile, + backend: Union[None, Literal["hdf5", "zarr"]] = None, # None for auto-detect from append mode, otherwise required +) -> Generator[DatasetIOConfiguration, None, None]: + """ + Method for automatically detecting all objects in the file that could be wrapped in a DataIO. + + Parameters + ---------- + nwbfile : pynwb.NWBFile + An in-memory NWBFile object, either generated from the base class or read from an existing file of any backend. + backend : "hdf5" or "zarr" + Which backend format type you would like to use in configuring each datasets compression methods and options. + + Yields + ------ + DatasetIOConfiguration + A summary of each detected object that can be wrapped in a DataIO. + """ + if backend is None and nwbfile.read_io is None: + raise ValueError( + "Keyword argument `backend` (either 'hdf5' or 'zarr') must be specified if the `nwbfile` was not " + "read from an existing file!" + ) + if backend is None and nwbfile.read_io is not None and nwbfile.read_io.mode not in ("r+", "a"): + raise ValueError( + "Keyword argument `backend` (either 'hdf5' or 'zarr') must be specified if the `nwbfile` is being appended." 
+ ) + + detected_backend = None + existing_file = None + if isinstance(nwbfile.read_io, NWBHDF5IO) and _get_io_mode(io=nwbfile.read_io) in ("r+", "a"): + detected_backend = "hdf5" + existing_file = nwbfile.read_io._file + elif isinstance(nwbfile.read_io, NWBZarrIO) and _get_io_mode(io=nwbfile.read_io) in ("r+", "a"): + detected_backend = "zarr" + existing_file = nwbfile.read_io.file.store + backend = backend or detected_backend + + if detected_backend is not None and detected_backend != backend: + raise ValueError( + f"Detected backend '{detected_backend}' for appending file, but specified `backend` " + f"({backend}) does not match! Set `backend=None` or remove the keyword argument to allow it to auto-detect." + ) + + for neurodata_object in nwbfile.objects.values(): + if isinstance(neurodata_object, DynamicTable): + dynamic_table = neurodata_object # for readability + + for column in dynamic_table.columns: + column_name = column.name + candidate_dataset = column.data # VectorData object + if _is_dataset_written_to_file( + candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file + ): + continue # skip + + yield _get_dataset_metadata(neurodata_object=column, field_name="data", backend=backend) + else: + # Primarily for TimeSeries, but also any extended class that has 'data' or 'timestamps' + # The most common example of this is ndx-events Events/LabeledEvents types + time_series = neurodata_object # for readability + + for field_name in ("data", "timestamps"): + if field_name not in time_series.fields: # timestamps is optional + continue + + candidate_dataset = getattr(time_series, field_name) + if _is_dataset_written_to_file( + candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file + ): + continue # skip + + # Edge case of in-memory ImageSeries with external mode; data is in fields and is empty array + if isinstance(candidate_dataset, np.ndarray) and candidate_dataset.size == 0: + continue # skip + + yield _get_dataset_metadata(neurodata_object=time_series, field_name=field_name, backend=backend) diff --git a/src/neuroconv/tools/nwb_helpers/_models/_base_models.py b/src/neuroconv/tools/nwb_helpers/_models/_base_models.py index 72b364dea..8a6486e74 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_base_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_base_models.py @@ -62,7 +62,7 @@ def __init__(self, **values): super().__init__(**values) -class DatasetConfiguration(BaseModel, ABC): +class DatasetIOConfiguration(BaseModel, ABC): """A data model for configuring options about an object that will become a HDF5 or Zarr Dataset in the file.""" # TODO: When using Pydantic v2, remove @@ -188,7 +188,7 @@ class BackendConfiguration(BaseModel): backend: Literal["hdf5", "zarr"] = Field(description="The name of the backend used to configure the NWBFile.") data_io_class: Type[DataIO] = Field(description="The DataIO class that is specific to this backend.") - dataset_configurations: Dict[str, DatasetConfiguration] = Field( + dataset_configurations: Dict[str, DatasetIOConfiguration] = Field( description=( "A mapping from object locations (e.g. 
`acquisition/TestElectricalSeriesAP/data`) " "to their DatasetConfiguration specification that contains all information " diff --git a/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py b/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py index daf772688..b34671154 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_hdf5_models.py @@ -6,7 +6,7 @@ from pydantic import Field from pynwb import H5DataIO -from ._base_models import BackendConfiguration, DatasetConfiguration +from ._base_models import BackendConfiguration, DatasetIOConfiguration _base_hdf5_filters = set(h5py.filters.decode) _excluded_hdf5_filters = set( @@ -29,7 +29,7 @@ ) -class HDF5DatasetConfiguration(DatasetConfiguration): +class HDF5DatasetIOConfiguration(DatasetIOConfiguration): """A data model for configuring options about an object that will become a HDF5 Dataset in the file.""" # TODO: When using Pydantic v2, replace with `model_config = ConfigDict(...)` @@ -90,7 +90,7 @@ class HDF5BackendConfiguration(BackendConfiguration): data_io_class: Type[H5DataIO] = Field( # TODO: in pydantic v2 use property instead of class attribute default=H5DataIO, description="The DataIO class that is specific to HDF5." ) - dataset_configurations: Dict[str, HDF5DatasetConfiguration] = Field( + dataset_configurations: Dict[str, HDF5DatasetIOConfiguration] = Field( description=( "A mapping from object locations to their HDF5DatasetConfiguration specification that contains all " "information for writing the datasets to disk using the HDF5 backend." diff --git a/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py b/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py index 760c7c2a9..14214b513 100644 --- a/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py +++ b/src/neuroconv/tools/nwb_helpers/_models/_zarr_models.py @@ -7,7 +7,7 @@ from hdmf_zarr import ZarrDataIO from pydantic import Field, root_validator -from ._base_models import BackendConfiguration, DatasetConfiguration +from ._base_models import BackendConfiguration, DatasetIOConfiguration _base_zarr_codecs = set(zarr.codec_registry.keys()) _lossy_zarr_codecs = set(("astype", "bitround", "quantize")) @@ -43,7 +43,7 @@ } -class ZarrDatasetConfiguration(DatasetConfiguration): +class ZarrDatasetIOConfiguration(DatasetIOConfiguration): """A data model for configuring options about an object that will become a Zarr Dataset in the file.""" # TODO: When using Pydantic v2, replace with `model_config = ConfigDict(...)` @@ -147,7 +147,7 @@ class ZarrBackendConfiguration(BackendConfiguration): data_io_class: Type[ZarrDataIO] = Field( default=ZarrDataIO, description="The DataIO class that is specific to Zarr." ) - dataset_configurations: Dict[str, ZarrDatasetConfiguration] = Field( + dataset_configurations: Dict[str, ZarrDatasetIOConfiguration] = Field( description=( "A mapping from object locations to their ZarrDatasetConfiguration specification that contains all " "information for writing the datasets to disk using the Zarr backend." 
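[Editor's note — illustrative sketch, not part of the patch: the snippet below shows how the new `get_default_dataset_io_configurations` helper from `_dataset_configuration.py` and the renamed `HDF5DatasetIOConfiguration` model introduced above are expected to be used together, based only on the signatures and tests in this diff. The `ExampleTimeSeries` object and its array shape are hypothetical.]

```python
import numpy as np
from pynwb.testing.mock.base import mock_TimeSeries
from pynwb.testing.mock.file import mock_NWBFile

from neuroconv.tools.nwb_helpers import get_default_dataset_io_configurations

# Build a small in-memory NWBFile containing a single (hypothetical) TimeSeries.
nwbfile = mock_NWBFile()
nwbfile.add_acquisition(mock_TimeSeries(name="ExampleTimeSeries", data=np.random.rand(1_000, 384)))

# The helper is a generator; each yielded item is a backend-specific Pydantic model
# (HDF5DatasetIOConfiguration for backend="hdf5") pre-filled with default
# chunk/buffer/compression values for one field that would become a dataset on disk.
for dataset_io_configuration in get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5"):
    print(dataset_io_configuration.dataset_info.location)  # e.g. "acquisition/ExampleTimeSeries/data"
    print(dataset_io_configuration.chunk_shape, dataset_io_configuration.buffer_shape)
```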
diff --git a/src/neuroconv/tools/testing/__init__.py b/src/neuroconv/tools/testing/__init__.py index 502634466..2d5b06497 100644 --- a/src/neuroconv/tools/testing/__init__.py +++ b/src/neuroconv/tools/testing/__init__.py @@ -1,9 +1,9 @@ from ._mock._mock_dataset_models import ( mock_DatasetInfo, mock_HDF5BackendConfiguration, - mock_HDF5DatasetConfiguration, + mock_HDF5DatasetIOConfiguration, mock_ZarrBackendConfiguration, - mock_ZarrDatasetConfiguration, + mock_ZarrDatasetIOConfiguration, ) from .mock_files import generate_path_expander_demo_ibl from .mock_interfaces import MockBehaviorEventInterface, MockSpikeGLXNIDQInterface diff --git a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py index 6860f7078..e8ea80826 100644 --- a/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py +++ b/src/neuroconv/tools/testing/_mock/_mock_dataset_models.py @@ -9,9 +9,9 @@ AVAILABLE_ZARR_COMPRESSION_METHODS, DatasetInfo, HDF5BackendConfiguration, - HDF5DatasetConfiguration, + HDF5DatasetIOConfiguration, ZarrBackendConfiguration, - ZarrDatasetConfiguration, + ZarrDatasetIOConfiguration, ) @@ -30,14 +30,14 @@ def mock_DatasetInfo( ) -def mock_HDF5DatasetConfiguration( +def mock_HDF5DatasetIOConfiguration( compression_method: Union[ Literal[tuple(AVAILABLE_HDF5_COMPRESSION_METHODS.keys())], h5py._hl.filters.FilterRefBase, None ] = "gzip", compression_options: Union[Dict[str, Any], None] = None, -) -> HDF5DatasetConfiguration: - """Mock instance of a HDF5DatasetConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" - return HDF5DatasetConfiguration( +) -> HDF5DatasetIOConfiguration: + """Mock object of a HDF5DatasetIOConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" + return HDF5DatasetIOConfiguration( dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), # ~10 MB buffer_shape=(1_250_000, 384), # ~1 GB @@ -46,7 +46,7 @@ def mock_HDF5DatasetConfiguration( ) -def mock_ZarrDatasetConfiguration( +def mock_ZarrDatasetIOConfiguration( compression_method: Union[ Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], numcodecs.abc.Codec, None ] = "gzip", @@ -55,9 +55,9 @@ def mock_ZarrDatasetConfiguration( Union[Literal[tuple(AVAILABLE_ZARR_COMPRESSION_METHODS.keys())], numcodecs.abc.Codec, None] ] = None, filter_options: Union[Iterable[Dict[str, Any]], None] = None, -) -> ZarrDatasetConfiguration: - """Mock instance of a ZarrDatasetConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" - return ZarrDatasetConfiguration( +) -> ZarrDatasetIOConfiguration: + """Mock object of a ZarrDatasetIOConfiguration with NeuroPixel-like values to show chunk/buffer recommendations.""" + return ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), # ~10 MB buffer_shape=(1_250_000, 384), # ~1 GB @@ -71,12 +71,12 @@ def mock_ZarrDatasetConfiguration( def mock_HDF5BackendConfiguration() -> HDF5BackendConfiguration: """Mock instance of a HDF5BackendConfiguration with two NeuroPixel-like datasets.""" dataset_configurations = { - "acquisition/TestElectricalSeriesAP/data": HDF5DatasetConfiguration( + "acquisition/TestElectricalSeriesAP/data": HDF5DatasetIOConfiguration( dataset_info=mock_DatasetInfo(location="acquisition/TestElectricalSeriesAP/data"), chunk_shape=(78_125, 64), # ~10 MB buffer_shape=(1_250_000, 384), # ~1 GB ), - "acquisition/TestElectricalSeriesLF/data": HDF5DatasetConfiguration( + 
"acquisition/TestElectricalSeriesLF/data": HDF5DatasetIOConfiguration( dataset_info=mock_DatasetInfo( object_id="bc37e164-519f-4b65-a976-206440f1d325", location="acquisition/TestElectricalSeriesLF/data", @@ -93,13 +93,13 @@ def mock_HDF5BackendConfiguration() -> HDF5BackendConfiguration: def mock_ZarrBackendConfiguration() -> ZarrBackendConfiguration: """Mock instance of a HDF5BackendConfiguration with several NeuroPixel-like datasets.""" dataset_configurations = { - "acquisition/TestElectricalSeriesAP/data": ZarrDatasetConfiguration( + "acquisition/TestElectricalSeriesAP/data": ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo(location="acquisition/TestElectricalSeriesAP/data"), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384), # ~1 GB filter_methods=["delta"], ), - "acquisition/TestElectricalSeriesLF/data": ZarrDatasetConfiguration( + "acquisition/TestElectricalSeriesLF/data": ZarrDatasetIOConfiguration( dataset_info=mock_DatasetInfo( object_id="bc37e164-519f-4b65-a976-206440f1d325", location="acquisition/TestElectricalSeriesLF/data", diff --git a/tests/imports.py b/tests/imports.py index 7048d76f4..656ddfea9 100644 --- a/tests/imports.py +++ b/tests/imports.py @@ -63,6 +63,7 @@ def test_tools(self): "deploy_process", "LocalPathExpander", "get_module", + "hdmf", ] assert sorted(current_structure) == sorted(expected_structure) diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py similarity index 79% rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_configuration_model.py rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py index 892638a2c..c8a6738b7 100644 --- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_configuration_model.py +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_common_dataset_io_configuration_model.py @@ -4,21 +4,21 @@ import pytest from neuroconv.tools.nwb_helpers import ( - HDF5DatasetConfiguration, - ZarrDatasetConfiguration, + HDF5DatasetIOConfiguration, + ZarrDatasetIOConfiguration, ) from neuroconv.tools.testing import ( mock_DatasetInfo, - mock_HDF5DatasetConfiguration, - mock_ZarrDatasetConfiguration, + mock_HDF5DatasetIOConfiguration, + mock_ZarrDatasetIOConfiguration, ) @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_length_consistency( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -35,10 +35,10 @@ def test_validator_chunk_length_consistency( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_and_buffer_length_consistency( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + 
dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -55,10 +55,10 @@ def test_validator_chunk_and_buffer_length_consistency( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_shape_nonpositive_elements( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -75,10 +75,10 @@ def test_validator_chunk_shape_nonpositive_elements( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_buffer_shape_nonpositive_elements( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -95,10 +95,10 @@ def test_validator_buffer_shape_nonpositive_elements( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_shape_exceeds_buffer_shape( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -115,10 +115,10 @@ def test_validator_chunk_shape_exceeds_buffer_shape( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_buffer_shape_exceeds_full_shape( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -135,10 +135,10 @@ def test_validator_buffer_shape_exceeds_full_shape( @pytest.mark.parametrize( - argnames="dataset_configuration_class", argvalues=[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + argnames="dataset_configuration_class", argvalues=[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ) def test_validator_chunk_dimensions_do_not_evenly_divide_buffer( - dataset_configuration_class: Union[HDF5DatasetConfiguration, ZarrDatasetConfiguration] + dataset_configuration_class: Union[HDF5DatasetIOConfiguration, ZarrDatasetIOConfiguration] ): with pytest.raises(ValueError) as error_info: dataset_configuration_class( @@ -155,10 +155,11 @@ def test_validator_chunk_dimensions_do_not_evenly_divide_buffer( @pytest.mark.parametrize( - argnames="mock_dataset_configuration", argvalues=[mock_HDF5DatasetConfiguration(), 
mock_ZarrDatasetConfiguration()] + argnames="mock_dataset_configuration", + argvalues=[mock_HDF5DatasetIOConfiguration(), mock_ZarrDatasetIOConfiguration()], ) def test_mutation_validation( - mock_dataset_configuration: Union[mock_HDF5DatasetConfiguration, mock_ZarrDatasetConfiguration] + mock_dataset_configuration: Union[mock_HDF5DatasetIOConfiguration, mock_ZarrDatasetIOConfiguration] ): """ Only testing on one dummy case to verify the root validator is triggered. diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py new file mode 100644 index 000000000..69545adbf --- /dev/null +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations.py @@ -0,0 +1,300 @@ +"""Unit tests for `get_default_dataset_io_configurations`.""" +from typing import Literal + +import numpy as np +import pytest +from hdmf.common import VectorData +from hdmf.data_utils import DataChunkIterator +from nwbinspector.utils import is_module_installed +from pynwb.base import DynamicTable +from pynwb.behavior import CompassDirection +from pynwb.image import ImageSeries +from pynwb.misc import Units +from pynwb.testing.mock.base import mock_TimeSeries +from pynwb.testing.mock.behavior import mock_SpatialSeries +from pynwb.testing.mock.file import mock_NWBFile + +from neuroconv.tools.hdmf import SliceableDataChunkIterator +from neuroconv.tools.nwb_helpers import ( + DATASET_IO_CONFIGURATIONS, + get_default_dataset_io_configurations, + get_module, +) + + +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_time_series(iterator: callable, backend: Literal["hdf5", "zarr"]): + array = np.array([[1, 2, 3], [4, 5, 6]]) + data = iterator(array) + + nwbfile = mock_NWBFile() + time_series = mock_TimeSeries(name="TestTimeSeries", data=data) + nwbfile.add_acquisition(time_series) + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.object_id == time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestTimeSeries/data" + assert dataset_configuration.dataset_info.full_shape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_external_image_series(backend: Literal["hdf5", "zarr"]): + nwbfile = mock_NWBFile() + image_series = ImageSeries(name="TestImageSeries", external_file=[""], rate=1.0) + nwbfile.add_acquisition(image_series) + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + assert 
len(dataset_configurations) == 0 + + +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_dynamic_table(iterator: callable, backend: Literal["hdf5", "zarr"]): + array = np.array([0.1, 0.2, 0.3]) + data = iterator(array) + + nwbfile = mock_NWBFile() + column = VectorData(name="TestColumn", description="", data=data) + dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column], id=list(range(len(array)))) + nwbfile.add_acquisition(dynamic_table) + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.object_id == column.object_id + assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data" + assert dataset_configuration.dataset_info.full_shape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_ragged_units_table(backend: Literal["hdf5", "zarr"]): + nwbfile = mock_NWBFile() + units = Units(name="units", description="") + + spike_times = np.array([0.0, 1.0, 2.0]) + waveforms = np.array([[[1, 2, 3], [1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3], [1, 2, 3]]], dtype="int32") + units.add_unit(spike_times=spike_times, waveforms=waveforms) + + spike_times = np.array([3.0, 4.0]) + waveforms = np.array([[[4, 5], [4, 5], [4, 5]], [[4, 5], [4, 5], [4, 5]]], dtype="int32") + units.add_unit(spike_times=spike_times, waveforms=waveforms) + + nwbfile.units = units + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + assert len(dataset_configurations) == 5 + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/spike_times/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (5,) + assert dataset_configuration.dataset_info.dtype == np.dtype("float64") + assert dataset_configuration.chunk_shape == (5,) + assert dataset_configuration.buffer_shape == (5,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/spike_times_index/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (2,) + assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + 
assert dataset_configuration.chunk_shape == (2,) + assert dataset_configuration.buffer_shape == (2,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/waveforms/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (12, 3) + assert dataset_configuration.dataset_info.dtype == np.dtype("int32") + assert dataset_configuration.chunk_shape == (12, 3) + assert dataset_configuration.buffer_shape == (12, 3) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/waveforms_index/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (4,) + assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.chunk_shape == (4,) + assert dataset_configuration.buffer_shape == (4,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.location == "units/waveforms_index_index/data" + ) + assert isinstance(dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.full_shape == (2,) + assert dataset_configuration.dataset_info.dtype == np.dtype("uint8") + assert dataset_configuration.chunk_shape == (2,) + assert dataset_configuration.buffer_shape == (2,) + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +@pytest.mark.parametrize("iterator", [lambda x: x, SliceableDataChunkIterator, DataChunkIterator]) +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_compass_direction(iterator: callable, backend: Literal["hdf5", "zarr"]): + array = np.array([[1, 2, 3], [4, 5, 6]]) + data = iterator(array) + + nwbfile = mock_NWBFile() + spatial_series = mock_SpatialSeries(name="TestSpatialSeries", data=data) + compass_direction = CompassDirection(name="TestCompassDirection", spatial_series=spatial_series) + behavior_module = get_module(nwbfile=nwbfile, name="behavior") + behavior_module.add(compass_direction) + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, 
DATASET_IO_CONFIGURATIONS[backend]) + assert dataset_configuration.dataset_info.object_id == spatial_series.object_id + assert ( + dataset_configuration.dataset_info.location == "processing/behavior/TestCompassDirection/TestSpatialSeries/data" + ) + assert dataset_configuration.dataset_info.full_shape == array.shape + assert dataset_configuration.dataset_info.dtype == array.dtype + assert dataset_configuration.chunk_shape == array.shape + assert dataset_configuration.buffer_shape == array.shape + assert dataset_configuration.compression_method == "gzip" + assert dataset_configuration.compression_options is None + + if backend == "zarr": + assert dataset_configuration.filter_methods is None + assert dataset_configuration.filter_options is None + + +@pytest.mark.skipif( + not is_module_installed(module_name="ndx_events"), reason="The extra testing package 'ndx-events' is not installed!" +) +@pytest.mark.parametrize("backend", ["hdf5", "zarr"]) +def test_configuration_on_ndx_events(backend: Literal["hdf5", "zarr"]): + from ndx_events import LabeledEvents + + # ndx_events data fields do not support wrapping in DataChunkIterators - data is nearly always small enough + # to fit entirely in memory + data = np.array([1, 2, 3], dtype="uint32") + timestamps = np.array([4.5, 6.7, 8.9]) + + nwbfile = mock_NWBFile() + labeled_events = LabeledEvents( + name="TestLabeledEvents", + description="", + timestamps=timestamps, + data=data, + labels=["response_left", "cue_onset", "cue_offset"], + ) + behavior_module = get_module(nwbfile=nwbfile, name="behavior") + behavior_module.add(labeled_events) + + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend=backend)) + + # Note that the labels dataset is not caught since we search only for 'data' and 'timestamps' fields + assert len(dataset_configurations) == 2 + + data_dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.dataset_name == "data" + ) + assert isinstance(data_dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert data_dataset_configuration.dataset_info.object_id == labeled_events.object_id + assert data_dataset_configuration.dataset_info.location == "processing/behavior/TestLabeledEvents/data" + assert data_dataset_configuration.dataset_info.full_shape == data.shape + assert data_dataset_configuration.dataset_info.dtype == data.dtype + assert data_dataset_configuration.chunk_shape == data.shape + assert data_dataset_configuration.buffer_shape == data.shape + assert data_dataset_configuration.compression_method == "gzip" + assert data_dataset_configuration.compression_options is None + + if backend == "zarr": + assert data_dataset_configuration.filter_methods is None + assert data_dataset_configuration.filter_options is None + + timestamps_dataset_configuration = next( + dataset_configuration + for dataset_configuration in dataset_configurations + if dataset_configuration.dataset_info.dataset_name == "timestamps" + ) + assert isinstance(timestamps_dataset_configuration, DATASET_IO_CONFIGURATIONS[backend]) + assert timestamps_dataset_configuration.dataset_info.object_id == labeled_events.object_id + assert timestamps_dataset_configuration.dataset_info.location == "processing/behavior/TestLabeledEvents/timestamps" + assert timestamps_dataset_configuration.dataset_info.full_shape == timestamps.shape + assert timestamps_dataset_configuration.dataset_info.dtype == timestamps.dtype + assert 
timestamps_dataset_configuration.chunk_shape == timestamps.shape + assert timestamps_dataset_configuration.buffer_shape == timestamps.shape + assert timestamps_dataset_configuration.compression_method == "gzip" + assert timestamps_dataset_configuration.compression_options is None + + if backend == "zarr": + assert timestamps_dataset_configuration.filter_methods is None + assert timestamps_dataset_configuration.filter_options is None diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py new file mode 100644 index 000000000..3125bfc73 --- /dev/null +++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_helpers/test_get_default_dataset_io_configurations_appended_files.py @@ -0,0 +1,146 @@ +""" +Unit tests for `get_default_dataset_io_configurations` operating on already written files open in append mode. +Mostly testing that the right objects are skipped from identification as candidates for configuration. +""" +from pathlib import Path + +import numpy as np +import pytest +from hdmf.common import VectorData +from hdmf_zarr import NWBZarrIO +from pynwb import NWBHDF5IO, NWBFile +from pynwb.base import DynamicTable +from pynwb.testing.mock.base import mock_TimeSeries +from pynwb.testing.mock.file import mock_NWBFile + +from neuroconv.tools.nwb_helpers import ( + HDF5DatasetIOConfiguration, + ZarrDatasetIOConfiguration, + get_default_dataset_io_configurations, +) + + +def generate_nwbfile_with_existing_time_series() -> NWBFile: + nwbfile = mock_NWBFile() + array = np.array([[1, 2, 3], [4, 5, 6]]) + time_series = mock_TimeSeries(name="ExistingTimeSeries", data=array) + nwbfile.add_acquisition(time_series) + return nwbfile + + +@pytest.fixture(scope="session") +def hdf5_nwbfile_path(tmpdir_factory): + nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_dataset_configurations_hdf5_nwbfile_.nwb.h5") + if not Path(nwbfile_path).exists(): + nwbfile = generate_nwbfile_with_existing_time_series() + with NWBHDF5IO(path=str(nwbfile_path), mode="w") as io: + io.write(nwbfile) + return str(nwbfile_path) + + +@pytest.fixture(scope="session") +def zarr_nwbfile_path(tmpdir_factory): + nwbfile_path = tmpdir_factory.mktemp("data").join("test_default_dataset_configurations_zarr_nwbfile.nwb.zarr") + if not Path(nwbfile_path).exists(): + nwbfile = generate_nwbfile_with_existing_time_series() + with NWBZarrIO(path=str(nwbfile_path), mode="w") as io: + io.write(nwbfile) + return str(nwbfile_path) + + +def test_unwrapped_time_series_hdf5(hdf5_nwbfile_path): + array = np.array([[1, 2, 3], [4, 5, 6]]) + + with NWBHDF5IO(path=hdf5_nwbfile_path, mode="a") as io: + nwbfile = io.read() + new_time_series = mock_TimeSeries(name="NewTimeSeries", data=array) + nwbfile.add_acquisition(new_time_series) + dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5")) + + assert len(dataset_configurations) == 1 + + dataset_configuration = dataset_configurations[0] + assert isinstance(dataset_configuration, HDF5DatasetIOConfiguration) + assert dataset_configuration.dataset_info.object_id == new_time_series.object_id + assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data" + assert dataset_configuration.dataset_info.full_shape == array.shape + assert 
dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.chunk_shape == array.shape
+    assert dataset_configuration.buffer_shape == array.shape
+    assert dataset_configuration.compression_method == "gzip"
+    assert dataset_configuration.compression_options is None
+
+
+def test_unwrapped_time_series_zarr(zarr_nwbfile_path):
+    array = np.array([[1, 2, 3], [4, 5, 6]])
+
+    with NWBZarrIO(path=zarr_nwbfile_path, mode="a") as io:
+        nwbfile = io.read()
+        new_time_series = mock_TimeSeries(name="NewTimeSeries", data=array)
+        nwbfile.add_acquisition(new_time_series)
+        dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="zarr"))
+
+    assert len(dataset_configurations) == 1
+
+    dataset_configuration = dataset_configurations[0]
+    assert isinstance(dataset_configuration, ZarrDatasetIOConfiguration)
+    assert dataset_configuration.dataset_info.object_id == new_time_series.object_id
+    assert dataset_configuration.dataset_info.location == "acquisition/NewTimeSeries/data"
+    assert dataset_configuration.dataset_info.full_shape == array.shape
+    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.chunk_shape == array.shape
+    assert dataset_configuration.buffer_shape == array.shape
+    assert dataset_configuration.compression_method == "gzip"
+    assert dataset_configuration.compression_options is None
+    assert dataset_configuration.filter_methods is None
+    assert dataset_configuration.filter_options is None
+
+
+def test_unwrapped_dynamic_table_hdf5(hdf5_nwbfile_path):
+    array = np.array([0.1, 0.2, 0.3])
+
+    with NWBHDF5IO(path=hdf5_nwbfile_path, mode="a") as io:
+        nwbfile = io.read()
+        column = VectorData(name="TestColumn", description="", data=array.squeeze())
+        dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column])
+        nwbfile.add_acquisition(dynamic_table)
+        dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="hdf5"))
+
+    assert len(dataset_configurations) == 1
+
+    dataset_configuration = dataset_configurations[0]
+    assert isinstance(dataset_configuration, HDF5DatasetIOConfiguration)
+    assert dataset_configuration.dataset_info.object_id == column.object_id
+    assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data"
+    assert dataset_configuration.dataset_info.full_shape == array.shape
+    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.chunk_shape == array.shape
+    assert dataset_configuration.buffer_shape == array.shape
+    assert dataset_configuration.compression_method == "gzip"
+    assert dataset_configuration.compression_options is None
+
+
+def test_unwrapped_dynamic_table_zarr(zarr_nwbfile_path):
+    array = np.array([0.1, 0.2, 0.3])
+
+    with NWBZarrIO(path=zarr_nwbfile_path, mode="a") as io:
+        nwbfile = io.read()
+        column = VectorData(name="TestColumn", description="", data=array.squeeze())
+        dynamic_table = DynamicTable(name="TestDynamicTable", description="", columns=[column])
+        nwbfile.add_acquisition(dynamic_table)
+        dataset_configurations = list(get_default_dataset_io_configurations(nwbfile=nwbfile, backend="zarr"))
+
+    assert len(dataset_configurations) == 1
+
+    dataset_configuration = dataset_configurations[0]
+    assert isinstance(dataset_configuration, ZarrDatasetIOConfiguration)
+    assert dataset_configuration.dataset_info.object_id == column.object_id
+    assert dataset_configuration.dataset_info.location == "acquisition/TestDynamicTable/TestColumn/data"
+    assert dataset_configuration.dataset_info.full_shape == array.shape
+    assert dataset_configuration.dataset_info.dtype == array.dtype
+    assert dataset_configuration.chunk_shape == array.shape
+    assert dataset_configuration.buffer_shape == array.shape
+    assert dataset_configuration.compression_method == "gzip"
+    assert dataset_configuration.compression_options is None
+    assert dataset_configuration.filter_methods is None
+    assert dataset_configuration.filter_options is None
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_info_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py
similarity index 100%
rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_info_model.py
rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_info_model.py
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
similarity index 58%
rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_configuration_model.py
rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
index fd9e624a3..33b32d10a 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_dataset_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_dataset_io_configuration_model.py
@@ -1,26 +1,26 @@
 """Unit tests for the DatasetConfiguration Pydantic model."""
 import pytest
 
-from neuroconv.tools.nwb_helpers._models._base_models import DatasetConfiguration
+from neuroconv.tools.nwb_helpers._models._base_models import DatasetIOConfiguration
 from neuroconv.tools.testing import mock_DatasetInfo
 
 
 def test_get_data_io_kwargs_abstract_error():
     with pytest.raises(TypeError) as error_info:
-        DatasetConfiguration(dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384))
-    assert "Can't instantiate abstract class DatasetConfiguration with abstract" in str(error_info.value)
+        DatasetIOConfiguration(dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384))
+    assert "Can't instantiate abstract class DatasetIOConfiguration with abstract" in str(error_info.value)
 
 
 def test_get_data_io_kwargs_not_implemented():
-    class TestDatasetConfiguration(DatasetConfiguration):
+    class TestDatasetIOConfiguration(DatasetIOConfiguration):
         def get_data_io_kwargs(self):
            super().get_data_io_kwargs()
 
-    dataset_configuration = TestDatasetConfiguration(
+    dataset_io_configuration = TestDatasetIOConfiguration(
         dataset_info=mock_DatasetInfo(),
         chunk_shape=(78_125, 64),
         buffer_shape=(1_250_000, 384),
     )
 
     with pytest.raises(NotImplementedError):
-        dataset_configuration.get_data_io_kwargs()
+        dataset_io_configuration.get_data_io_kwargs()
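# A minimal sketch of the contract exercised by the model tests above: DatasetIOConfiguration is
# abstract, and a concrete subclass only needs to supply get_data_io_kwargs(), which translates the
# configuration into backend-specific DataIO keyword arguments. The subclass below is hypothetical
# and for illustration only; the imports mirror the ones used in the test module.
from neuroconv.tools.nwb_helpers._models._base_models import DatasetIOConfiguration
from neuroconv.tools.testing import mock_DatasetInfo


class MinimalDatasetIOConfiguration(DatasetIOConfiguration):
    def get_data_io_kwargs(self) -> dict:
        # Forward only the chunk shape; the real backend models also translate compression settings.
        return dict(chunks=self.chunk_shape)


dataset_io_configuration = MinimalDatasetIOConfiguration(
    dataset_info=mock_DatasetInfo(), chunk_shape=(78_125, 64), buffer_shape=(1_250_000, 384)
)
print(dataset_io_configuration.get_data_io_kwargs())  # {'chunks': (78125, 64)}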
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py
similarity index 95%
rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_backend_configuration_model.py
rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py
index 2d6242ad1..7377ff1b8 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_backend_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_backend_configuration_model.py
@@ -6,7 +6,7 @@
 
 
 def test_hdf5_backend_configuration_print():
-    """Test the printout display of a HDF5DatasetConfiguration model looks nice."""
+    """Test the printout display of a HDF5BackendConfiguration model looks nice."""
     hdf5_backend_configuration = mock_HDF5BackendConfiguration()
 
     with patch("sys.stdout", new=StringIO()) as out:
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py
similarity index 73%
rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_dataset_configuration_model.py
rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py
index d6de7ab4c..b31387fbf 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_hdf5_dataset_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_hdf5_dataset_io_configuration_model.py
@@ -1,16 +1,14 @@
-"""Unit tests for the HDF5DatasetConfiguration Pydantic model."""
+"""Unit tests for the HDF5DatasetIOConfiguration Pydantic model."""
 from io import StringIO
 from unittest.mock import patch
 
-import pytest
-
 from neuroconv.tools.nwb_helpers import AVAILABLE_HDF5_COMPRESSION_METHODS
-from neuroconv.tools.testing import mock_HDF5DatasetConfiguration
+from neuroconv.tools.testing import mock_HDF5DatasetIOConfiguration
 
 
 def test_hdf5_dataset_configuration_print():
-    """Test the printout display of a HDF5DatasetConfiguration model looks nice."""
-    hdf5_dataset_configuration = mock_HDF5DatasetConfiguration()
+    """Test the printout display of a HDF5DatasetIOConfiguration model looks nice."""
+    hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration()
 
     with patch("sys.stdout", new=StringIO()) as out:
         print(hdf5_dataset_configuration)
@@ -35,8 +33,8 @@ def test_hdf5_dataset_configuration_print():
 
 
 def test_hdf5_dataset_configuration_print_with_compression_options():
-    """Test the printout display of a HDF5DatasetConfiguration model looks nice."""
-    hdf5_dataset_configuration = mock_HDF5DatasetConfiguration(compression_options=dict(level=5))
+    """Test the printout display of a HDF5DatasetIOConfiguration model looks nice."""
+    hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration(compression_options=dict(level=5))
 
     with patch("sys.stdout", new=StringIO()) as out:
         print(hdf5_dataset_configuration)
@@ -62,8 +60,8 @@ def test_hdf5_dataset_configuration_print_with_compression_options():
 
 
 def test_hdf5_dataset_configuration_print_with_compression_disabled():
-    """Test the printout display of a HDF5DatasetConfiguration model looks nice."""
-    hdf5_dataset_configuration = mock_HDF5DatasetConfiguration(compression_method=None)
+    """Test the printout display of a HDF5DatasetIOConfiguration model looks nice."""
+    hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration(compression_method=None)
 
     with patch("sys.stdout", new=StringIO()) as out:
         print(hdf5_dataset_configuration)
@@ -86,12 +84,12 @@ def test_hdf5_dataset_configuration_print_with_compression_disabled():
 
 
 def test_hdf5_dataset_configuration_repr():
-    """Test the programmatic repr of a HDF5DatasetConfiguration model is more dataclass-like."""
-    hdf5_dataset_configuration = mock_HDF5DatasetConfiguration()
+    """Test the programmatic repr of a HDF5DatasetIOConfiguration model is more dataclass-like."""
+    hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration()
 
     # Important to keep the `repr` unmodified for appearance inside iterables of DatasetInfo objects
     expected_repr = (
-        "HDF5DatasetConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
+        "HDF5DatasetIOConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
         "location='acquisition/TestElectricalSeries/data', dataset_name='data', dtype=dtype('int16'), "
         "full_shape=(1800000, 384)), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', "
         "compression_options=None)"
@@ -108,7 +106,7 @@ def test_default_compression_is_always_available():
 
 
 def test_get_data_io_kwargs():
-    hdf5_dataset_configuration = mock_HDF5DatasetConfiguration()
+    hdf5_dataset_configuration = mock_HDF5DatasetIOConfiguration()
 
     assert hdf5_dataset_configuration.get_data_io_kwargs() == dict(
         chunks=(78125, 64), compression="gzip", compression_opts=None
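# Sketch of how these keyword arguments are typically consumed; wiring them into H5DataIO is an
# assumption here (it is not shown in this diff), but the keys line up with hdmf's H5DataIO signature.
import numpy as np
from hdmf.backends.hdf5 import H5DataIO

from neuroconv.tools.testing import mock_HDF5DatasetIOConfiguration

dataset_io_configuration = mock_HDF5DatasetIOConfiguration()
data_io_kwargs = dataset_io_configuration.get_data_io_kwargs()
# data_io_kwargs == {"chunks": (78125, 64), "compression": "gzip", "compression_opts": None}

# A small array matching a single chunk is used purely for illustration; the mocked configuration
# actually describes a (1800000, 384) int16 dataset.
wrapped_data = H5DataIO(data=np.empty(shape=(78125, 64), dtype="int16"), **data_io_kwargs)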
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_backend_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py
similarity index 95%
rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_backend_configuration_model.py
rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py
index e8017c719..da417710c 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_backend_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_backend_configuration_model.py
@@ -6,7 +6,7 @@
 
 
 def test_zarr_backend_configuration_print():
-    """Test the printout display of a HDF5DatasetConfiguration model looks nice."""
+    """Test the printout display of a ZarrBackendConfiguration model looks nice."""
     zarr_backend_configuration = mock_ZarrBackendConfiguration()
 
     with patch("sys.stdout", new=StringIO()) as out:
diff --git a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_dataset_configuration_model.py b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py
similarity index 79%
rename from tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_dataset_configuration_model.py
rename to tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py
index 8ddc5bf7e..e99c1dbca 100644
--- a/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_zarr_dataset_configuration_model.py
+++ b/tests/test_minimal/test_tools/test_backend_and_dataset_configuration/test_models/test_zarr_dataset_io_configuration_model.py
@@ -1,4 +1,4 @@
-"""Unit tests for the ZarrDatasetConfiguration Pydantic model."""
+"""Unit tests for the ZarrDatasetIOConfiguration Pydantic model."""
 from io import StringIO
 from unittest.mock import patch
 
@@ -7,14 +7,14 @@
 
 from neuroconv.tools.nwb_helpers import (
     AVAILABLE_ZARR_COMPRESSION_METHODS,
-    ZarrDatasetConfiguration,
+    ZarrDatasetIOConfiguration,
 )
-from neuroconv.tools.testing import mock_DatasetInfo, mock_ZarrDatasetConfiguration
+from neuroconv.tools.testing import mock_DatasetInfo, mock_ZarrDatasetIOConfiguration
 
 
-def test_zarr_dataset_configuration_print():
-    """Test the printout display of a ZarrDatasetConfiguration model looks nice."""
-    zarr_dataset_configuration = mock_ZarrDatasetConfiguration()
+def test_zarr_dataset_io_configuration_print():
+    """Test the printout display of a ZarrDatasetIOConfiguration model looks nice."""
+    zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration()
 
     with patch("sys.stdout", new=StringIO()) as out:
         print(zarr_dataset_configuration)
@@ -39,8 +39,8 @@ def test_zarr_dataset_configuration_print():
 
 
 def test_zarr_dataset_configuration_print_with_compression_options():
-    """Test the printout display of a ZarrDatasetConfiguration model looks nice."""
-    zarr_dataset_configuration = mock_ZarrDatasetConfiguration(compression_options=dict(level=5))
+    """Test the printout display of a ZarrDatasetIOConfiguration model looks nice."""
+    zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration(compression_options=dict(level=5))
 
     with patch("sys.stdout", new=StringIO()) as out:
         print(zarr_dataset_configuration)
@@ -66,8 +66,8 @@ def test_zarr_dataset_configuration_print_with_compression_options():
 
 
 def test_zarr_dataset_configuration_print_with_compression_disabled():
-    """Test the printout display of a ZarrDatasetConfiguration model looks nice."""
-    zarr_dataset_configuration = mock_ZarrDatasetConfiguration(compression_method=None)
+    """Test the printout display of a ZarrDatasetIOConfiguration model looks nice."""
+    zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration(compression_method=None)
 
     with patch("sys.stdout", new=StringIO()) as out:
         print(zarr_dataset_configuration)
@@ -90,8 +90,8 @@ def test_zarr_dataset_configuration_print_with_compression_disabled():
 
 
 def test_zarr_dataset_configuration_print_with_filter_methods():
-    """Test the printout display of a ZarrDatasetConfiguration model looks nice."""
-    zarr_dataset_configuration = mock_ZarrDatasetConfiguration(filter_methods=["delta"])
+    """Test the printout display of a ZarrDatasetIOConfiguration model looks nice."""
+    zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration(filter_methods=["delta"])
 
     with patch("sys.stdout", new=StringIO()) as out:
         print(zarr_dataset_configuration)
@@ -118,8 +118,8 @@ def test_zarr_dataset_configuration_print_with_filter_methods():
 
 
 def test_zarr_dataset_configuration_print_with_filter_options():
-    """Test the printout display of a ZarrDatasetConfiguration model looks nice."""
-    zarr_dataset_configuration = mock_ZarrDatasetConfiguration(
+    """Test the printout display of a ZarrDatasetIOConfiguration model looks nice."""
+    zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration(
         filter_methods=["blosc"], filter_options=[dict(clevel=5)]
     )
 
@@ -149,12 +149,12 @@ def test_zarr_dataset_configuration_print_with_filter_options():
 
 
 def test_zarr_dataset_configuration_repr():
-    """Test the programmatic repr of a ZarrDatasetConfiguration model is more dataclass-like."""
-    zarr_dataset_configuration = mock_ZarrDatasetConfiguration()
+    """Test the programmatic repr of a ZarrDatasetIOConfiguration model is more dataclass-like."""
+    zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration()
 
     # Important to keep the `repr` unmodified for appearance inside iterables of DatasetInfo objects
     expected_repr = (
-        "ZarrDatasetConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
+        "ZarrDatasetIOConfiguration(dataset_info=DatasetInfo(object_id='481a0860-3a0c-40ec-b931-df4a3e9b101f', "
         "location='acquisition/TestElectricalSeries/data', dataset_name='data', dtype=dtype('int16'), "
         "full_shape=(1800000, 384)), chunk_shape=(78125, 64), buffer_shape=(1250000, 384), compression_method='gzip', "
         "compression_options=None, filter_methods=None, filter_options=None)"
@@ -164,7 +164,7 @@
 
 def test_validator_filter_options_has_methods():
     with pytest.raises(ValueError) as error_info:
-        ZarrDatasetConfiguration(
+        ZarrDatasetIOConfiguration(
             dataset_info=mock_DatasetInfo(),
             chunk_shape=(78_125, 64),
             buffer_shape=(1_250_000, 384),
@@ -181,7 +181,7 @@
 
 def test_validator_filter_methods_length_match_options():
     with pytest.raises(ValueError) as error_info:
-        ZarrDatasetConfiguration(
+        ZarrDatasetIOConfiguration(
             dataset_info=mock_DatasetInfo(),
             chunk_shape=(78_125, 64),
             buffer_shape=(1_250_000, 384),
@@ -205,7 +205,7 @@ def test_default_compression_is_always_available():
 
 
 def test_get_data_io_kwargs():
-    zarr_dataset_configuration = mock_ZarrDatasetConfiguration()
+    zarr_dataset_configuration = mock_ZarrDatasetIOConfiguration()
 
     assert zarr_dataset_configuration.get_data_io_kwargs() == dict(
         chunks=(78125, 64), compressor=GZip(level=1), filters=None