From 716e3cee19724c7cf1e930465fa9733d77811b4c Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 Jan 2025 09:18:32 -0800 Subject: [PATCH 1/6] update default mock_NWBFile start time (#2018) * update default mock_NWBFile start time * Update src/pynwb/testing/mock/file.py Co-authored-by: Ryan Ly --------- Co-authored-by: Ben Dichter Co-authored-by: Ryan Ly --- src/pynwb/testing/mock/file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynwb/testing/mock/file.py b/src/pynwb/testing/mock/file.py index 943f86dcb..351f81454 100644 --- a/src/pynwb/testing/mock/file.py +++ b/src/pynwb/testing/mock/file.py @@ -1,7 +1,7 @@ from typing import Optional from uuid import uuid4 from datetime import datetime -from dateutil.tz import tzlocal +from dateutil.tz import tzutc from ...file import NWBFile, Subject from .utils import name_generator @@ -10,7 +10,7 @@ def mock_NWBFile( session_description: str = 'session_description', identifier: Optional[str] = None, - session_start_time: datetime = datetime(1970, 1, 1, tzinfo=tzlocal()), + session_start_time: datetime = datetime(1970, 1, 1, tzinfo=tzutc()), **kwargs ): return NWBFile( From bc129318edc8eedfd593413bfb7d7b5019b26ba6 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 7 Feb 2025 13:55:26 -0600 Subject: [PATCH 2/6] Update documentation for streaming NWB files and add venv to .gitignore (#2035) * Update documentation for streaming NWB files and add venv to .gitignore * Remove context manager for remfile --- .gitignore | 2 + docs/gallery/advanced_io/streaming.py | 185 +++++++++++++++----------- 2 files changed, 109 insertions(+), 78 deletions(-) diff --git a/.gitignore b/.gitignore index 95f08686e..0c81cb605 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,5 @@ _version.py .core_typemap_version core_typemap.pkl + +venv \ No newline at end of file diff --git a/docs/gallery/advanced_io/streaming.py b/docs/gallery/advanced_io/streaming.py index 4bdc992b8..428ebe300 100644 --- a/docs/gallery/advanced_io/streaming.py +++ b/docs/gallery/advanced_io/streaming.py @@ -6,9 +6,8 @@ You can read specific sections within individual data files directly from remote stores such as the `DANDI Archive `_. This is especially useful for reading small pieces of data -from a large NWB file stored -remotely. First, you will need to get the location of the file. The code below illustrates how to do this on DANDI -using the dandi API library. +from a large NWB file stored remotely. First, you will need to get the location of the file. The code +below illustrates how to do this on DANDI using the dandi API library. Getting the location of the file on DANDI ----------------------------------------- @@ -41,13 +40,68 @@ s3_url = asset.get_content_url(follow_redirects=1, strip_query=True) ############################################## -# Streaming Method 1: fsspec -# -------------------------- -# fsspec is another data streaming approach that is quite flexible and has several performance advantages. This library -# creates a virtual filesystem for remote stores. With this approach, a virtual file is created for the file and -# the virtual filesystem layer takes care of requesting data from the S3 bucket whenever data is -# read from the virtual file. Note that this implementation is completely unaware of internals of the HDF5 format -# and thus can work for **any** file, not only for the purpose of use with H5PY and PyNWB. 
+# Once you have an S3 URL, you can use it to read the NWB file directly from the remote store. There are several
+# ways to do this, including using the ``remfile`` library, the ``fsspec`` library, or the ROS3 driver in h5py.
+#
+# Streaming data with ``remfile``
+# -------------------------------
+# ``remfile`` is a library that enables indexing and streaming of files in S3, optimized for reading HDF5 files.
+# ``remfile`` is simple and fast, especially for the initial load of the NWB file and for accessing small pieces of data.
+# It is a lightweight dependency with a very small codebase. Although ``remfile`` is a very new project that has not
+# been tested in a variety of use-cases, it has worked well in our hands.
+#
+# You can install ``remfile`` with pip:
+#
+# .. code-block:: bash
+#
+#     pip install remfile
+#
+# Then, in Python:
+
+import h5py
+from pynwb import NWBHDF5IO
+import remfile
+
+# Create a disk cache to store downloaded data (optional)
+cache_dirname = '/tmp/remfile_cache'
+disk_cache = remfile.DiskCache(cache_dirname)
+
+# open the file
+rem_file = remfile.File(s3_url, disk_cache=disk_cache)
+h5py_file = h5py.File(rem_file, "r")
+io = NWBHDF5IO(file=h5py_file)
+nwbfile = io.read()
+
+# now you can access the data
+streamed_data = nwbfile.acquisition["lick_times"].time_series["lick_left_times"].data[:]
+
+# close the file
+io.close()
+h5py_file.close()
+rem_file.close()
+
+##################################
+# You can also use context managers to open the file. This will automatically close the file when the context is exited.
+# This approach can be a bit cumbersome when exploring files interactively, but is the preferred approach once
+# the program is finalized because it will ensure that the file is closed properly even if an exception is raised.
+
+rem_file = remfile.File(s3_url, disk_cache=disk_cache)
+with h5py.File(rem_file, "r") as h5py_file:
+    with NWBHDF5IO(file=h5py_file, load_namespaces=True) as io:
+        nwbfile = io.read()
+        streamed_data = nwbfile.acquisition["lick_times"].time_series["lick_left_times"].data[:]
+
+# After the contexts end, the file is closed, so you cannot download new data from the file.
+
+#################################
+# Streaming data with ``fsspec``
+# ------------------------------
+# ``fsspec`` is a data streaming approach that is quite flexible. This library creates a virtual filesystem for remote
+# stores. With this approach, a virtual file is created for the file and the virtual filesystem layer takes care of
+# requesting data from the S3 bucket whenever data is read from the virtual file. Note that this implementation is
+# completely unaware of internals of the HDF5 format and thus can work for **any** file, not only for the purpose of
+# use with ``h5py`` and PyNWB. ``fsspec`` can also be used to access data from other storage backends, such as Google
+# Drive or Dropbox.
 #
 # First install ``fsspec`` and the dependencies of the :py:class:`~fsspec.implementations.http.HTTPFileSystem`:
 #
@@ -71,7 +125,23 @@
     cache_storage="nwb-cache",  # Local folder for the cache
 )
 
-# next, open the file
+# open the file
+f = fs.open(s3_url, "rb")
+file = h5py.File(f)
+io = pynwb.NWBHDF5IO(file=file)
+nwbfile = io.read()
+
+# now you can access the data
+streamed_data = nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:]
+
+# close the file
+io.close()
+file.close()
+f.close()
+
+##################################
+# You can also use context managers to open the file.
This will automatically close the file when the context is exited. + with fs.open(s3_url, "rb") as f: with h5py.File(f) as file: with pynwb.NWBHDF5IO(file=file) as io: @@ -79,101 +149,60 @@ print(nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:]) ################################## -# fsspec is a library that can be used to access a variety of different store formats, including (at the time of -# writing): +# fsspec can be used to access a variety of different stores, including (at the time of writing): # # .. code-block:: python # # from fsspec.registry import known_implementations # known_implementations.keys() # -# file, memory, dropbox, http, https, zip, tar, gcs, gs, gdrive, sftp, ssh, ftp, hdfs, arrow_hdfs, webhdfs, s3, s3a, -# wandb, oci, adl, abfs, az, cached, blockcache, filecache, simplecache, dask, dbfs, github, git, smb, jupyter, jlab, -# libarchive, reference +# abfs, adl, arrow_hdfs, asynclocal, az, blockcache, box, cached, dask, data, dbfs, dir, dropbox, dvc, +# file, filecache, ftp, gcs, gdrive, generic, git, github, gs, hdfs, hf, http, https, jlab, jupyter, +# lakefs, libarchive, local, memory, oci, ocilake, oss, reference, root, s3, s3a, sftp, simplecache, +# smb, ssh, tar, wandb, webdav, webhdfs, zip # # The S3 backend, in particular, may provide additional functionality for accessing data on DANDI. See the -# `fsspec documentation on known implementations `_ +# `fsspec documentation on known implementations +# `_ # for a full updated list of supported store formats. # -# One downside of this fsspec method is that fsspec is not optimized for reading HDF5 files, and so streaming data -# using this method can be slow. A faster alternative is ``remfile`` described below. +# One downside of the fsspec method is that fsspec is not optimized for reading HDF5 files, and so streaming data +# using this method can be slow. ``remfile`` may be a faster alternative. # -# Streaming Method 2: ROS3 +# Streaming data with ROS3 # ------------------------ # ROS3 stands for "read only S3" and is a driver created by the HDF5 Group that allows HDF5 to read HDF5 files stored # remotely in s3 buckets. Using this method requires that your HDF5 library is installed with the ROS3 driver enabled. # With ROS3 support enabled in h5py, we can instantiate a :py:class:`~pynwb.NWBHDF5IO` object with the S3 URL and -# specify the driver as "ros3". +# specify the driver as "ros3". Like the other methods, you can use a context manager to open the file and close it, +# or open the file and close it manually. from pynwb import NWBHDF5IO +# open with context manager with NWBHDF5IO(s3_url, mode='r', driver='ros3') as io: nwbfile = io.read() - print(nwbfile) - print(nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:]) + streamed_data = nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:] + +# open and close manually +io = NWBHDF5IO(s3_url, mode='r', driver='ros3') +nwbfile = io.read() +streamed_data = nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:] +io.close() ################################## # This will download metadata about the file from the S3 bucket to memory. The values of datasets are accessed lazily, -# just like when reading an NWB file stored locally. So, slicing into a dataset will require additional time to -# download the sliced data (and only the sliced data) to memory. +# just like when reading an NWB file stored locally. 
So, slicing into a dataset will download the sliced data (and +# only the sliced data) and load it directly to memory. # # .. note:: # -# Pre-built h5py packages on PyPI do not include this S3 support. If you want this feature, you could use packages -# from conda-forge, or build h5py from source against an HDF5 build with S3 support. You can install HDF5 with -# the ROS3 driver from `conda-forge `_ using ``conda``. You may -# first need to uninstall a currently installed version of ``h5py``. +# Pre-built h5py packages on PyPI do not include this S3 support. If you want this feature, we recommend installing +# ``h5py`` using conda: # # .. code-block:: bash # # pip uninstall h5py -# conda install -c conda-forge "h5py>=3.2" -# -# Besides the extra burden of installing h5py from a non-PyPI source, one downside of this ROS3 method is that -# this method does not support automatic retries in case the connection fails. - - -################################################## -# Method 3: remfile -# ----------------- -# ``remfile`` is another library that enables indexing and streaming of files in s3. remfile is simple and fast, -# especially for the initial load of the nwb file and for accessing small pieces of data. The caveats of ``remfile`` -# are that it is a very new project that has not been tested in a variety of use-cases and caching options are -# limited compared to ``fsspec``. `remfile` is a simple, lightweight dependency with a very small codebase. -# -# You can install ``remfile`` with pip: -# -# .. code-block:: bash +# conda install h5py # -# pip install remfile -# - -import h5py -from pynwb import NWBHDF5IO -import remfile - -rem_file = remfile.File(s3_url) - -with h5py.File(rem_file, "r") as h5py_file: - with NWBHDF5IO(file=h5py_file, load_namespaces=True) as io: - nwbfile = io.read() - print(nwbfile.acquisition["lick_times"].time_series["lick_left_times"].data[:]) - -################################################## -# Which streaming method to choose? -# --------------------------------- -# -# From a user perspective, once opened, the :py:class:`~pynwb.file.NWBFile` works the same with -# fsspec, ros3, or remfile. However, in general, we currently recommend using fsspec for streaming -# NWB files because it is more performant and reliable than ros3 and more widely tested than remfile. -# However, if you are experiencing long wait times for the initial file load on your network, you -# may want to try remfile. -# -# Advantages of fsspec include: -# -# 1. supports caching, which will dramatically speed up repeated requests for the -# same region of data, -# 2. automatically retries when s3 fails to return, which helps avoid errors when accessing data due to -# intermittent errors in connections with S3 (remfile does this as well), -# 3. works also with other storage backends (e.g., GoogleDrive or Dropbox, not just S3) and file formats, and -# 4. in our experience appears to provide faster out-of-the-box performance than the ros3 driver. +# Alternatively, you can build h5py from source against an HDF5 build with S3 support, but this is more complicated. 
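Before relying on the ROS3 path described in the patch above, it can be worth confirming that the installed h5py build actually registers that driver. The short sketch below is illustrative only and is not part of the diff; it assumes a recent h5py release, where ``h5py.registered_drivers()`` lists the available low-level file drivers.

import h5py

# List the low-level file drivers compiled into this h5py build.
# Builds with S3 support (e.g., from conda-forge) should include "ros3".
drivers = h5py.registered_drivers()
print(sorted(drivers))

if "ros3" not in drivers:
    # Illustrative fallback only: stream with fsspec or remfile instead,
    # or reinstall h5py as described in the note above.
    print("This h5py build does not include the ROS3 driver.")
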
From e43a602f459d05d9f3ff6fad83d2b52d0bc351b5 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 17 Feb 2025 17:24:07 -0800 Subject: [PATCH 3/6] Remove Brain Observatory example (#2026) * Remove Brain Observatory example * Update CHANGELOG.md --------- Co-authored-by: Ben Dichter --- CHANGELOG.md | 3 +- docs/gallery/domain/brain_observatory.py | 202 ----------------------- docs/source/conf.py | 1 - requirements-doc.txt | 2 - test.py | 7 +- 5 files changed, 3 insertions(+), 212 deletions(-) delete mode 100644 docs/gallery/domain/brain_observatory.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 21e9e8ae7..5ed9ddac7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,8 @@ ### Documentation and tutorial enhancements - Updated `SpikeEventSeries`, `DecompositionSeries`, and `FilteredEphys` examples. @stephprince [#2012](https://github.com/NeurodataWithoutBorders/pynwb/pull/2012) - Replaced deprecated `scipy.misc.face` dataset in the images tutorial with another example. @stephprince [#2016](https://github.com/NeurodataWithoutBorders/pynwb/pull/2016) - +- Removed Allen Brain Observatory example which was unnecessary and difficult to maintain. @rly [#2026](https://github.com/NeurodataWithoutBorders/pynwb/pull/2026) + ## PyNWB 2.8.3 (November 19, 2024) ### Enhancements and minor changes diff --git a/docs/gallery/domain/brain_observatory.py b/docs/gallery/domain/brain_observatory.py deleted file mode 100644 index 2e1a9d6a3..000000000 --- a/docs/gallery/domain/brain_observatory.py +++ /dev/null @@ -1,202 +0,0 @@ -""" -Allen Brain Observatory -================================= - -Create an nwb file from Allen Brain Observatory data. -""" - -######################################## -# This example demonstrates the basic functionality of several parts of the pynwb write API, centered around the optical -# physiology submodule (pynwb.ophys). We will use the allensdk as a read API, while leveraging the pynwb data model and -# write api to transform and write the data back to disk. -# -# .. note: Using the latest allensdk package requires Python 3.6 or higher. - -######################################## -# .. raw:: html -# :url: https://gist.githubusercontent.com/nicain/82e6b3d8f9ff5b85ef01a582e41e2389/raw/ - -# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnails_allenbrainobservatory.png' - -import allensdk.brain_observatory.stimulus_info as si -from allensdk.core.brain_observatory_cache import BrainObservatoryCache - -from pynwb import NWBHDF5IO, NWBFile, TimeSeries -from pynwb.device import Device -from pynwb.image import ImageSeries, IndexSeries -from pynwb.ophys import DfOverF, ImageSegmentation, OpticalChannel - -# Settings: -ophys_experiment_id = 562095852 -save_file_name = "brain_observatory.nwb" - -######################################## -# Let's begin by downloading an Allen Institute Brain Observatory file. After we cache this file locally (approx. 450 -# MB), we can open data assets we wish to write into our NWB:N file. These include stimulus, acquisition, and -# processing data, as well as time "epochs" (intervals of interest)". 
-boc = BrainObservatoryCache(manifest_file="manifest.json") -dataset = boc.get_ophys_experiment_data(ophys_experiment_id) -metadata = dataset.get_metadata() -cell_specimen_ids = dataset.get_cell_specimen_ids() -timestamps, dFF = dataset.get_dff_traces() -stimulus_list = [ - s for s in si.SESSION_STIMULUS_MAP[metadata["session_type"]] if s != "spontaneous" -] -running_data, _ = dataset.get_running_speed() -trial_table = dataset.get_stimulus_table("master") -trial_table["start"] = timestamps[trial_table["start"].values] -trial_table["end"] = timestamps[trial_table["end"].values] -epoch_table = dataset.get_stimulus_epoch_table() -epoch_table["start"] = timestamps[epoch_table["start"].values] -epoch_table["end"] = timestamps[epoch_table["end"].values] - -######################################## -# 1) First, lets create a top-level "file" container object. All the other NWB:N data components will be stored -# hierarchically, relative to this container. The data won't actually be written to the file system until the end of -# the script. - -nwbfile = NWBFile( - session_description="Allen Brain Observatory dataset", - identifier=str(metadata["ophys_experiment_id"]), - session_start_time=metadata["session_start_time"], -) - - -######################################## -# 2) Next, we add stimuli templates (one for each type of stimulus), and a data series that indexes these templates to -# describe what stimulus was being shown during the experiment. -for stimulus in stimulus_list: - visual_stimulus_images = ImageSeries( - name=stimulus, - data=dataset.get_stimulus_template(stimulus), - unit="NA", - format="raw", - timestamps=[0.0], - ) - image_index = IndexSeries( - name=stimulus, - data=dataset.get_stimulus_table(stimulus).frame.values, - unit="NA", - indexed_timeseries=visual_stimulus_images, - timestamps=timestamps[dataset.get_stimulus_table(stimulus).start.values], - ) - nwbfile.add_stimulus_template(visual_stimulus_images) - nwbfile.add_stimulus(image_index) - -######################################## -# 3) Besides the two-photon calcium image stack, the running speed of the animal was also recorded in this experiment. -# We can store this data as a TimeSeries, in the acquisition portion of the file. - -running_speed = TimeSeries( - name="running_speed", data=running_data, timestamps=timestamps, unit="cm/s" -) - -nwbfile.add_acquisition(running_speed) - -######################################## -# 4) In NWB:N, an "epoch" is an interval of experiment time that can slice into a timeseries (for example running_speed, -# the one we just added). PyNWB uses an object-oriented approach to create links into these timeseries, so that data is -# not copied multiple times. Here, we extract the stimulus epochs (both fine and coarse-grained) from the Brain -# Observatory experiment using the allensdk. - -for _, row in trial_table.iterrows(): - nwbfile.add_epoch( - start_time=row.start, - stop_time=row.end, - timeseries=[running_speed], - tags="trials", - ) - -for _, row in epoch_table.iterrows(): - nwbfile.add_epoch( - start_time=row.start, - stop_time=row.end, - timeseries=[running_speed], - tags="stimulus", - ) - -######################################## -# 5) In the brain observatory, a two-photon microscope is used to acquire images of the calcium activity of neurons -# expressing a fluorescent protein indicator. Essentially the microscope captures picture (30 times a second) at a -# single depth in the visual cortex (the imaging plane). 
Let's use pynwb to store the metadata associated with this -# hardware and experimental setup: -optical_channel = OpticalChannel( - name="optical_channel", - description="2P Optical Channel", - emission_lambda=520.0, -) - -device = Device(metadata["device"]) -nwbfile.add_device(device) - -imaging_plane = nwbfile.create_imaging_plane( - name="imaging_plane", - optical_channel=optical_channel, - description="Imaging plane ", - device=device, - excitation_lambda=float(metadata["excitation_lambda"].split(" ")[0]), - imaging_rate=30.0, - indicator="GCaMP6f", - location=metadata["targeted_structure"], - conversion=1.0, - unit="unknown", - reference_frame="unknown", -) - -######################################## -# The Allen Institute does not include the raw imaging signal, as this data would make the file too large. Instead, -# these data are preprocessed, and a dF/F fluorescence signal extracted for each region-of-interest (ROI). To store the -# chain of computations necessary to describe this data processing pipeline, pynwb provides a "processing module" with -# interfaces that simplify and standardize the process of adding the steps in this provenance chain to the file: -ophys_module = nwbfile.create_processing_module( - name="ophys_module", - description="Processing module for 2P calcium responses", -) - -######################################## -# 6) First, we add an image segmentation interface to the module. This interface implements a pre-defined schema and -# API that facilitates writing segmentation masks for ROI's: - -image_segmentation_interface = ImageSegmentation(name="image_segmentation") - -ophys_module.add(image_segmentation_interface) - -plane_segmentation = image_segmentation_interface.create_plane_segmentation( - name="plane_segmentation", - description="Segmentation for imaging plane", - imaging_plane=imaging_plane, -) - -for cell_specimen_id in cell_specimen_ids: - curr_name = cell_specimen_id - curr_image_mask = dataset.get_roi_mask_array([cell_specimen_id])[0] - plane_segmentation.add_roi(id=curr_name, image_mask=curr_image_mask) - -######################################## -# 7) Next, we add a dF/F interface to the module. This allows us to write the dF/F timeseries data associated with -# each ROI. 
- -dff_interface = DfOverF(name="dff_interface") -ophys_module.add(dff_interface) - -rt_region = plane_segmentation.create_roi_table_region( - description="segmented cells with cell_specimen_ids", -) - -dFF_series = dff_interface.create_roi_response_series( - name="df_over_f", - data=dFF, - unit="NA", - rois=rt_region, - timestamps=timestamps, -) - -######################################## -# Now that we have created the data set, we can write the file to disk: -with NWBHDF5IO(save_file_name, mode="w") as io: - io.write(nwbfile) - -######################################## -# For good measure, lets read the data back in and see if everything went as planned: -with NWBHDF5IO(save_file_name, mode="r") as io: - nwbfile_in = io.read() diff --git a/docs/source/conf.py b/docs/source/conf.py index 741eb5975..5745a314e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -84,7 +84,6 @@ class CustomSphinxGallerySectionSortKey(ExampleTitleSortKey): "icephys.py", "plot_behavior.py", "images.py", - "brain_observatory.py" ], 'advanced_io': [] } diff --git a/requirements-doc.txt b/requirements-doc.txt index 8ff798ff2..86892aa09 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -4,8 +4,6 @@ sphinx>=4 # improved support for docutils>=0.17 sphinx_rtd_theme>=1 # <1 does not work with docutils>=0.17 matplotlib sphinx-gallery -# allensdk>=2.13.2 # allensdk reinstalls pynwb and hdmf. TODO set up a separate workflow to test allensdk -# MarkupSafe==2.0.1 # resolve incompatibility between jinja2 and markupsafe: https://github.com/AllenInstitute/AllenSDK/issues/2308 Pillow sphinx-copybutton dataframe_image # used to render large dataframe as image in the sphinx gallery to improve html display diff --git a/test.py b/test.py index 570bd4748..6543fe70c 100644 --- a/test.py +++ b/test.py @@ -86,10 +86,6 @@ def _import_from_file(script): os.path.join('advanced_io', 'streaming.py'), ] -allensdk_examples = [ - os.path.join('domain', 'brain_observatory.py'), # TODO create separate workflow for this -] - def run_example_tests(): """Run the Sphinx gallery example files, excluding ROS3-dependent ones, to check for errors.""" @@ -99,7 +95,7 @@ def run_example_tests(): for f in files: if f.endswith(".py"): name_with_parent_dir = os.path.join(os.path.basename(root), f) - if name_with_parent_dir in ros3_examples or name_with_parent_dir in allensdk_examples: + if name_with_parent_dir in ros3_examples: logging.info("Skipping %s" % name_with_parent_dir) continue examples_scripts.append(os.path.join(root, f)) @@ -277,7 +273,6 @@ def clean_up_tests(): "basic_sparse_iterwrite_*.npy", "basics_tutorial.nwb", "behavioral_tutorial.nwb", - "brain_observatory.nwb", "cache_spec_example.nwb", "ecephys_tutorial.nwb", "ecog.extensions.yaml", From 25c2dba986caea11fd8e71210c7be27b1b3f0227 Mon Sep 17 00:00:00 2001 From: Szonja Weigl Date: Tue, 18 Feb 2025 04:19:28 +0100 Subject: [PATCH 4/6] [BUG] Fix `TimeSeries` data does not match length of timestamps should raise an error when created (#1538) * change timeseries timestamp data dimension check to raise error when instance is created but only warn when read from file * adapt timeseries change to image series * add tests for timeseries * add tests for image series * try fix ImageSeries timestamps in brain_observatory.py * add Trues * more merging * more True merge * merge * fix * fix * adapt to new construct * override --------- Co-authored-by: Heberto Mayorquin Co-authored-by: Ben Dichter Co-authored-by: rly --- src/pynwb/base.py | 20 +++++++++++++------- src/pynwb/image.py 
| 14 +++++--------- tests/unit/test_base.py | 38 ++++++++++++++++++++++++++++++++++++-- tests/unit/test_image.py | 20 ++++++++++++++------ 4 files changed, 68 insertions(+), 24 deletions(-) diff --git a/src/pynwb/base.py b/src/pynwb/base.py index 8b4daf48f..6b64b3ee0 100644 --- a/src/pynwb/base.py +++ b/src/pynwb/base.py @@ -199,28 +199,34 @@ def __init__(self, **kwargs): else: raise TypeError("either 'timestamps' or 'rate' must be specified") - if not self._check_time_series_dimension(): - warn("%s '%s': Length of data does not match length of timestamps. Your data may be transposed. " - "Time should be on the 0th dimension" % (self.__class__.__name__, self.name)) + self._error_on_new_warn_on_construct( + error_msg=self._check_time_series_dimension() + ) def _check_time_series_dimension(self): """Check that the 0th dimension of data equals the length of timestamps, when applicable. """ if self.timestamps is None: - return True + return data_shape = get_data_shape(data=self.fields["data"], strict_no_data_load=True) timestamps_shape = get_data_shape(data=self.fields["timestamps"], strict_no_data_load=True) # skip check if shape of data or timestamps cannot be computed if data_shape is None or timestamps_shape is None: - return True + return # skip check if length of the first dimension is not known if data_shape[0] is None or timestamps_shape[0] is None: - return True + return - return data_shape[0] == timestamps_shape[0] + if data_shape[0] == timestamps_shape[0]: + return + + return ( + "%s '%s': Length of data does not match length of timestamps. Your data may be transposed. " + "Time should be on the 0th dimension" % (self.__class__.__name__, self.name) + ) @property def num_samples(self): diff --git a/src/pynwb/image.py b/src/pynwb/image.py index c775297d7..84f445eac 100644 --- a/src/pynwb/image.py +++ b/src/pynwb/image.py @@ -102,13 +102,9 @@ def __init__(self, **kwargs): DeprecationWarning, ) - if not self._check_image_series_dimension(): - warnings.warn( - "%s '%s': Length of data does not match length of timestamps. Your data may be transposed. " - "Time should be on the 0th dimension" - % (self.__class__.__name__, self.name) - ) - + self._error_on_new_warn_on_construct( + error_msg=self._check_image_series_dimension() + ) self._error_on_new_warn_on_construct( error_msg=self._check_external_file_starting_frame_length() ) @@ -135,7 +131,7 @@ def _check_time_series_dimension(self): """Override _check_time_series_dimension to do nothing. The _check_image_series_dimension method will be called instead. """ - return True + return def _check_image_series_dimension(self): """Check that the 0th dimension of data equals the length of timestamps, when applicable. @@ -145,7 +141,7 @@ def _check_image_series_dimension(self): is provided. Otherwise, this function calls the parent class' _check_time_series_dimension method. """ if self.external_file is not None: - return True + return return super()._check_time_series_dimension() def _check_external_file_starting_frame_length(self): diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py index 5af4986ac..3fcaa1d04 100644 --- a/tests/unit/test_base.py +++ b/tests/unit/test_base.py @@ -375,12 +375,13 @@ def test_conflicting_time_args(self): timestamps=[0.3, 0.4, 0.5], ) - def test_dimension_warning(self): + def test_timestamps_data_length_error_raised(self): + """Test that TimeSeries cannot be created with timestamps and data of different lengths.""" msg = ( "TimeSeries 'test_ts2': Length of data does not match length of timestamps. 
Your data may be " "transposed. Time should be on the 0th dimension" ) - with self.assertWarnsWith(UserWarning, msg): + with self.assertRaisesWith(ValueError, msg): TimeSeries( name="test_ts2", data=[10, 11, 12], @@ -478,6 +479,39 @@ def test_repr_html(self): pm.add(ts2) self.assertIn('(link to processing/test_ts1/timestamps)', pm._repr_html_()) + def test_timestamps_data_length_warning_construct_mode(self): + """ + Test that warning is raised when the length of data does not match the length of + timestamps in case that the TimeSeries in construct mode (i.e., during read). + """ + msg = ( + "TimeSeries 'test_ts2': Length of data does not match length of timestamps. Your data may be " + "transposed. Time should be on the 0th dimension" + ) + for timestamps in [[0], [1, 2, 3, 4]]: + with self.subTest(): + # Create the time series in construct mode, modelling the behavior + # of the ObjectMapper on read while avoiding having to create, write, + # and read and entire NWB file + obj = TimeSeries.__new__( + TimeSeries, + container_source=None, + parent=None, + object_id="test", + in_construct_mode=True, + ) + with self.assertWarnsWith(UserWarning, msg): + obj.__init__( + name="test_ts2", + data=[10, 11, 12], + unit="grams", + timestamps=timestamps, + ) + # Disable construct mode. Since we are not using this object anymore + # this is not strictly necessary but is good style in case we expand + # the test later on. + obj._in_construct_mode = False + class TestImage(TestCase): def test_init(self): diff --git a/tests/unit/test_image.py b/tests/unit/test_image.py index 92637d82e..3f8b56b59 100644 --- a/tests/unit/test_image.py +++ b/tests/unit/test_image.py @@ -84,14 +84,13 @@ def test_external_file_no_unit(self): ) self.assertEqual(iS.unit, ImageSeries.DEFAULT_UNIT) - def test_dimension_warning(self): - """Test that a warning is raised when the dimensions of the data are not the - same as the dimensions of the timestamps.""" + def test_dimension_error(self): + """Test that ImageSeries cannot be created with timestamps and data of different lengths.""" msg = ( "ImageSeries 'test_iS': Length of data does not match length of timestamps. Your data may be " "transposed. Time should be on the 0th dimension" ) - with self.assertWarnsWith(UserWarning, msg): + with self.assertRaisesWith(ValueError, msg): ImageSeries( name='test_iS', data=np.ones((3, 3, 3)), @@ -100,9 +99,14 @@ def test_dimension_warning(self): ) def test_dimension_warning_external_file_with_timestamps(self): - """Test that a warning is not raised when external file is used with timestamps.""" + """Test that warning is not raised when external file is used with timestamps.""" + obj = ImageSeries.__new__(ImageSeries, + container_source=None, + parent=None, + object_id="test", + in_construct_mode=True) with warnings.catch_warnings(record=True) as w: - ImageSeries( + obj.__init__( name='test_iS', external_file=['external_file'], format='external', @@ -111,6 +115,10 @@ def test_dimension_warning_external_file_with_timestamps(self): timestamps=[1, 2, 3, 4] ) self.assertEqual(w, []) + # Disable construct mode. 
Since we are not using this object any more + # this is not strictly necessary but is good style in case we expand + # the test later on + obj._in_construct_mode = False def test_dimension_warning_external_file_with_rate(self): """Test that a warning is not raised when external file is used with rate.""" From 5a72510287121ab58648a55082cd46abb7ee82f5 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Tue, 18 Feb 2025 14:28:58 -0600 Subject: [PATCH 5/6] Add all to init (#2021) * chore: update .gitignore to include venv/ * Add `__all__` declarations to multiple modules for improved API clarity * Update CHANGELOG.md * Fix docstring formatting for create_icephys_testfile to include proper class references * Fix docstring formatting in create_icephys_testfile for improved readability * Add autodoc_default_options to ignore module all in documentation --- .gitignore | 2 +- CHANGELOG.md | 1 + docs/source/conf.py | 3 +++ src/pynwb/__init__.py | 26 ++++++++++++++++++++++++++ src/pynwb/base.py | 11 +++++++++++ src/pynwb/behavior.py | 11 +++++++++++ src/pynwb/core.py | 11 +++++++++++ src/pynwb/device.py | 1 + src/pynwb/ecephys.py | 10 ++++++++++ src/pynwb/epoch.py | 1 + src/pynwb/file.py | 10 ++++++++++ src/pynwb/icephys.py | 18 ++++++++++++++++++ src/pynwb/image.py | 10 ++++++++++ src/pynwb/misc.py | 7 +++++++ src/pynwb/ogen.py | 4 ++++ src/pynwb/ophys.py | 14 ++++++++++++++ src/pynwb/resources.py | 1 + src/pynwb/spec.py | 13 +++++++++++++ src/pynwb/testing/icephys_testutils.py | 8 ++++---- src/pynwb/validate.py | 6 ++++++ 20 files changed, 163 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 0c81cb605..1767f2597 100644 --- a/.gitignore +++ b/.gitignore @@ -81,4 +81,4 @@ _version.py .core_typemap_version core_typemap.pkl -venv \ No newline at end of file +venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ed9ddac7..cc1d77ee5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## PyNWB 3.0.0 (Upcoming) ### Enhancements and minor changes +- Added `__all__` to modules. @bendichter [#2021](https://github.com/NeurodataWithoutBorders/pynwb/pull/2021) - Added `pynwb.read_nwb` convenience method to simplify reading an NWBFile written with any backend @h-mayorquin [#1994](https://github.com/NeurodataWithoutBorders/pynwb/pull/1994) - Added support for NWB schema 2.8.0. @rly [#2001](https://github.com/NeurodataWithoutBorders/pynwb/pull/2001) - Removed `SpatialSeries.bounds` field that was not functional. This will be fixed in a future release. 
@rly [#1907](https://github.com/NeurodataWithoutBorders/pynwb/pull/1907), [#1996](https://github.com/NeurodataWithoutBorders/pynwb/pull/1996) diff --git a/docs/source/conf.py b/docs/source/conf.py index 5745a314e..6e72e86f4 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -40,6 +40,9 @@ autoclass_content = 'both' autodoc_docstring_signature = True autodoc_member_order = 'bysource' +autodoc_default_options = { + 'ignore-module-all': True, # Continue documenting classes not in __all__ +} # -- General configuration ----------------------------------------------------- diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 7931322a8..5d16c7d5f 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -613,6 +613,32 @@ def read_nwb(**kwargs): from hdmf.data_utils import DataChunkIterator # noqa: F401,E402 from hdmf.backends.hdf5 import H5DataIO # noqa: F401,E402 +__all__ = [ + # Functions + 'get_type_map', + 'get_manager', + 'load_namespaces', + 'available_namespaces', + 'register_class', + 'register_map', + 'get_class', + 'load_type_config', + 'get_loaded_type_config', + 'unload_type_config', + 'read_nwb', + 'get_nwbfile_version', + + # Classes + 'NWBHDF5IO', + 'NWBContainer', + 'NWBData', + 'TimeSeries', + 'ProcessingModule', + 'NWBFile', + 'DataChunkIterator', + 'H5DataIO' +] + from ._due import due, BibTeX # noqa: E402 diff --git a/src/pynwb/base.py b/src/pynwb/base.py index 6b64b3ee0..218e3c5de 100644 --- a/src/pynwb/base.py +++ b/src/pynwb/base.py @@ -12,6 +12,17 @@ from .core import NWBDataInterface, MultiContainerInterface, NWBData +__all__ = [ + 'ProcessingModule', + 'TimeSeries', + 'Image', + 'ImageReferences', + 'Images', + 'TimeSeriesReferenceVectorData', + 'TimeSeriesReference' +] + + @register_class('ProcessingModule', CORE_NAMESPACE) class ProcessingModule(MultiContainerInterface): """ Processing module. This is a container for one or more containers diff --git a/src/pynwb/behavior.py b/src/pynwb/behavior.py index d4d43d515..2aa9472ea 100644 --- a/src/pynwb/behavior.py +++ b/src/pynwb/behavior.py @@ -7,6 +7,17 @@ from .misc import IntervalSeries from .base import TimeSeries +__all__ = [ + 'SpatialSeries', + 'BehavioralEpochs', + 'BehavioralEvents', + 'BehavioralTimeSeries', + 'PupilTracking', + 'EyeTracking', + 'CompassDirection', + 'Position' +] + @register_class('SpatialSeries', CORE_NAMESPACE) class SpatialSeries(TimeSeries): diff --git a/src/pynwb/core.py b/src/pynwb/core.py index f9ae2bd2f..7a656adcc 100644 --- a/src/pynwb/core.py +++ b/src/pynwb/core.py @@ -13,6 +13,17 @@ from pynwb import get_type_map +__all__ = [ + 'NWBMixin', + 'NWBContainer', + 'NWBDataInterface', + 'NWBData', + 'ScratchData', + 'NWBTable', + 'MultiContainerInterface' +] + + def _not_parent(arg): return arg['name'] != 'parent' diff --git a/src/pynwb/device.py b/src/pynwb/device.py index f842776ae..31e490f0a 100644 --- a/src/pynwb/device.py +++ b/src/pynwb/device.py @@ -3,6 +3,7 @@ from . 
import register_class, CORE_NAMESPACE from .core import NWBContainer +__all__ = ['Device'] @register_class('Device', CORE_NAMESPACE) class Device(NWBContainer): diff --git a/src/pynwb/ecephys.py b/src/pynwb/ecephys.py index 07d584a4f..739264e21 100644 --- a/src/pynwb/ecephys.py +++ b/src/pynwb/ecephys.py @@ -11,6 +11,16 @@ from .core import NWBContainer, NWBDataInterface, MultiContainerInterface from .device import Device +__all__ = [ + 'ElectrodeGroup', + 'ElectricalSeries', + 'SpikeEventSeries', + 'EventDetection', + 'LFP', + 'FilteredEphys', + 'FeatureExtraction' +] + @register_class('ElectrodeGroup', CORE_NAMESPACE) class ElectrodeGroup(NWBContainer): diff --git a/src/pynwb/epoch.py b/src/pynwb/epoch.py index 9cccc5db5..931080788 100644 --- a/src/pynwb/epoch.py +++ b/src/pynwb/epoch.py @@ -7,6 +7,7 @@ from . import register_class, CORE_NAMESPACE from .base import TimeSeries, TimeSeriesReferenceVectorData, TimeSeriesReference +__all__ = ['TimeIntervals'] @register_class('TimeIntervals', CORE_NAMESPACE) class TimeIntervals(DynamicTable): diff --git a/src/pynwb/file.py b/src/pynwb/file.py index a447c126d..84dc1b5b8 100644 --- a/src/pynwb/file.py +++ b/src/pynwb/file.py @@ -26,6 +26,16 @@ from .core import NWBContainer, NWBDataInterface, MultiContainerInterface, ScratchData, LabelledDict +__all__ = [ + 'LabMetaData', + 'Subject', + 'NWBFile', + 'ElectrodeTable', + 'TrialTable', + 'InvalidTimesTable' +] + + def _not_parent(arg): return arg['name'] != 'parent' diff --git a/src/pynwb/icephys.py b/src/pynwb/icephys.py index ec44a2bd8..4e32094ee 100644 --- a/src/pynwb/icephys.py +++ b/src/pynwb/icephys.py @@ -11,6 +11,24 @@ from .core import NWBContainer from .device import Device +__all__ = [ + 'IntracellularElectrode', + 'PatchClampSeries', + 'CurrentClampSeries', + 'IZeroClampSeries', + 'CurrentClampStimulusSeries', + 'VoltageClampSeries', + 'VoltageClampStimulusSeries', + 'IntracellularElectrodesTable', + 'IntracellularStimuliTable', + 'IntracellularResponsesTable', + 'IntracellularRecordingsTable', + 'SimultaneousRecordingsTable', + 'SequentialRecordingsTable', + 'RepetitionsTable', + 'ExperimentalConditionsTable' +] + def ensure_unit(self, name, current_unit, unit, nwb_version): """A helper to ensure correct unit used. 
diff --git a/src/pynwb/image.py b/src/pynwb/image.py index 84f445eac..34b00fd55 100644 --- a/src/pynwb/image.py +++ b/src/pynwb/image.py @@ -17,6 +17,16 @@ from .device import Device +__all__ = [ + 'ImageSeries', + 'IndexSeries', + 'OpticalSeries', + 'GrayscaleImage', + 'RGBImage', + 'RGBAImage' +] + + @register_class('ImageSeries', CORE_NAMESPACE) class ImageSeries(TimeSeries): ''' diff --git a/src/pynwb/misc.py b/src/pynwb/misc.py index 14c2e08d1..8f7383d59 100644 --- a/src/pynwb/misc.py +++ b/src/pynwb/misc.py @@ -11,6 +11,13 @@ from .ecephys import ElectrodeGroup from hdmf.common import DynamicTable, DynamicTableRegion +__all__ = [ + 'AnnotationSeries', + 'AbstractFeatureSeries', + 'IntervalSeries', + 'Units', + 'DecompositionSeries' +] @register_class('AnnotationSeries', CORE_NAMESPACE) class AnnotationSeries(TimeSeries): diff --git a/src/pynwb/ogen.py b/src/pynwb/ogen.py index af11842e4..f433d1bc2 100644 --- a/src/pynwb/ogen.py +++ b/src/pynwb/ogen.py @@ -5,6 +5,10 @@ from .core import NWBContainer from .device import Device +__all__ = [ + 'OptogeneticStimulusSite', + 'OptogeneticSeries' +] @register_class('OptogeneticStimulusSite', CORE_NAMESPACE) class OptogeneticStimulusSite(NWBContainer): diff --git a/src/pynwb/ophys.py b/src/pynwb/ophys.py index 6f1483079..660af4cb9 100644 --- a/src/pynwb/ophys.py +++ b/src/pynwb/ophys.py @@ -11,6 +11,20 @@ from .core import NWBContainer, MultiContainerInterface, NWBDataInterface from .device import Device +__all__ = [ + 'OpticalChannel', + 'ImagingPlane', + 'OnePhotonSeries', + 'TwoPhotonSeries', + 'CorrectedImageStack', + 'MotionCorrection', + 'PlaneSegmentation', + 'ImageSegmentation', + 'RoiResponseSeries', + 'DfOverF', + 'Fluorescence' +] + @register_class('OpticalChannel', CORE_NAMESPACE) class OpticalChannel(NWBContainer): diff --git a/src/pynwb/resources.py b/src/pynwb/resources.py index acdc22b12..e2d0acda4 100644 --- a/src/pynwb/resources.py +++ b/src/pynwb/resources.py @@ -2,6 +2,7 @@ from . import get_type_map as tm from hdmf.utils import docval, get_docval +__all__ = ['HERD'] class HERD(hdmf_HERD): """ diff --git a/src/pynwb/spec.py b/src/pynwb/spec.py index fe97b6eae..7c3a65173 100644 --- a/src/pynwb/spec.py +++ b/src/pynwb/spec.py @@ -8,6 +8,19 @@ from . import CORE_NAMESPACE +__all__ = [ + 'NWBRefSpec', + 'NWBAttributeSpec', + 'NWBLinkSpec', + 'NWBDtypeSpec', + 'NWBDatasetSpec', + 'NWBGroupSpec', + 'NWBNamespace', + 'NWBNamespaceBuilder', + 'export_spec' # Re-exported from hdmf.spec.write +] + + def __swap_inc_def(cls): args = get_docval(cls.__init__) clsname = 'NWB%s' % cls.__name__ diff --git a/src/pynwb/testing/icephys_testutils.py b/src/pynwb/testing/icephys_testutils.py index 3de4619d4..aed8c2653 100644 --- a/src/pynwb/testing/icephys_testutils.py +++ b/src/pynwb/testing/icephys_testutils.py @@ -60,10 +60,10 @@ def create_icephys_testfile(filename=None, add_custom_columns=True, randomize_da :param randomize_data: Randomize data values in the stimulus and response :type randomize_data: bool - :returns: NWBFile object with icephys data created for writing. NOTE: If filename is provided then - the file is written to disk, but the function does not read the file back. If - you want to use the file from disk then you will need to read it with NWBHDF5IO. - :rtype: NWBFile + :returns: :py:class:`~pynwb.file.NWBFile` object with icephys data created for writing. NOTE: If filename is + provided then the file is written to disk, but the function does not read the file back. 
If + you want to use the file from disk then you will need to read it with :py:class:`~pynwb.NWBHDF5IO`. + :rtype: :py:class:`~pynwb.file.NWBFile` """ nwbfile = NWBFile( session_description='my first synthetic recording', diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 880f860a6..ddd93f2e5 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -14,6 +14,12 @@ from pynwb.spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace +__all__ = [ + 'validate', + 'get_cached_namespaces_to_validate' +] + + def _print_errors(validation_errors: list): if validation_errors: print(" - found the following errors:", file=sys.stderr) From f3b3306adf2efd416224d6881c7e04666c8281ae Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Tue, 18 Feb 2025 14:36:56 -0600 Subject: [PATCH 6/6] Update .gitignore to exclude virtual environment directories and enhance documentation on adding datasets with h5py (#2032) * Update .gitignore to exclude virtual environment directories and enhance documentation on adding datasets with h5py * Fix session start time to include local timezone in NWBFile example * Update documentation to demonstrate adding datasets using PyNWB instead of h5py * Update plot_editing.py --------- Co-authored-by: Ryan Ly --- .gitignore | 5 ++++ docs/gallery/advanced_io/plot_editing.py | 38 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/.gitignore b/.gitignore index 1767f2597..6566ba43c 100644 --- a/.gitignore +++ b/.gitignore @@ -38,6 +38,11 @@ docs/build/ docs/source/pynwb.*.rst +# Virtual Environment +venv/ +env/ +ENV/ + # setuptools build/ dist/ diff --git a/docs/gallery/advanced_io/plot_editing.py b/docs/gallery/advanced_io/plot_editing.py index b08b2adfd..4bcf8c55b 100644 --- a/docs/gallery/advanced_io/plot_editing.py +++ b/docs/gallery/advanced_io/plot_editing.py @@ -162,3 +162,41 @@ "synthetic_timeseries_renamed", "/analysis/synthetic_timeseries_renamed", ) + +############################################## +# Adding datasets to existing groups +# ---------------------------------- +# You can add new datasets to existing groups using PyNWB by calling ``set_modified()``. +# Here's an example of adding a genotype to a Subject: + +from pynwb import NWBFile, NWBHDF5IO +from pynwb.file import Subject + +# First, let's create a file with a Subject that is missing the genotype, which is optional +nwbfile = NWBFile( + session_description="example file with subject", + identifier="EXAMPLE_ID", + session_start_time=datetime.now(tzlocal()), + session_id="LONELYMTN", + subject=Subject( + subject_id="mouse001", + species="Mus musculus", + age="P30D", + ) +) + +with NWBHDF5IO("test_edit3.nwb", "w") as io: + io.write(nwbfile) + +# Now add the genotype using PyNWB and set_modified() +with NWBHDF5IO("test_edit3.nwb", "a") as io: + nwbfile = io.read() + nwbfile.subject.genotype = "Sst-IRES-Cre" + nwbfile.subject.set_modified() # Required to mark the container as modified + io.write(nwbfile) + +# Verify the dataset was added +with NWBHDF5IO("test_edit3.nwb", "r") as io: + nwbfile = io.read() + print(f"Subject genotype: {nwbfile.subject.genotype}") + # Output: Subject genotype: Sst-IRES-Cre
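
As a supplementary illustration of what the edit above does at the storage level, here is a small sketch (not part of the diff) that inspects the resulting file with plain ``h5py``. It assumes the standard NWB 2.x HDF5 layout, in which the ``Subject`` container is stored under ``/general/subject`` and each of its fields is a scalar dataset.

import h5py

# Open the edited file directly with h5py to see the raw HDF5 structure.
with h5py.File("test_edit3.nwb", "r") as f:
    subject = f["/general/subject"]
    # The new "genotype" dataset should now sit alongside the original fields,
    # e.g. ['age', 'genotype', 'species', 'subject_id'].
    print(sorted(subject.keys()))
    # h5py typically returns scalar string datasets as bytes.
    print(subject["genotype"][()])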