Skip to content

Commit

Permalink
fix: integrity check failed for empty datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Oct 3, 2024
1 parent 7dfcfbc commit ae1131c
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 10 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
0.61.5
- fix: integrity check failed for empty datasets
0.61.4
- docs: update basins section
- docs: cleanup build process
Expand Down
14 changes: 12 additions & 2 deletions dclab/rtdc_dataset/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@ def __init__(self, path_or_ds):
level="alert",
category="warning"))
self.finally_close = True
np.max(self.ds["index"])

def __enter__(self):
return self
Expand Down Expand Up @@ -319,8 +318,19 @@ def iter_count_compression(h5):
data=data))
return cues

def check_empty(self, **kwargs):
    """Alert if the dataset holds no events

    Returns
    -------
    cues: list
        A list with a single alert-level cue of category
        "feature data" when `len(self.ds)` is zero, otherwise
        an empty list. Keyword arguments are accepted but not used.
    """
    # Guard clause: a dataset with at least one event yields no cue.
    if len(self.ds) != 0:
        return []
    return [ICue(
        msg="The dataset does not contain any events",
        level="alert",
        category="feature data")]

def check_feat_index(self, **kwargs):
"""Up until"""
"""The index of the dataset should be monotonous"""
cues = []
lends = len(self.ds)
if "index" in self.ds:
Expand Down
26 changes: 18 additions & 8 deletions tests/test_rtdc_check_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,24 @@ def test_icue():
assert cues[0].msg in cues[0].__repr__()


def test_ic_empty():
    """Integrity checker should report, not fail on, empty datasets"""
    src_path = retrieve_data("fmt-hdf5_polygon_gate_2021.zip")
    empty_path = src_path.with_name("empty.rtdc")
    # Create a file that only receives the file-level attributes of the
    # source dataset, with the event count overridden to zero; no
    # feature data are written here.
    with RTDCWriter(empty_path) as writer, \
            dclab.new_dataset(src_path) as source:
        writer.h5file.attrs.update(source.h5file.attrs)
        writer.h5file.attrs["experiment:event count"] = 0

    with check.IntegrityChecker(empty_path) as checker:
        assert len(checker.ds) == 0
        cues = checker.check_empty()

    # Exactly one cue is expected, describing the missing events.
    assert len(cues) == 1
    only_cue = cues[0]
    assert only_cue.level == "alert"
    assert only_cue.category == "feature data"
    assert only_cue.msg == "The dataset does not contain any events"


def test_ic_expand_section():
ddict = example_data_dict(size=8472, keys=["area_um", "deform"])
ds1 = new_dataset(ddict)
Expand Down Expand Up @@ -714,11 +732,3 @@ def test_wrong_samples_per_event():
+ "(expected 10, got 566)"
viol, _, _ = check_dataset(h5path)
assert msg in viol


if __name__ == "__main__":
    # Run all tests: invoke every callable module-level name that
    # starts with "test_". The key snapshot is taken before the loop
    # so that names created while iterating do not affect it.
    _loc = locals()
    for _key in tuple(_loc):
        if not _key.startswith("test_"):
            continue
        if not hasattr(_loc[_key], "__call__"):
            continue
        _loc[_key]()

0 comments on commit ae1131c

Please sign in to comment.