Skip to content

Commit

Permalink
enh: check for external links before upload
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Oct 8, 2024
1 parent c354b7b commit c2be93e
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 12 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
- fix: fall-back to ETag verification when downloading resources for
which no SHA256 sum exists
- fix: set error message before publishing download job error state
- setup: bump dclab from 0.60.0 to 0.61.5
- enh: check for external links before upload
- setup: bump dclab from 0.60.0 to 0.62.0
- ref: reduce logging level for SSL verification
0.15.0
- setup: bump dclab from 0.58.7 to 0.60.0 (internal basins support)
Expand Down
2 changes: 2 additions & 0 deletions dcoraid/upload/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ def task_compress_resources(self):
insane = [c.msg for c in ic.sanity_check()]
# check for features not defined in dclab
insane += [c.msg for c in ic.check_features_unknown_hdf5()]
# check for external links; they don't make sense on DCOR
insane += [c.msg for c in ic.check_external_links()]
if insane:
# The user is responsible for cleaning up the mess.
# We just make sure no dirty data gets uploaded to
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ classifiers = [
]
license = {text = "GPL v3"}
dependencies = [
"dclab[dcor]>=0.61.5",
"dclab[dcor]>=0.60.0",
"numpy>=1.21",
"requests>=2.31", # CVE-2023-32681
"urllib3>=2.0", # requests_toolbelt and general compatibility
Expand Down
47 changes: 37 additions & 10 deletions tests/test_upload_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import re
import shutil
import tempfile
import warnings

import time
from unittest import mock
import warnings

import pytest

import dclab.cli
import h5py

from dcoraid.api import dataset_create
from dcoraid.upload import job

Expand All @@ -34,6 +36,39 @@ def test_resource_name_characters():
job.VALID_RESOURCE_CHARS + " ")


def test_external_links_not_allowed():
    """Resources containing HDF5 external links must fail compression.

    A copy of the first test .rtdc file gets its "/events/image" dataset
    moved to a sibling HDF5 file and replaced by an `h5py.ExternalLink`.
    `UploadJob.task_compress_resources` is then expected to raise an
    `IOError` that mentions the external link.
    """
    api = common.get_api()

    # Temporary upload directory holding the resource and the link target
    upload_dir = pathlib.Path(
        tempfile.mkdtemp(prefix="dcoraid_external_link_upload_"))
    rtdc_path = upload_dir / "peter.rtdc"
    shutil.copy2(rtdc_paths[0], rtdc_path)
    image_path = rtdc_path.with_name("image.hdf5")

    # Copy the image data into the separate file
    with h5py.File(rtdc_path) as h5_src, \
            h5py.File(image_path, "w") as h5_img:
        h5_img["image"] = h5_src["/events/image"][:]

    # Replace the original image data with an external link to that file
    with h5py.File(rtdc_path, "a") as h5_src:
        del h5_src["/events/image"]
        h5_src["/events/image"] = h5py.ExternalLink(str(image_path), "image")

    # Create a dataset from fresh metadata (only its "id" is needed)
    dataset_dict = dataset_create(
        dataset_dict=common.make_dataset_dict(hint="create-with-resource"),
        api=api,
    )
    resources = list(upload_dir.glob("*.rtdc"))
    uj = job.UploadJob(api=api,
                       dataset_id=dataset_dict["id"],
                       resource_paths=resources)
    assert uj.state == "init"

    expected = "The HDF5 file contains at least one external link"
    with pytest.raises(IOError, match=expected):
        uj.task_compress_resources()


def test_initialize():
api = common.get_api()
# create some metadata
Expand Down Expand Up @@ -222,11 +257,3 @@ def test_state_compress_reuse():
uj.task_compress_resources()
with pytest.raises(AssertionError):
mockobj.assert_called()


if __name__ == "__main__":
    # Script entry point: call every module-level "test_*" callable.
    # The names are snapshotted first because the loop itself adds
    # new names to the module namespace.
    namespace = locals()
    for name in list(namespace.keys()):
        if not name.startswith("test_"):
            continue
        candidate = namespace[name]
        if hasattr(candidate, "__call__"):
            candidate()

0 comments on commit c2be93e

Please sign in to comment.