From 5d34ef38582b9c5fef9c9e443b714d9813f92153 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 9 Jan 2024 09:15:09 -0500 Subject: [PATCH 01/34] Move fuse-mounting code to a dedicated function --- code/src/healthstatus/__main__.py | 61 ++++------------------------ code/src/healthstatus/mounts.py | 67 +++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 53 deletions(-) create mode 100644 code/src/healthstatus/mounts.py diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 7ed815c18..50f68b256 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -4,18 +4,15 @@ from operator import attrgetter from pathlib import Path import re -from signal import SIGINT -import subprocess import sys -from time import sleep from typing import Optional import anyio import click -from ghreq import Client from packaging.version import Version from .checker import AssetReport, Dandiset, HealthStatus from .config import MATNWB_INSTALL_DIR from .core import Asset, DandisetStatus, Outcome, TestSummary, log +from .mounts import fused from .tests import TESTS from .util import MatNWBInstaller, get_package_versions @@ -64,7 +61,6 @@ def check( dandisets: tuple[str, ...], mode: str, ) -> None: - update_dandisets(Path(dataset_path)) installer = MatNWBInstaller(MATNWB_INSTALL_DIR) installer.install(update=True) hs = HealthStatus( @@ -74,30 +70,13 @@ def check( dandiset_jobs=dandiset_jobs, ) hs.versions["matnwb"] = installer.get_version() - with open("fuse.log", "wb") as fp: - with subprocess.Popen( - [ - "datalad", - "fusefs", - "-d", - str(dataset_path), - "--foreground", - "--mode-transparent", - str(mount_point), - ], - stdout=fp, - stderr=fp, - ) as p: - sleep(3) - try: - if mode == "all": - anyio.run(hs.run_all) - elif mode in ("random-asset", "random-outdated-asset-first"): - anyio.run(hs.run_random_assets, mode) - else: - raise AssertionError(f"Unexpected mode: {mode!r}") - finally: - p.send_signal(SIGINT) + with fused(dataset_path=dataset_path, mount_path=mount_point, update=True): + if mode == "all": + anyio.run(hs.run_all) + elif mode in ("random-asset", "random-outdated-asset-first"): + anyio.run(hs.run_random_assets, mode) + else: + raise AssertionError(f"Unexpected mode: {mode!r}") @main.command() @@ -232,29 +211,5 @@ def find_dandiset(asset: Path) -> Optional[Path]: return None -def update_dandisets(dataset_path: Path) -> None: - # Importing this at the top of the file leads to some weird import error - # when running tests: - from datalad.api import Dataset - - log.info("Updating Dandisets dataset ...") - # Fetch just the public repositories from the dandisets org, and then get - # or update just those subdatasets rather than trying to get all - # subdatasets and failing on private/embargoed ones - datasets = set() - with Client() as client: - for repo in client.paginate("/users/dandisets/repos"): - name = repo["name"] - if re.fullmatch("[0-9]{6}", name): - datasets.add(name) - ds = Dataset(dataset_path) - ds.update(follow="parentds", how="ff-only", recursive=True, recursion_limit=1) - for sub in ds.subdatasets(state="present"): - name = Path(sub["path"]).relative_to(dataset_path).as_posix() - datasets.discard(name) - if datasets: - ds.get(path=list(datasets), jobs=5, get_data=False) - - if __name__ == "__main__": main() diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py new file mode 100644 index 000000000..8499f0414 --- /dev/null +++ b/code/src/healthstatus/mounts.py @@ -0,0 +1,67 @@ +from __future__ import annotations +from collections.abc import Iterator +from contextlib import contextmanager +import os +from pathlib import Path +import re +from signal import SIGINT +import subprocess +from time import sleep +from ghreq import Client +from .core import log + + +@contextmanager +def fused( + dataset_path: str | os.PathLike[str], + mount_path: str | os.PathLike[str], + update: bool = False, + logdir: Path | None = None, +) -> Iterator[None]: + if update: + update_dandisets(Path(dataset_path)) + if logdir is None: + logdir = Path() + with (logdir / "fuse.log").open("wb") as fp: + with subprocess.Popen( + [ + "datalad", + "fusefs", + "-d", + os.fspath(dataset_path), + "--foreground", + "--mode-transparent", + os.fspath(mount_path), + ], + stdout=fp, + stderr=fp, + ) as p: + sleep(3) + try: + yield + finally: + p.send_signal(SIGINT) + + +def update_dandisets(dataset_path: Path) -> None: + # Importing this at the top of the file leads to some weird import error + # when running tests: + from datalad.api import Dataset + + log.info("Updating Dandisets dataset ...") + # Fetch just the public repositories from the dandisets org, and then get + # or update just those subdatasets rather than trying to get all + # subdatasets and failing on private/embargoed ones + datasets = set() + with Client() as client: + for repo in client.paginate("/users/dandisets/repos"): + name = repo["name"] + if re.fullmatch("[0-9]{6}", name): + datasets.add(name) + ds = Dataset(dataset_path) + ds.update(follow="parentds", how="ff-only", recursive=True, recursion_limit=1) + for sub in ds.subdatasets(state="present"): + name = Path(sub["path"]).relative_to(dataset_path).as_posix() + datasets.discard(name) + if datasets: + ds.get(path=list(datasets), jobs=5, get_data=False) From e4f331df044c3b772a83ca62450cfadfbc3226a5 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 17 Jan 2024 09:43:17 -0500 Subject: [PATCH 02/34] Mounting dandidav --- code/setup.cfg | 4 ++++ code/src/healthstatus/mounts.py | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/code/setup.cfg b/code/setup.cfg index 096cf798d..eb0719f0c 100644 --- a/code/setup.cfg +++ b/code/setup.cfg @@ -32,6 +32,10 @@ install_requires = pyyaml requests ~= 2.20 +[options.extras_require] +dandidav = + dandidav @ git+https://github.com/dandi/dandi-webdav + [options.packages.find] where = src diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 8499f0414..7888dde30 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -8,6 +8,7 @@ import subprocess from time import sleep from ghreq import Client +import requests from .core import log @@ -65,3 +66,25 @@ def update_dandisets(dataset_path: Path) -> None: datasets.discard(name) if datasets: ds.get(path=list(datasets), jobs=5, get_data=False) + + +@contextmanager +def dandidav(logdir: Path | None = None) -> Iterator[str]: + if logdir is None: + logdir = Path() + with (logdir / "dandidav.log").open("wb") as fp: + with subprocess.Popen(["dandidav"], stdout=fp, stderr=fp) as p: + try: + url = "http://127.0.0.1:8080" + for _ in range(10): + try: + requests.get(url, timeout=1) + except requests.RequestException: + sleep(1) + else: + break + else: + raise RuntimeError("WebDAV server did not start up time") + yield url + finally: + p.send_signal(SIGINT) From 3a9f0de0b1cfda47198d888fabc8de278667189a Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Fri, 19 Jan 2024 13:38:13 -0500 Subject: [PATCH 03/34] Mounting webdavfs --- code/src/healthstatus/mounts.py | 17 +++++++++++++++++ tools/run.sh | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 7888dde30..0e92334b4 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -88,3 +88,20 @@ def dandidav(logdir: Path | None = None) -> Iterator[str]: yield url finally: p.send_signal(SIGINT) + + +@contextmanager +def webdavfs(url: str, mount_path: str | os.PathLike[str]) -> Iterator[None]: + subprocess.run( + ["sudo", "mount", "-t", "webdavfs", url, os.fspath(mount_path)], + check=True, + ) + try: + yield + finally: + subprocess.run(["sudo", "umount", os.fspath(mount_path)], check=True) + + +@contextmanager +def davfs2(url: str, mount_path: str | os.PathLike[str]) -> Iterator[None]: + raise NotImplementedError diff --git a/tools/run.sh b/tools/run.sh index 603e69e4f..493911a71 100755 --- a/tools/run.sh +++ b/tools/run.sh @@ -3,7 +3,7 @@ set -ex PYTHON="$HOME"/miniconda3/bin/python DANDISETS_PATH=/mnt/backup/dandi/dandisets-healthstatus/dandisets -MOUNT_PATH=/mnt/backup/dandi/dandisets-healthstatus/dandisets-fuse +MOUNT_PATH=/tmp/dandisets-fuse cd "$(dirname "$0")"/.. #git reset --hard HEAD From dfcd068742e76ebba288519c5b378acdc149f171 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Fri, 19 Jan 2024 13:47:15 -0500 Subject: [PATCH 04/34] Timing tests --- code/src/healthstatus/core.py | 1 + code/src/healthstatus/tests.py | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/code/src/healthstatus/core.py b/code/src/healthstatus/core.py index f5c6f0067..1884ec9c8 100644 --- a/code/src/healthstatus/core.py +++ b/code/src/healthstatus/core.py @@ -27,6 +27,7 @@ class TestResult: asset_path: str outcome: Outcome output: Optional[str] = None + elapsed: Optional[float] = None @dataclass diff --git a/code/src/healthstatus/tests.py b/code/src/healthstatus/tests.py index f4839a260..ee59dfa24 100644 --- a/code/src/healthstatus/tests.py +++ b/code/src/healthstatus/tests.py @@ -2,6 +2,7 @@ import os import subprocess import sys +import time from typing import Optional import anyio from .config import MATNWB_INSTALL_DIR, PYNWB_OPEN_LOAD_NS_SCRIPT, TIMEOUT @@ -16,8 +17,10 @@ async def run_test_command( ) -> TestResult: if env is not None: env = {**os.environ, **env} + elapsed: float | None = None try: with anyio.fail_after(TIMEOUT): + start = time.perf_counter() r = await anyio.run_process( command, stdout=subprocess.PIPE, @@ -25,6 +28,8 @@ async def run_test_command( check=False, env=env, ) + end = time.perf_counter() + elapsed = end - start except TimeoutError: return TestResult( testname=testname, asset_path=asset.asset_path, outcome=Outcome.TIMEOUT @@ -32,7 +37,10 @@ async def run_test_command( else: if r.returncode == 0: return TestResult( - testname=testname, asset_path=asset.asset_path, outcome=Outcome.PASS + testname=testname, + asset_path=asset.asset_path, + outcome=Outcome.PASS, + elapsed=elapsed, ) else: return TestResult( @@ -65,6 +73,19 @@ async def matnwb_nwbRead(asset: Asset) -> TestResult: ) +async def dandi_ls(asset: Asset) -> TestResult: + return await run_test_command( + "dandi_ls", + asset, + ["dandi", "ls", str(asset.filepath)], + env={"DANDI_CACHE": "ignore"}, + ) + + TESTFUNCS = [pynwb_open_load_ns, matnwb_nwbRead] TESTS = {t.__name__: t for t in TESTFUNCS} + +TIMED_TEST_FUNCS = [*TESTFUNCS, dandi_ls] + +TIMED_TESTS = {t.__name__: t for t in TIMED_TEST_FUNCS} From 47b45cc74dbecc71ed334da19f804de30839d0e6 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Fri, 19 Jan 2024 13:49:16 -0500 Subject: [PATCH 05/34] More logging --- code/src/healthstatus/mounts.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 0e92334b4..940b58cbc 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -24,6 +24,7 @@ def fused( if logdir is None: logdir = Path() with (logdir / "fuse.log").open("wb") as fp: + log.debug("Starting `datalad fusefs` process ...") with subprocess.Popen( [ "datalad", @@ -41,6 +42,7 @@ def fused( try: yield finally: + log.debug("Terminating `datalad fusefs` process ...") p.send_signal(SIGINT) @@ -73,6 +75,7 @@ def dandidav(logdir: Path | None = None) -> Iterator[str]: if logdir is None: logdir = Path() with (logdir / "dandidav.log").open("wb") as fp: + log.debug("Starting `dandidav` process ...") with subprocess.Popen(["dandidav"], stdout=fp, stderr=fp) as p: try: url = "http://127.0.0.1:8080" @@ -87,11 +90,13 @@ def dandidav(logdir: Path | None = None) -> Iterator[str]: raise RuntimeError("WebDAV server did not start up time") yield url finally: + log.debug("Terminating `dandidav` process ...") p.send_signal(SIGINT) @contextmanager def webdavfs(url: str, mount_path: str | os.PathLike[str]) -> Iterator[None]: + log.debug("Mounting webdavfs mount ...") subprocess.run( ["sudo", "mount", "-t", "webdavfs", url, os.fspath(mount_path)], check=True, @@ -99,6 +104,7 @@ def webdavfs(url: str, mount_path: str | os.PathLike[str]) -> Iterator[None]: try: yield finally: + log.debug("Unmounting webdavfs mount ...") subprocess.run(["sudo", "umount", os.fspath(mount_path)], check=True) From fb0f546dcc83d1d118b3ffd2ed31b0d510020b14 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Fri, 19 Jan 2024 14:46:54 -0500 Subject: [PATCH 06/34] Add "dandi" extra --- code/setup.cfg | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/code/setup.cfg b/code/setup.cfg index eb0719f0c..42442d387 100644 --- a/code/setup.cfg +++ b/code/setup.cfg @@ -33,6 +33,12 @@ install_requires = requests ~= 2.20 [options.extras_require] +all = + %(dandi)s + %(dandidav)s +dandi = + # Needed for timing `dandi ls` runs + dandi dandidav = dandidav @ git+https://github.com/dandi/dandi-webdav From b127ab77b8d95ce6717965cc3d9504774e80bfd0 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 08:34:44 -0500 Subject: [PATCH 07/34] `time-mounts` command --- code/src/healthstatus/__main__.py | 61 +++++++- code/src/healthstatus/mounts.py | 233 ++++++++++++++++++++---------- 2 files changed, 211 insertions(+), 83 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 50f68b256..43dd0736b 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -12,8 +12,8 @@ from .checker import AssetReport, Dandiset, HealthStatus from .config import MATNWB_INSTALL_DIR from .core import Asset, DandisetStatus, Outcome, TestSummary, log -from .mounts import fused -from .tests import TESTS +from .mounts import AssetInDandiset, FuseMounter, MounterFactory +from .tests import TESTS, TIMED_TESTS from .util import MatNWBInstaller, get_package_versions @@ -30,7 +30,7 @@ def main() -> None: @click.option( "-d", "--dataset-path", - type=click.Path(file_okay=False, exists=True, path_type=anyio.Path), + type=click.Path(file_okay=False, exists=True, path_type=Path), required=True, ) @click.option( @@ -44,7 +44,7 @@ def main() -> None: @click.option( "-m", "--mount-point", - type=click.Path(file_okay=False, exists=True, path_type=anyio.Path), + type=click.Path(file_okay=False, exists=True, path_type=Path), required=True, ) @click.option( @@ -55,8 +55,8 @@ def main() -> None: ) @click.argument("dandisets", nargs=-1) def check( - dataset_path: anyio.Path, - mount_point: anyio.Path, + dataset_path: Path, + mount_point: Path, dandiset_jobs: int, dandisets: tuple[str, ...], mode: str, @@ -64,13 +64,14 @@ def check( installer = MatNWBInstaller(MATNWB_INSTALL_DIR) installer.install(update=True) hs = HealthStatus( - backup_root=mount_point, + backup_root=anyio.Path(str(mount_point)), reports_root=Path.cwd(), dandisets=dandisets, dandiset_jobs=dandiset_jobs, ) hs.versions["matnwb"] = installer.get_version() - with fused(dataset_path=dataset_path, mount_path=mount_point, update=True): + mnt = FuseMounter(dataset_path=dataset_path, mount_path=mount_point, update=True) + with mnt.mount(): if mode == "all": anyio.run(hs.run_all) elif mode in ("random-asset", "random-outdated-asset-first"): @@ -202,6 +203,50 @@ def test_files( sys.exit(0 if ok else 1) +@main.command() +@click.option( + "-d", + "--dataset-path", + type=click.Path(file_okay=False, exists=True, path_type=Path), + required=True, +) +@click.option( + "-m", + "--mount-point", + type=click.Path(file_okay=False, exists=True, path_type=Path), + required=True, +) +@click.argument( + "assets", + nargs=-1, + type=AssetInDandiset.parse, + metavar="DANDISET_ID/ASSET_PATH ...", +) +def time_mounts( + assets: tuple[AssetInDandiset, ...], dataset_path: Path, mount_point: Path +) -> None: + factory = MounterFactory(dataset_path=dataset_path, mount_path=mount_point) + for mounter in factory.iter_mounters(): + log.info("Testing with mount type: %s", mounter.name) + with mounter.mount(): + for a in assets: + log.info( + "Testing Dandiset %s, asset %s ...", a.dandiset_id, a.asset_path + ) + fpath = mounter.resolve(a) + asset_obj = Asset( + filepath=anyio.Path(str(fpath)), asset_path=a.asset_path + ) + for tname, tfunc in TIMED_TESTS.items(): + log.info("Running test %r", tname) + r = anyio.run(tfunc, asset_obj) + if r.outcome is Outcome.PASS: + log.info("Test passed in %f seconds", r.elapsed) + else: + log.info("Test result: %s", r.outcome.name) + ### Summary? + + def find_dandiset(asset: Path) -> Optional[Path]: if not asset.is_absolute(): asset = asset.absolute() diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 940b58cbc..4bf04caeb 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -1,6 +1,8 @@ from __future__ import annotations +from abc import ABC, abstractmethod from collections.abc import Iterator -from contextlib import contextmanager +from contextlib import AbstractContextManager, contextmanager +from dataclasses import dataclass, field import os from pathlib import Path import re @@ -12,38 +14,162 @@ from .core import log -@contextmanager -def fused( - dataset_path: str | os.PathLike[str], - mount_path: str | os.PathLike[str], - update: bool = False, - logdir: Path | None = None, -) -> Iterator[None]: - if update: - update_dandisets(Path(dataset_path)) - if logdir is None: - logdir = Path() - with (logdir / "fuse.log").open("wb") as fp: - log.debug("Starting `datalad fusefs` process ...") - with subprocess.Popen( - [ - "datalad", - "fusefs", - "-d", - os.fspath(dataset_path), - "--foreground", - "--mode-transparent", - os.fspath(mount_path), - ], - stdout=fp, - stderr=fp, - ) as p: - sleep(3) - try: +@dataclass +class AssetInDandiset: + dandiset_id: str + asset_path: str + + @classmethod + def parse(cls, s: str) -> AssetInDandiset: + (dandiset_id, _, asset_path) = s.partition("/") + if not asset_path or not re.fullmatch(r"[0-9]{6}", dandiset_id): + raise ValueError( + f"invalid asset-in-dandiset: {s!r}: must be in form" + " /" + ) + return AssetInDandiset(dandiset_id, asset_path) + + +class Mounter(ABC): + @property + @abstractmethod + def name(self) -> str: + ... + + @abstractmethod + def mount(self) -> AbstractContextManager[None]: + ... + + @abstractmethod + def resolve(self, asset: AssetInDandiset) -> Path: + ... + + +@dataclass +class FuseMounter(Mounter): + dataset_path: Path + mount_path: Path + update: bool = True + logdir: Path = field(default_factory=Path) + + @property + def name(self) -> str: + return "fusefs" + + @contextmanager + def mount(self) -> Iterator[None]: + if self.update: + update_dandisets(self.dataset_path) + with (self.logdir / "fuse.log").open("wb") as fp: + log.debug("Starting `datalad fusefs` process ...") + with subprocess.Popen( + [ + "datalad", + "fusefs", + "-d", + os.fspath(self.dataset_path), + "--foreground", + "--mode-transparent", + os.fspath(self.mount_path), + ], + stdout=fp, + stderr=fp, + ) as p: + sleep(3) + try: + yield + finally: + log.debug("Terminating `datalad fusefs` process ...") + p.send_signal(SIGINT) + + def resolve(self, asset: AssetInDandiset) -> Path: + return self.mount_path / asset.dandiset_id / asset.asset_path + + +@dataclass +class DandiDavMounter(Mounter): + mount_path: Path + logdir: Path = field(default_factory=Path) + + @contextmanager + def dandidav(self) -> Iterator[str]: + with (self.logdir / "dandidav.log").open("wb") as fp: + log.debug("Starting `dandidav` process ...") + with subprocess.Popen(["dandidav"], stdout=fp, stderr=fp) as p: + try: + url = "http://127.0.0.1:8080" + for _ in range(10): + try: + requests.get(url, timeout=1) + except requests.RequestException: + sleep(1) + else: + break + else: + raise RuntimeError("WebDAV server did not start up time") + yield url + finally: + log.debug("Terminating `dandidav` process ...") + p.send_signal(SIGINT) + + @abstractmethod + def mount_webdav(self, url: str) -> AbstractContextManager[None]: + ... + + @contextmanager + def mount(self) -> Iterator[None]: + with self.dandidav() as url: + with self.mount_webdav(url): yield - finally: - log.debug("Terminating `datalad fusefs` process ...") - p.send_signal(SIGINT) + + def resolve(self, asset: AssetInDandiset) -> Path: + return self.mount_path / asset.dandiset_id / "draft" / asset.asset_path + + +class WebDavFSMounter(DandiDavMounter): + @property + def name(self) -> str: + return "webdavfs" + + @contextmanager + def mount_webdav(self, url: str) -> Iterator[None]: + log.debug("Mounting webdavfs mount ...") + subprocess.run( + ["sudo", "mount", "-t", "webdavfs", url, os.fspath(self.mount_path)], + check=True, + ) + try: + yield + finally: + log.debug("Unmounting webdavfs mount ...") + subprocess.run(["sudo", "umount", os.fspath(self.mount_path)], check=True) + + +class DavFS2Mounter(DandiDavMounter): + @property + def name(self) -> str: + return "davfs2" + + @contextmanager + def mount_webdav(self, url: str) -> Iterator[None]: + raise NotImplementedError + + +@dataclass +class MounterFactory: + dataset_path: Path + mount_path: Path + logdir: Path = field(default_factory=Path) + + def iter_mounters(self) -> Iterator[Mounter]: + yield FuseMounter( + dataset_path=self.dataset_path, + mount_path=self.mount_path, + update=True, + logdir=self.logdir, + ) + yield WebDavFSMounter(mount_path=self.mount_path, logdir=self.logdir) + yield DavFS2Mounter(mount_path=self.mount_path, logdir=self.logdir) def update_dandisets(dataset_path: Path) -> None: @@ -68,46 +194,3 @@ def update_dandisets(dataset_path: Path) -> None: datasets.discard(name) if datasets: ds.get(path=list(datasets), jobs=5, get_data=False) - - -@contextmanager -def dandidav(logdir: Path | None = None) -> Iterator[str]: - if logdir is None: - logdir = Path() - with (logdir / "dandidav.log").open("wb") as fp: - log.debug("Starting `dandidav` process ...") - with subprocess.Popen(["dandidav"], stdout=fp, stderr=fp) as p: - try: - url = "http://127.0.0.1:8080" - for _ in range(10): - try: - requests.get(url, timeout=1) - except requests.RequestException: - sleep(1) - else: - break - else: - raise RuntimeError("WebDAV server did not start up time") - yield url - finally: - log.debug("Terminating `dandidav` process ...") - p.send_signal(SIGINT) - - -@contextmanager -def webdavfs(url: str, mount_path: str | os.PathLike[str]) -> Iterator[None]: - log.debug("Mounting webdavfs mount ...") - subprocess.run( - ["sudo", "mount", "-t", "webdavfs", url, os.fspath(mount_path)], - check=True, - ) - try: - yield - finally: - log.debug("Unmounting webdavfs mount ...") - subprocess.run(["sudo", "umount", os.fspath(mount_path)], check=True) - - -@contextmanager -def davfs2(url: str, mount_path: str | os.PathLike[str]) -> Iterator[None]: - raise NotImplementedError From 9d9198267b2dbc96ccb897f5010d1239f18a0157 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 08:44:23 -0500 Subject: [PATCH 08/34] Only use `anyio.Path` where it's actually needed --- code/src/healthstatus/__main__.py | 14 +++++--------- code/src/healthstatus/checker.py | 18 +++++++++--------- code/src/healthstatus/config.py | 3 +-- code/src/healthstatus/core.py | 3 +-- 4 files changed, 16 insertions(+), 22 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 43dd0736b..ce12dc28a 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -64,7 +64,7 @@ def check( installer = MatNWBInstaller(MATNWB_INSTALL_DIR) installer.install(update=True) hs = HealthStatus( - backup_root=anyio.Path(str(mount_point)), + backup_root=mount_point, reports_root=Path.cwd(), dandisets=dandisets, dandiset_jobs=dandiset_jobs, @@ -156,11 +156,9 @@ def report() -> None: @click.argument( "files", nargs=-1, - type=click.Path(exists=True, dir_okay=False, path_type=anyio.Path), + type=click.Path(exists=True, dir_okay=False, path_type=Path), ) -def test_files( - testname: str, files: tuple[anyio.Path, ...], save_results: bool -) -> None: +def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> None: versions = get_package_versions() installer = MatNWBInstaller(MATNWB_INSTALL_DIR) if "matnwb" in testname.lower(): @@ -177,7 +175,7 @@ def test_files( dandiset, asset_paths = dandiset_cache[path] except KeyError: dandiset = Dandiset( - path=anyio.Path(path), reports_root=Path.cwd(), versions=versions + path=path, reports_root=Path.cwd(), versions=versions ) asset_paths = anyio.run(dandiset.get_asset_paths) dandiset_cache[path] = (dandiset, asset_paths) @@ -234,9 +232,7 @@ def time_mounts( "Testing Dandiset %s, asset %s ...", a.dandiset_id, a.asset_path ) fpath = mounter.resolve(a) - asset_obj = Asset( - filepath=anyio.Path(str(fpath)), asset_path=a.asset_path - ) + asset_obj = Asset(filepath=fpath, asset_path=a.asset_path) for tname, tfunc in TIMED_TESTS.items(): log.info("Running test %r", tname) r = anyio.run(tfunc, asset_obj) diff --git a/code/src/healthstatus/checker.py b/code/src/healthstatus/checker.py index f31ddc136..b7c53eafc 100644 --- a/code/src/healthstatus/checker.py +++ b/code/src/healthstatus/checker.py @@ -74,7 +74,7 @@ async def run(self) -> UntestedAsset: @dataclass class HealthStatus: - backup_root: anyio.Path + backup_root: Path reports_root: Path dandisets: tuple[str, ...] dandiset_jobs: int @@ -106,12 +106,12 @@ async def aiterdandisets(self) -> AsyncGenerator[Dandiset, None]: for did in self.dandisets: yield await self.get_dandiset(self.backup_root / did) else: - async for p in self.backup_root.iterdir(): + async for p in anyio.Path(self.backup_root).iterdir(): if re.fullmatch(r"\d{6,}", p.name) and await p.is_dir(): log.info("Found Dandiset %s", p.name) - yield await self.get_dandiset(p) + yield await self.get_dandiset(Path(p)) - async def get_dandiset(self, path: anyio.Path) -> Dandiset: + async def get_dandiset(self, path: Path) -> Dandiset: return Dandiset( path=path, reports_root=self.reports_root, versions=self.versions ) @@ -119,7 +119,7 @@ async def get_dandiset(self, path: anyio.Path) -> Dandiset: @dataclass class Dandiset: - path: anyio.Path + path: Path commit: str = field(init=False) reports_root: Path versions: dict[str, str] @@ -227,13 +227,13 @@ async def aiterjobs() -> AsyncGenerator[TestCase, None]: return report async def aiterassets(self) -> AsyncIterator[Asset]: - def mkasset(filepath: anyio.Path) -> Asset: + def mkasset(filepath: Path) -> Asset: return Asset( filepath=filepath, asset_path=filepath.relative_to(self.path).as_posix(), ) - dirs = deque([self.path]) + dirs = deque([anyio.Path(self.path)]) while dirs: async for p in dirs.popleft().iterdir(): if p.name in ( @@ -246,11 +246,11 @@ def mkasset(filepath: anyio.Path) -> Asset: continue if await p.is_dir(): if p.suffix in (".zarr", ".ngff"): - yield mkasset(p) + yield mkasset(Path(p)) else: dirs.append(p) elif p.name != "dandiset.yaml": - yield mkasset(p) + yield mkasset(Path(p)) async def get_asset_paths(self) -> set[str]: log.info("Scanning Dandiset %s", self.identifier) diff --git a/code/src/healthstatus/config.py b/code/src/healthstatus/config.py index be3768ba1..06cd0da7e 100644 --- a/code/src/healthstatus/config.py +++ b/code/src/healthstatus/config.py @@ -1,11 +1,10 @@ from pathlib import Path -import anyio MATNWB_INSTALL_DIR = Path("matnwb") # in current working directory PACKAGES_TO_VERSION = ["pynwb", "hdmf"] -PYNWB_OPEN_LOAD_NS_SCRIPT = anyio.Path(__file__).with_name("pynwb_open_load_ns.py") +PYNWB_OPEN_LOAD_NS_SCRIPT = Path(__file__).with_name("pynwb_open_load_ns.py") TIMEOUT = 3600 diff --git a/code/src/healthstatus/core.py b/code/src/healthstatus/core.py index 1884ec9c8..e3137e21a 100644 --- a/code/src/healthstatus/core.py +++ b/code/src/healthstatus/core.py @@ -7,7 +7,6 @@ import logging from pathlib import Path from typing import Any, Dict, List, Optional, Sequence, Union -import anyio from pydantic import BaseModel, Field, field_validator import yaml from .yamllineno import load_yaml_lineno @@ -32,7 +31,7 @@ class TestResult: @dataclass class Asset: - filepath: anyio.Path + filepath: Path asset_path: str def is_nwb(self) -> bool: From c3a7895a0aab799b76e1bf784c2fed6837927405 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 09:04:19 -0500 Subject: [PATCH 09/34] --mounts option --- code/src/healthstatus/__main__.py | 53 +++++++++++++++++++++++++++---- code/src/healthstatus/mounts.py | 34 +++++++++++++------- 2 files changed, 70 insertions(+), 17 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index ce12dc28a..b08033110 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -1,21 +1,50 @@ from __future__ import annotations from collections import Counter, defaultdict +from enum import Enum import logging from operator import attrgetter from pathlib import Path import re import sys -from typing import Optional +from typing import Generic, Optional, TypeVar import anyio import click from packaging.version import Version from .checker import AssetReport, Dandiset, HealthStatus from .config import MATNWB_INSTALL_DIR from .core import Asset, DandisetStatus, Outcome, TestSummary, log -from .mounts import AssetInDandiset, FuseMounter, MounterFactory +from .mounts import AssetInDandiset, FuseMounter, MountType, iter_mounters from .tests import TESTS, TIMED_TESTS from .util import MatNWBInstaller, get_package_versions +E = TypeVar("E", bound="Enum") + + +class EnumSet(click.ParamType, Generic[E]): + name = "enumset" + + def __init__(self, klass: type[E]) -> None: + self.klass = klass + + def convert( + self, + value: str | set[E], + param: click.Parameter | None, + ctx: click.Context | None, + ) -> set[E]: + if not isinstance(value, str): + return value + selected = set() + for v in re.split(r"\s*,\s*", value): + try: + selected.add(self.klass(v)) + except ValueError: + self.fail(f"{value!r}: invalid option {v!r}", param, ctx) + return selected + + def get_metavar(self, _param: click.Parameter) -> str: + return "[" + ",".join(v.value for v in self.klass) + "]" + @click.group() def main() -> None: @@ -206,7 +235,6 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No "-d", "--dataset-path", type=click.Path(file_okay=False, exists=True, path_type=Path), - required=True, ) @click.option( "-m", @@ -214,6 +242,13 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No type=click.Path(file_okay=False, exists=True, path_type=Path), required=True, ) +@click.option( + "-M", + "--mounts", + type=EnumSet(MountType), + default=set(MountType), + show_default="all", +) @click.argument( "assets", nargs=-1, @@ -221,10 +256,16 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No metavar="DANDISET_ID/ASSET_PATH ...", ) def time_mounts( - assets: tuple[AssetInDandiset, ...], dataset_path: Path, mount_point: Path + assets: tuple[AssetInDandiset, ...], + dataset_path: Path | None, + mount_point: Path, + mounts: set[MountType], ) -> None: - factory = MounterFactory(dataset_path=dataset_path, mount_path=mount_point) - for mounter in factory.iter_mounters(): + for mounter in iter_mounters( + types=mounts, + dataset_path=dataset_path, + mount_path=mount_point, + ): log.info("Testing with mount type: %s", mounter.name) with mounter.mount(): for a in assets: diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 4bf04caeb..9f1bd368a 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -3,12 +3,14 @@ from collections.abc import Iterator from contextlib import AbstractContextManager, contextmanager from dataclasses import dataclass, field +from enum import Enum import os from pathlib import Path import re from signal import SIGINT import subprocess from time import sleep +import click from ghreq import Client import requests from .core import log @@ -155,21 +157,31 @@ def mount_webdav(self, url: str) -> Iterator[None]: raise NotImplementedError -@dataclass -class MounterFactory: - dataset_path: Path - mount_path: Path - logdir: Path = field(default_factory=Path) +class MountType(Enum): + FUSEFS = "fusefs" + WEBDAVFS = "webdavfs" + DAVFS2 = "davfs2" + - def iter_mounters(self) -> Iterator[Mounter]: +def iter_mounters( + types: set[MountType], + dataset_path: Path | None, + mount_path: Path, +) -> Iterator[Mounter]: + logdir = Path() + if MountType.FUSEFS in types: + if dataset_path is None: + raise click.UsageError("--dataset-path must be set when using fusefs mount") yield FuseMounter( - dataset_path=self.dataset_path, - mount_path=self.mount_path, + dataset_path=dataset_path, + mount_path=mount_path, update=True, - logdir=self.logdir, + logdir=logdir, ) - yield WebDavFSMounter(mount_path=self.mount_path, logdir=self.logdir) - yield DavFS2Mounter(mount_path=self.mount_path, logdir=self.logdir) + if MountType.WEBDAVFS in types: + yield WebDavFSMounter(mount_path=mount_path, logdir=logdir) + if MountType.DAVFS2 in types: + yield DavFS2Mounter(mount_path=mount_path, logdir=logdir) def update_dandisets(dataset_path: Path) -> None: From 58941fb89212e8b1790c2a5079fda546681d1989 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 09:40:07 -0500 Subject: [PATCH 10/34] --[no-]update-dataset --- code/src/healthstatus/__main__.py | 12 ++++++++++++ code/src/healthstatus/mounts.py | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index b08033110..93bed1f05 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -21,6 +21,7 @@ class EnumSet(click.ParamType, Generic[E]): + # The enum's values must be strs. name = "enumset" def __init__(self, klass: type[E]) -> None: @@ -249,6 +250,15 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No default=set(MountType), show_default="all", ) +@click.option( + "--update-dataset/--no-update-dataset", + help=( + "Whether to pull the latest changes to the Dandisets dataset repository" + " before fuse-mounting" + ), + default=True, + show_default=True, +) @click.argument( "assets", nargs=-1, @@ -260,11 +270,13 @@ def time_mounts( dataset_path: Path | None, mount_point: Path, mounts: set[MountType], + update_dataset: bool, ) -> None: for mounter in iter_mounters( types=mounts, dataset_path=dataset_path, mount_path=mount_point, + update_dataset=update_dataset, ): log.info("Testing with mount type: %s", mounter.name) with mounter.mount(): diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 9f1bd368a..afd509c3b 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -167,6 +167,7 @@ def iter_mounters( types: set[MountType], dataset_path: Path | None, mount_path: Path, + update_dataset: bool = True, ) -> Iterator[Mounter]: logdir = Path() if MountType.FUSEFS in types: @@ -175,7 +176,7 @@ def iter_mounters( yield FuseMounter( dataset_path=dataset_path, mount_path=mount_path, - update=True, + update=update_dataset, logdir=logdir, ) if MountType.WEBDAVFS in types: From c4b3ab1c34877d613bfb983a8469663b60dc14e3 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 09:40:55 -0500 Subject: [PATCH 11/34] time-mounts: Install MatNWB --- code/src/healthstatus/__main__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 93bed1f05..95f12fbb4 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -272,6 +272,8 @@ def time_mounts( mounts: set[MountType], update_dataset: bool, ) -> None: + installer = MatNWBInstaller(MATNWB_INSTALL_DIR) + installer.install(update=True) for mounter in iter_mounters( types=mounts, dataset_path=dataset_path, From 2b27fe95088282853ffda4127190e2d8349f07fe Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 09:55:05 -0500 Subject: [PATCH 12/34] fuse: Clone dandi/dandisets if not present at path --- code/src/healthstatus/mounts.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index afd509c3b..c34c122f8 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -60,7 +60,18 @@ def name(self) -> str: @contextmanager def mount(self) -> Iterator[None]: - if self.update: + if not self.dataset_path.exists(): + log.info("Cloning dandi/dandisets to %s ...", self.dataset_path) + subprocess.run( + [ + "datalad", + "clone", + "git@github.com:dandi/dandisets.git", + os.fspath(self.dataset_path), + ], + check=True, + ) + elif self.update: update_dandisets(self.dataset_path) with (self.logdir / "fuse.log").open("wb") as fp: log.debug("Starting `datalad fusefs` process ...") From e7e1fe43ff829de3bdd36962a0cde52f6467bc50 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 15:53:55 -0500 Subject: [PATCH 13/34] Display summary of benchmark results --- code/src/healthstatus/__main__.py | 44 ++++++++++++++++++++++++++++--- code/src/healthstatus/mounts.py | 8 ++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 95f12fbb4..3e9997c43 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -1,11 +1,13 @@ from __future__ import annotations from collections import Counter, defaultdict +from csv import DictWriter from enum import Enum import logging from operator import attrgetter from pathlib import Path import re import sys +import textwrap from typing import Generic, Optional, TypeVar import anyio import click @@ -13,7 +15,13 @@ from .checker import AssetReport, Dandiset, HealthStatus from .config import MATNWB_INSTALL_DIR from .core import Asset, DandisetStatus, Outcome, TestSummary, log -from .mounts import AssetInDandiset, FuseMounter, MountType, iter_mounters +from .mounts import ( + AssetInDandiset, + FuseMounter, + MountBenchmark, + MountType, + iter_mounters, +) from .tests import TESTS, TIMED_TESTS from .util import MatNWBInstaller, get_package_versions @@ -274,6 +282,7 @@ def time_mounts( ) -> None: installer = MatNWBInstaller(MATNWB_INSTALL_DIR) installer.install(update=True) + results = [] for mounter in iter_mounters( types=mounts, dataset_path=dataset_path, @@ -292,10 +301,39 @@ def time_mounts( log.info("Running test %r", tname) r = anyio.run(tfunc, asset_obj) if r.outcome is Outcome.PASS: + assert r.elapsed is not None log.info("Test passed in %f seconds", r.elapsed) + results.append( + MountBenchmark( + mount_name=mounter.name, + asset=a, + testname=tname, + elapsed=r.elapsed, + ) + ) + elif r.outcome is Outcome.FAIL: + assert r.output is not None + log.error( + "Test failed; output:\n\n%s\n", + textwrap.indent(r.output, " " * 4), + ) + sys.exit(1) else: - log.info("Test result: %s", r.outcome.name) - ### Summary? + assert r.outcome is Outcome.TIMEOUT + log.error("Test timed out") + sys.exit(1) + csvout = DictWriter(sys.stdout, ["mount", "dandiset", "asset", "test", "time_sec"]) + csvout.writeheader() + for res in results: + csvout.writerow( + { + "mount": res.mount_name, + "dandiset": res.asset.dandiset_id, + "asset": res.asset.asset_path, + "test": res.testname, + "time_sec": res.elapsed, + } + ) def find_dandiset(asset: Path) -> Optional[Path]: diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index c34c122f8..347ac68ee 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -32,6 +32,14 @@ def parse(cls, s: str) -> AssetInDandiset: return AssetInDandiset(dandiset_id, asset_path) +@dataclass +class MountBenchmark: + mount_name: str + asset: AssetInDandiset + testname: str + elapsed: float + + class Mounter(ABC): @property @abstractmethod From 47a58a1bb500113d0fbe17e9da864889eaccc41d Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 16:07:46 -0500 Subject: [PATCH 14/34] time-files --- code/src/healthstatus/__main__.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 3e9997c43..17a3c74f4 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -336,6 +336,37 @@ def time_mounts( ) +@main.command() +@click.argument("testname", type=click.Choice(list(TIMED_TESTS.keys()))) +@click.argument( + "files", + nargs=-1, + type=click.Path(exists=True, dir_okay=False, path_type=Path), +) +def time_files(testname: str, files: tuple[Path, ...]) -> None: + if "matnwb" in testname.lower(): + installer = MatNWBInstaller(MATNWB_INSTALL_DIR) + installer.install(update=True) + testfunc = TIMED_TESTS[testname] + for f in files: + asset = Asset(filepath=f, asset_path=str(f)) + log.info("Testing %s ...", f) + r = anyio.run(testfunc, asset) + if r.outcome is Outcome.PASS: + assert r.elapsed is not None + log.info("Test passed in %f seconds", r.elapsed) + elif r.outcome is Outcome.FAIL: + assert r.output is not None + log.error( + "Test failed; output:\n\n%s\n", textwrap.indent(r.output, " " * 4) + ) + sys.exit(1) + else: + assert r.outcome is Outcome.TIMEOUT + log.error("Test timed out") + sys.exit(1) + + def find_dandiset(asset: Path) -> Optional[Path]: if not asset.is_absolute(): asset = asset.absolute() From fbf423e9c521abc462fcefa2a9bb05fbd24a34df Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 22 Jan 2024 17:10:14 -0500 Subject: [PATCH 15/34] Assorted code improvements --- code/src/healthstatus/__main__.py | 39 ++++---- code/src/healthstatus/checker.py | 57 ++++++------ code/src/healthstatus/core.py | 48 +++++++--- code/src/healthstatus/mounts.py | 6 +- code/src/healthstatus/tests.py | 144 ++++++++++++++++++------------ 5 files changed, 177 insertions(+), 117 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 17a3c74f4..2f62acbc5 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -14,7 +14,7 @@ from packaging.version import Version from .checker import AssetReport, Dandiset, HealthStatus from .config import MATNWB_INSTALL_DIR -from .core import Asset, DandisetStatus, Outcome, TestSummary, log +from .core import AssetPath, AssetTestResult, DandisetStatus, Outcome, TestSummary, log from .mounts import ( AssetInDandiset, FuseMounter, @@ -183,7 +183,9 @@ def report() -> None: print(file=fp) print("# By Dandiset", file=fp) print("| Dandiset | " + " | ".join(TESTS.keys()) + " | Untested |", file=fp) - print("| --- | " + " | ".join("---" for _ in TESTS) + " | --- |", file=fp) + print( + "| --- | " + " | ".join("---" for _ in TESTS.keys()) + " | --- |", file=fp + ) for s in sorted(all_statuses, key=attrgetter("dandiset")): print(s.as_row(), file=fp) @@ -204,9 +206,9 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No else: installer.download(update=False) versions["matnwb"] = installer.get_version() - testfunc = TESTS[testname] + testfunc = TESTS.get(testname) ok = True - dandiset_cache: dict[Path, tuple[Dandiset, set[str]]] = {} + dandiset_cache: dict[Path, tuple[Dandiset, set[AssetPath]]] = {} for f in files: if save_results and (path := find_dandiset(Path(f))) is not None: try: @@ -218,14 +220,13 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No asset_paths = anyio.run(dandiset.get_asset_paths) dandiset_cache[path] = (dandiset, asset_paths) report = AssetReport(dandiset=dandiset) - ap = Path(f).relative_to(path).as_posix() + ap = AssetPath(Path(f).relative_to(path).as_posix()) else: report = None asset_paths = None - ap = str(f) - asset = Asset(filepath=f, asset_path=ap) + ap = None log.info("Testing %s ...", f) - r = anyio.run(testfunc, asset) + r = anyio.run(testfunc.run, f) if r.output is not None: print(r.output, end="") log.info("%s: %s", f, r.outcome.name) @@ -234,7 +235,13 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No if save_results: assert report is not None assert asset_paths is not None - report.register_test_result(r) + assert ap is not None + atr = AssetTestResult( + testname=testname, + asset_path=AssetPath(ap), + result=r, + ) + report.register_test_result(atr) report.dump(asset_paths) sys.exit(0 if ok else 1) @@ -296,10 +303,9 @@ def time_mounts( "Testing Dandiset %s, asset %s ...", a.dandiset_id, a.asset_path ) fpath = mounter.resolve(a) - asset_obj = Asset(filepath=fpath, asset_path=a.asset_path) - for tname, tfunc in TIMED_TESTS.items(): - log.info("Running test %r", tname) - r = anyio.run(tfunc, asset_obj) + for tfunc in TIMED_TESTS: + log.info("Running test %r", tfunc.NAME) + r = anyio.run(tfunc.run, fpath) if r.outcome is Outcome.PASS: assert r.elapsed is not None log.info("Test passed in %f seconds", r.elapsed) @@ -307,7 +313,7 @@ def time_mounts( MountBenchmark( mount_name=mounter.name, asset=a, - testname=tname, + testname=tfunc.NAME, elapsed=r.elapsed, ) ) @@ -347,11 +353,10 @@ def time_files(testname: str, files: tuple[Path, ...]) -> None: if "matnwb" in testname.lower(): installer = MatNWBInstaller(MATNWB_INSTALL_DIR) installer.install(update=True) - testfunc = TIMED_TESTS[testname] + testfunc = TIMED_TESTS.get(testname) for f in files: - asset = Asset(filepath=f, asset_path=str(f)) log.info("Testing %s ...", f) - r = anyio.run(testfunc, asset) + r = anyio.run(testfunc.run, f) if r.outcome is Outcome.PASS: assert r.elapsed is not None log.info("Test passed in %f seconds", r.elapsed) diff --git a/code/src/healthstatus/checker.py b/code/src/healthstatus/checker.py index b7c53eafc..685c2d8ae 100644 --- a/code/src/healthstatus/checker.py +++ b/code/src/healthstatus/checker.py @@ -1,6 +1,6 @@ from __future__ import annotations from collections import defaultdict, deque -from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Callable +from collections.abc import AsyncGenerator, AsyncIterator from dataclasses import dataclass, field from datetime import datetime from os.path import getsize @@ -15,14 +15,15 @@ from .config import WORKERS_PER_DANDISET from .core import ( Asset, + AssetPath, + AssetTestResult, DandisetStatus, Outcome, - TestResult, TestStatus, UntestedAsset, log, ) -from .tests import TESTFUNCS, TESTS +from .tests import TESTS, Test from .util import get_package_versions @@ -30,18 +31,22 @@ class TestCase: dandiset_id: str asset: Asset - testfunc: Callable[[Asset], Awaitable[TestResult]] + testfunc: Test - async def run(self) -> TestResult: - r = await self.testfunc(self.asset) + async def run(self) -> AssetTestResult: + r = await self.testfunc.run(self.asset.filepath) log.info( "Dandiset %s, asset %s, test %s: %s", self.dandiset_id, self.asset.asset_path, - r.testname, + self.testfunc.NAME, r.outcome.name, ) - return r + return AssetTestResult( + testname=self.testfunc.NAME, + asset_path=self.asset.asset_path, + result=r, + ) @dataclass @@ -159,7 +164,7 @@ async def test_all_assets(self) -> DandisetReport: async def dowork(job: TestCase | Untested) -> None: res = await job.run() - if isinstance(res, TestResult): + if isinstance(res, AssetTestResult): report.register_test_result(res) else: assert isinstance(res, UntestedAsset) @@ -172,7 +177,7 @@ async def aiterassets() -> AsyncGenerator[TestCase | Untested, None]: ) report.nassets += 1 if asset.is_nwb(): - for t in TESTFUNCS: + for t in TESTS: yield TestCase( asset=asset, testfunc=t, dandiset_id=self.identifier ) @@ -220,17 +225,17 @@ async def dowork(job: TestCase) -> None: report.register_test_result(await job.run()) async def aiterjobs() -> AsyncGenerator[TestCase, None]: - for t in TESTFUNCS: + for t in TESTS: yield TestCase(asset=asset, testfunc=t, dandiset_id=self.identifier) await pool_tasks(dowork, aiterjobs(), WORKERS_PER_DANDISET) return report async def aiterassets(self) -> AsyncIterator[Asset]: - def mkasset(filepath: Path) -> Asset: + def mkasset(filepath: anyio.Path) -> Asset: return Asset( - filepath=filepath, - asset_path=filepath.relative_to(self.path).as_posix(), + filepath=Path(filepath), + asset_path=AssetPath(filepath.relative_to(self.path).as_posix()), ) dirs = deque([anyio.Path(self.path)]) @@ -246,13 +251,13 @@ def mkasset(filepath: Path) -> Asset: continue if await p.is_dir(): if p.suffix in (".zarr", ".ngff"): - yield mkasset(Path(p)) + yield mkasset(p) else: dirs.append(p) elif p.name != "dandiset.yaml": - yield mkasset(Path(p)) + yield mkasset(p) - async def get_asset_paths(self) -> set[str]: + async def get_asset_paths(self) -> set[AssetPath]: log.info("Scanning Dandiset %s", self.identifier) return {asset.asset_path async for asset in self.aiterassets()} @@ -268,7 +273,7 @@ class DandisetReport: started: datetime = field(default_factory=lambda: datetime.now().astimezone()) ended: Optional[datetime] = None - def register_test_result(self, r: TestResult) -> None: + def register_test_result(self, r: AssetTestResult) -> None: self.tests[r.testname].by_outcome[r.outcome].append(r) def register_untested(self, d: UntestedAsset) -> None: @@ -321,40 +326,40 @@ def dump(self) -> None: @dataclass class TestReport: - by_outcome: dict[Outcome, list[TestResult]] = field( + by_outcome: dict[Outcome, list[AssetTestResult]] = field( init=False, default_factory=lambda: defaultdict(list) ) @property - def passed(self) -> list[TestResult]: + def passed(self) -> list[AssetTestResult]: return self.by_outcome[Outcome.PASS] @property - def failed(self) -> list[TestResult]: + def failed(self) -> list[AssetTestResult]: return self.by_outcome[Outcome.FAIL] @property - def timedout(self) -> list[TestResult]: + def timedout(self) -> list[AssetTestResult]: return self.by_outcome[Outcome.TIMEOUT] @dataclass class AssetReport: dandiset: Dandiset - results: list[TestResult] = field(default_factory=list) + results: list[AssetTestResult] = field(default_factory=list) started: datetime = field(default_factory=lambda: datetime.now().astimezone()) - def register_test_result(self, r: TestResult) -> None: + def register_test_result(self, r: AssetTestResult) -> None: self.results.append(r) - def dump(self, asset_paths: set[str]) -> None: + def dump(self, asset_paths: set[AssetPath]) -> None: try: status = self.dandiset.load_status() except FileNotFoundError: status = DandisetStatus( dandiset=self.dandiset.identifier, dandiset_version=self.dandiset.commit, - tests=[TestStatus(name=testname) for testname in TESTS], + tests=[TestStatus(name=testname) for testname in TESTS.keys()], versions=self.dandiset.versions, ) for r in self.results: diff --git a/code/src/healthstatus/core.py b/code/src/healthstatus/core.py index e3137e21a..3b7e473db 100644 --- a/code/src/healthstatus/core.py +++ b/code/src/healthstatus/core.py @@ -6,13 +6,15 @@ from enum import Enum import logging from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Union +from typing import Any, Dict, List, NewType, Optional, Sequence, Union from pydantic import BaseModel, Field, field_validator import yaml from .yamllineno import load_yaml_lineno log = logging.getLogger(__package__) +AssetPath = NewType("AssetPath", str) + class Outcome(Enum): PASS = "pass" @@ -22,22 +24,35 @@ class Outcome(Enum): @dataclass class TestResult: - testname: str - asset_path: str outcome: Outcome - output: Optional[str] = None - elapsed: Optional[float] = None + output: str | None = None # Only set if outcome is FAIL + elapsed: float | None = None # Only set if outcome is PASS @dataclass class Asset: filepath: Path - asset_path: str + asset_path: AssetPath def is_nwb(self) -> bool: return self.filepath.suffix.lower() == ".nwb" +@dataclass +class AssetTestResult: + testname: str + asset_path: AssetPath + result: TestResult + + @property + def outcome(self) -> Outcome: + return self.result.outcome + + @property + def output(self) -> str | None: + return self.result.output + + class VersionedPath(BaseModel): path: str versions: Dict[str, str] @@ -75,13 +90,16 @@ def asset_outcomes(self) -> Iterator[tuple[str, Outcome]]: for asset in self.assets_timeout: yield (getpath(asset), Outcome.TIMEOUT) - def outdated_assets(self, current_versions: dict[str, str]) -> Iterator[str]: + def outdated_assets(self, current_versions: dict[str, str]) -> Iterator[AssetPath]: for asset in [*self.assets_ok, *self.assets_nok, *self.assets_timeout]: if getversions(asset) != current_versions: yield getpath(asset) def update_asset( - self, asset_path: str, outcome: Outcome, versions: Optional[dict[str, str]] + self, + asset_path: AssetPath, + outcome: Outcome, + versions: Optional[dict[str, str]], ) -> None: self.assets_ok = [a for a in self.assets_ok if getpath(a) != asset_path] self.assets_nok = [a for a in self.assets_nok if getpath(a) != asset_path] @@ -165,7 +183,7 @@ def to_file(self, path: Path) -> None: jsonable = self.model_dump(mode="json") path.write_text(yaml.dump(jsonable)) - def update_asset(self, res: TestResult, versions: dict[str, str]) -> None: + def update_asset(self, res: AssetTestResult, versions: dict[str, str]) -> None: try: t = next(t for t in self.tests if t.name == res.testname) except StopIteration: @@ -179,7 +197,9 @@ def update_asset(self, res: TestResult, versions: dict[str, str]) -> None: if vdict is not None: self.prune_versions(versions) - def retain(self, asset_paths: set[str], current_versions: dict[str, str]) -> None: + def retain( + self, asset_paths: set[AssetPath], current_versions: dict[str, str] + ) -> None: for t in self.tests: t.assets_ok = [a for a in t.assets_ok if getpath(a) in asset_paths] t.assets_nok = [a for a in t.assets_nok if getpath(a) in asset_paths] @@ -314,17 +334,17 @@ def as_row(self) -> str: @dataclass class AssetTestInfo: - asset_path: str + asset_path: AssetPath testname: str versions: dict[str, str] outcome: Outcome -def getpath(asset: str | VersionedPath) -> str: +def getpath(asset: str | VersionedPath) -> AssetPath: if isinstance(asset, str): - return asset + return AssetPath(asset) else: - return asset.path + return AssetPath(asset.path) def getversions(asset: str | VersionedPath) -> Optional[dict[str, str]]: diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 347ac68ee..d92a84755 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -13,13 +13,13 @@ import click from ghreq import Client import requests -from .core import log +from .core import AssetPath, log @dataclass class AssetInDandiset: dandiset_id: str - asset_path: str + asset_path: AssetPath @classmethod def parse(cls, s: str) -> AssetInDandiset: @@ -29,7 +29,7 @@ def parse(cls, s: str) -> AssetInDandiset: f"invalid asset-in-dandiset: {s!r}: must be in form" " /" ) - return AssetInDandiset(dandiset_id, asset_path) + return AssetInDandiset(dandiset_id, AssetPath(asset_path)) @dataclass diff --git a/code/src/healthstatus/tests.py b/code/src/healthstatus/tests.py index ee59dfa24..06aba7041 100644 --- a/code/src/healthstatus/tests.py +++ b/code/src/healthstatus/tests.py @@ -1,19 +1,97 @@ from __future__ import annotations +from collections.abc import Iterator +from dataclasses import dataclass, field import os +from pathlib import Path +import shlex import subprocess import sys import time -from typing import Optional +from typing import ClassVar, Protocol import anyio from .config import MATNWB_INSTALL_DIR, PYNWB_OPEN_LOAD_NS_SCRIPT, TIMEOUT -from .core import Asset, Outcome, TestResult +from .core import Outcome, TestResult, log +from .util import MatNWBInstaller + + +class Test(Protocol): + NAME: ClassVar[str] + + def prepare(self) -> None: + ... + + async def run(self, path: Path) -> TestResult: + ... + + +@dataclass +class TestRegistry: + tests: dict[str, Test] = field(init=False, default_factory=dict) + + def register(self, klass: type[Test]) -> type[Test]: + self.tests[klass.NAME] = klass() + return klass + + def __iter__(self) -> Iterator[Test]: + return iter(self.tests.values()) + + def get(self, name: str) -> Test: + return self.tests[name] + + def keys(self) -> Iterator[str]: + return iter(self.tests.keys()) + + +TESTS = TestRegistry() + +TIMED_TESTS = TestRegistry() + + +@TESTS.register +@TIMED_TESTS.register +class PyNwbTest: + NAME: ClassVar[str] = "pynwb_open_load_ns" + + def prepare(self) -> None: + pass + + async def run(self, path: Path) -> TestResult: + return await run_test_command( + [sys.executable, os.fspath(PYNWB_OPEN_LOAD_NS_SCRIPT), os.fspath(path)], + ) + + +@TESTS.register +@TIMED_TESTS.register +class MatNwbTest: + NAME: ClassVar[str] = "matnwb_nwbRead" + + def prepare(self) -> None: + MatNWBInstaller(MATNWB_INSTALL_DIR).install(update=True) + + async def run(self, path: Path) -> TestResult: + return await run_test_command( + ["matlab", "-nodesktop", "-batch", f"nwb = nwbRead({str(path)!r})"], + env={"MATLABPATH": os.fspath(MATNWB_INSTALL_DIR)}, + ) + + +@TIMED_TESTS.register +class DandiLsTest: + NAME: ClassVar[str] = "dandi_ls" + + def prepare(self) -> None: + pass + + async def run(self, path: Path) -> TestResult: + return await run_test_command( + ["dandi", "ls", "--", os.fspath(path)], + env={"DANDI_CACHE": "ignore"}, + ) async def run_test_command( - testname: str, - asset: Asset, - command: list[str], - env: Optional[dict[str, str]] = None, + command: list[str], env: dict[str, str] | None = None ) -> TestResult: if env is not None: env = {**os.environ, **env} @@ -21,6 +99,7 @@ async def run_test_command( try: with anyio.fail_after(TIMEOUT): start = time.perf_counter() + log.debug("Running: %s", shlex.join(command)) r = await anyio.run_process( command, stdout=subprocess.PIPE, @@ -31,61 +110,12 @@ async def run_test_command( end = time.perf_counter() elapsed = end - start except TimeoutError: - return TestResult( - testname=testname, asset_path=asset.asset_path, outcome=Outcome.TIMEOUT - ) + return TestResult(outcome=Outcome.TIMEOUT) else: if r.returncode == 0: - return TestResult( - testname=testname, - asset_path=asset.asset_path, - outcome=Outcome.PASS, - elapsed=elapsed, - ) + return TestResult(outcome=Outcome.PASS, elapsed=elapsed) else: return TestResult( - testname=testname, - asset_path=asset.asset_path, outcome=Outcome.FAIL, output=r.stdout.decode("utf-8", "surrogateescape"), ) - - -async def pynwb_open_load_ns(asset: Asset) -> TestResult: - return await run_test_command( - "pynwb_open_load_ns", - asset, - [sys.executable, str(PYNWB_OPEN_LOAD_NS_SCRIPT), str(asset.filepath)], - ) - - -async def matnwb_nwbRead(asset: Asset) -> TestResult: - return await run_test_command( - "matnwb_nwbRead", - asset, - [ - "matlab", - "-nodesktop", - "-batch", - f"nwb = nwbRead({str(asset.filepath)!r})", - ], - env={"MATLABPATH": str(MATNWB_INSTALL_DIR)}, - ) - - -async def dandi_ls(asset: Asset) -> TestResult: - return await run_test_command( - "dandi_ls", - asset, - ["dandi", "ls", str(asset.filepath)], - env={"DANDI_CACHE": "ignore"}, - ) - - -TESTFUNCS = [pynwb_open_load_ns, matnwb_nwbRead] - -TESTS = {t.__name__: t for t in TESTFUNCS} - -TIMED_TEST_FUNCS = [*TESTFUNCS, dandi_ls] - -TIMED_TESTS = {t.__name__: t for t in TIMED_TEST_FUNCS} From 16ec8d284916e607cfbb95fe787330bddb484a1f Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 23 Jan 2024 08:35:53 -0500 Subject: [PATCH 16/34] Factor out test preparation & version-fetching --- code/src/healthstatus/__main__.py | 29 +++++++++++------------------ code/src/healthstatus/checker.py | 3 +-- code/src/healthstatus/tests.py | 27 +++++++++++++++++++-------- code/src/healthstatus/util.py | 8 +------- 4 files changed, 32 insertions(+), 35 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 2f62acbc5..086f5116f 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -13,7 +13,6 @@ import click from packaging.version import Version from .checker import AssetReport, Dandiset, HealthStatus -from .config import MATNWB_INSTALL_DIR from .core import AssetPath, AssetTestResult, DandisetStatus, Outcome, TestSummary, log from .mounts import ( AssetInDandiset, @@ -23,7 +22,6 @@ iter_mounters, ) from .tests import TESTS, TIMED_TESTS -from .util import MatNWBInstaller, get_package_versions E = TypeVar("E", bound="Enum") @@ -99,15 +97,16 @@ def check( dandisets: tuple[str, ...], mode: str, ) -> None: - installer = MatNWBInstaller(MATNWB_INSTALL_DIR) - installer.install(update=True) + pkg_versions = {} + for t in TESTS: + pkg_versions.update(t.prepare()) hs = HealthStatus( backup_root=mount_point, reports_root=Path.cwd(), dandisets=dandisets, dandiset_jobs=dandiset_jobs, + versions=pkg_versions, ) - hs.versions["matnwb"] = installer.get_version() mnt = FuseMounter(dataset_path=dataset_path, mount_path=mount_point, update=True) with mnt.mount(): if mode == "all": @@ -199,13 +198,9 @@ def report() -> None: type=click.Path(exists=True, dir_okay=False, path_type=Path), ) def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> None: - versions = get_package_versions() - installer = MatNWBInstaller(MATNWB_INSTALL_DIR) - if "matnwb" in testname.lower(): - installer.install(update=True) - else: - installer.download(update=False) - versions["matnwb"] = installer.get_version() + pkg_versions = {} + for t in TESTS: + pkg_versions.update(t.prepare(minimal=t.NAME != testname)) testfunc = TESTS.get(testname) ok = True dandiset_cache: dict[Path, tuple[Dandiset, set[AssetPath]]] = {} @@ -215,7 +210,7 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No dandiset, asset_paths = dandiset_cache[path] except KeyError: dandiset = Dandiset( - path=path, reports_root=Path.cwd(), versions=versions + path=path, reports_root=Path.cwd(), versions=pkg_versions ) asset_paths = anyio.run(dandiset.get_asset_paths) dandiset_cache[path] = (dandiset, asset_paths) @@ -287,8 +282,8 @@ def time_mounts( mounts: set[MountType], update_dataset: bool, ) -> None: - installer = MatNWBInstaller(MATNWB_INSTALL_DIR) - installer.install(update=True) + for t in TESTS: + t.prepare() results = [] for mounter in iter_mounters( types=mounts, @@ -350,10 +345,8 @@ def time_mounts( type=click.Path(exists=True, dir_okay=False, path_type=Path), ) def time_files(testname: str, files: tuple[Path, ...]) -> None: - if "matnwb" in testname.lower(): - installer = MatNWBInstaller(MATNWB_INSTALL_DIR) - installer.install(update=True) testfunc = TIMED_TESTS.get(testname) + testfunc.prepare() for f in files: log.info("Testing %s ...", f) r = anyio.run(testfunc.run, f) diff --git a/code/src/healthstatus/checker.py b/code/src/healthstatus/checker.py index 685c2d8ae..9d21a9024 100644 --- a/code/src/healthstatus/checker.py +++ b/code/src/healthstatus/checker.py @@ -24,7 +24,6 @@ log, ) from .tests import TESTS, Test -from .util import get_package_versions @dataclass @@ -83,7 +82,7 @@ class HealthStatus: reports_root: Path dandisets: tuple[str, ...] dandiset_jobs: int - versions: dict[str, str] = field(default_factory=get_package_versions) + versions: dict[str, str] async def run_all(self) -> None: async def dowork(dandiset: Dandiset) -> None: diff --git a/code/src/healthstatus/tests.py b/code/src/healthstatus/tests.py index 06aba7041..e490af027 100644 --- a/code/src/healthstatus/tests.py +++ b/code/src/healthstatus/tests.py @@ -1,6 +1,7 @@ from __future__ import annotations from collections.abc import Iterator from dataclasses import dataclass, field +from importlib.metadata import version import os from pathlib import Path import shlex @@ -9,7 +10,12 @@ import time from typing import ClassVar, Protocol import anyio -from .config import MATNWB_INSTALL_DIR, PYNWB_OPEN_LOAD_NS_SCRIPT, TIMEOUT +from .config import ( + MATNWB_INSTALL_DIR, + PACKAGES_TO_VERSION, + PYNWB_OPEN_LOAD_NS_SCRIPT, + TIMEOUT, +) from .core import Outcome, TestResult, log from .util import MatNWBInstaller @@ -17,7 +23,7 @@ class Test(Protocol): NAME: ClassVar[str] - def prepare(self) -> None: + def prepare(self, minimal: bool = False) -> dict[str, str]: ... async def run(self, path: Path) -> TestResult: @@ -52,8 +58,8 @@ def keys(self) -> Iterator[str]: class PyNwbTest: NAME: ClassVar[str] = "pynwb_open_load_ns" - def prepare(self) -> None: - pass + def prepare(self, minimal: bool = False) -> dict[str, str]: # noqa: U100 + return {pkg: version(pkg) for pkg in PACKAGES_TO_VERSION} async def run(self, path: Path) -> TestResult: return await run_test_command( @@ -66,8 +72,13 @@ async def run(self, path: Path) -> TestResult: class MatNwbTest: NAME: ClassVar[str] = "matnwb_nwbRead" - def prepare(self) -> None: - MatNWBInstaller(MATNWB_INSTALL_DIR).install(update=True) + def prepare(self, minimal: bool = False) -> dict[str, str]: + installer = MatNWBInstaller(MATNWB_INSTALL_DIR) + if minimal: + installer.download(update=False) + else: + installer.install(update=True) + return {"matnwb": installer.get_version()} async def run(self, path: Path) -> TestResult: return await run_test_command( @@ -80,8 +91,8 @@ async def run(self, path: Path) -> TestResult: class DandiLsTest: NAME: ClassVar[str] = "dandi_ls" - def prepare(self) -> None: - pass + def prepare(self, minimal: bool = False) -> dict[str, str]: # noqa: U100 + return {} async def run(self, path: Path) -> TestResult: return await run_test_command( diff --git a/code/src/healthstatus/util.py b/code/src/healthstatus/util.py index 20438ef6d..6bfa9fe92 100644 --- a/code/src/healthstatus/util.py +++ b/code/src/healthstatus/util.py @@ -1,17 +1,11 @@ from __future__ import annotations from dataclasses import dataclass -from importlib.metadata import version from pathlib import Path import subprocess import requests -from .config import PACKAGES_TO_VERSION from .core import log -def get_package_versions() -> dict[str, str]: - return {pkg: version(pkg) for pkg in PACKAGES_TO_VERSION} - - @dataclass class MatNWBInstaller: install_dir: Path @@ -64,7 +58,7 @@ def download(self, update: bool) -> bool: def install(self, update: bool = True) -> None: if self.download(update): - log.info("Installing NeurodataWithoutBorders/matnwb") + log.info("Cleaning NeurodataWithoutBorders/matnwb") subprocess.run( ["git", "clean", "-dxf"], cwd=str(self.install_dir), check=True ) From 0010935c5ff8c5c627751676b59658592d840cbe Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 23 Jan 2024 09:01:29 -0500 Subject: [PATCH 17/34] Add --help text --- code/src/healthstatus/__main__.py | 33 +++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 086f5116f..07b7c7e71 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -53,7 +53,7 @@ def get_metavar(self, _param: click.Parameter) -> str: return "[" + ",".join(v.value for v in self.klass) + "]" -@click.group() +@click.group(context_settings={"help_option_names": ["-h", "--help"]}) def main() -> None: logging.basicConfig( format="%(asctime)s [%(levelname)-8s] %(message)s", @@ -67,6 +67,7 @@ def main() -> None: "-d", "--dataset-path", type=click.Path(file_okay=False, exists=True, path_type=Path), + help="Directory containing a clone of dandi/dandisets", required=True, ) @click.option( @@ -81,11 +82,19 @@ def main() -> None: "-m", "--mount-point", type=click.Path(file_okay=False, exists=True, path_type=Path), + help="Directory at which to mount Dandisets", required=True, ) @click.option( "--mode", type=click.Choice(["all", "random-asset", "random-outdated-asset-first"]), + help=( + "Strategy for selecting which assets to test on. 'all' - Test all" + " assets. 'random-asset' - Test one randomly-selected NWB per Dandiset." + " 'random-outdated-asset-first' - Test one randomly-selected NWB per" + " Dandiset that has not been tested with the latest package versions," + " falling back to 'random-asset' if there are no such assets" + ), default="all", show_default=True, ) @@ -97,6 +106,7 @@ def check( dandisets: tuple[str, ...], mode: str, ) -> None: + """Run health status tests on Dandiset assets and record the results""" pkg_versions = {} for t in TESTS: pkg_versions.update(t.prepare()) @@ -119,6 +129,7 @@ def check( @main.command() def report() -> None: + """Produce a README.md file summarizing `check` results""" dandiset_qtys: Counter[Outcome] = Counter() asset_qtys: Counter[Outcome] = Counter() # Mapping from package names to package versions to test outcomes to @@ -190,7 +201,11 @@ def report() -> None: @main.command() -@click.option("--save-results", is_flag=True) +@click.option( + "--save-results", + is_flag=True, + help="Record test results for Dandiset assets", +) @click.argument("testname", type=click.Choice(list(TESTS.keys()))) @click.argument( "files", @@ -198,6 +213,7 @@ def report() -> None: type=click.Path(exists=True, dir_okay=False, path_type=Path), ) def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> None: + """Run a single health status test on some number of files""" pkg_versions = {} for t in TESTS: pkg_versions.update(t.prepare(minimal=t.NAME != testname)) @@ -246,26 +262,29 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No "-d", "--dataset-path", type=click.Path(file_okay=False, exists=True, path_type=Path), + help=( + "Directory containing a clone of dandi/dandisets. Required when using" + " the fusefs mount." + ), ) @click.option( "-m", "--mount-point", type=click.Path(file_okay=False, exists=True, path_type=Path), + help="Directory at which to mount Dandisets", required=True, ) @click.option( "-M", "--mounts", type=EnumSet(MountType), + help="Comma-separated list of mount types to test against", default=set(MountType), show_default="all", ) @click.option( "--update-dataset/--no-update-dataset", - help=( - "Whether to pull the latest changes to the Dandisets dataset repository" - " before fuse-mounting" - ), + help="Whether to update the dandi/dandisets clone before fuse-mounting", default=True, show_default=True, ) @@ -282,6 +301,7 @@ def time_mounts( mounts: set[MountType], update_dataset: bool, ) -> None: + """Run timed tests on Dandiset assets using various mounting technologies""" for t in TESTS: t.prepare() results = [] @@ -345,6 +365,7 @@ def time_mounts( type=click.Path(exists=True, dir_okay=False, path_type=Path), ) def time_files(testname: str, files: tuple[Path, ...]) -> None: + """Run a single timed test on some number of files""" testfunc = TIMED_TESTS.get(testname) testfunc.prepare() for f in files: From 479d8064209d70eee86e3663e61db46b25fc1ba4 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 23 Jan 2024 09:10:19 -0500 Subject: [PATCH 18/34] Use MountType for mount names --- code/src/healthstatus/__main__.py | 10 ++++++---- code/src/healthstatus/mounts.py | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 07b7c7e71..2eae2247d 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -311,7 +311,7 @@ def time_mounts( mount_path=mount_point, update_dataset=update_dataset, ): - log.info("Testing with mount type: %s", mounter.name) + log.info("Testing with mount type: %s", mounter.type.value) with mounter.mount(): for a in assets: log.info( @@ -326,7 +326,7 @@ def time_mounts( log.info("Test passed in %f seconds", r.elapsed) results.append( MountBenchmark( - mount_name=mounter.name, + mount_type=mounter.type, asset=a, testname=tfunc.NAME, elapsed=r.elapsed, @@ -343,12 +343,14 @@ def time_mounts( assert r.outcome is Outcome.TIMEOUT log.error("Test timed out") sys.exit(1) - csvout = DictWriter(sys.stdout, ["mount", "dandiset", "asset", "test", "time_sec"]) + csvout = DictWriter( + sys.stdout, ["mount_type", "dandiset", "asset", "test", "time_sec"] + ) csvout.writeheader() for res in results: csvout.writerow( { - "mount": res.mount_name, + "mount_type": res.mount_type.value, "dandiset": res.asset.dandiset_id, "asset": res.asset.asset_path, "test": res.testname, diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index d92a84755..748caee93 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -32,9 +32,15 @@ def parse(cls, s: str) -> AssetInDandiset: return AssetInDandiset(dandiset_id, AssetPath(asset_path)) +class MountType(Enum): + FUSEFS = "fusefs" + WEBDAVFS = "webdavfs" + DAVFS2 = "davfs2" + + @dataclass class MountBenchmark: - mount_name: str + mount_type: MountType asset: AssetInDandiset testname: str elapsed: float @@ -43,7 +49,7 @@ class MountBenchmark: class Mounter(ABC): @property @abstractmethod - def name(self) -> str: + def type(self) -> MountType: ... @abstractmethod @@ -63,8 +69,8 @@ class FuseMounter(Mounter): logdir: Path = field(default_factory=Path) @property - def name(self) -> str: - return "fusefs" + def type(self) -> MountType: + return MountType.FUSEFS @contextmanager def mount(self) -> Iterator[None]: @@ -149,8 +155,8 @@ def resolve(self, asset: AssetInDandiset) -> Path: class WebDavFSMounter(DandiDavMounter): @property - def name(self) -> str: - return "webdavfs" + def type(self) -> MountType: + return MountType.WEBDAVFS @contextmanager def mount_webdav(self, url: str) -> Iterator[None]: @@ -168,20 +174,14 @@ def mount_webdav(self, url: str) -> Iterator[None]: class DavFS2Mounter(DandiDavMounter): @property - def name(self) -> str: - return "davfs2" + def type(self) -> MountType: + return MountType.DAVFS2 @contextmanager def mount_webdav(self, url: str) -> Iterator[None]: raise NotImplementedError -class MountType(Enum): - FUSEFS = "fusefs" - WEBDAVFS = "webdavfs" - DAVFS2 = "davfs2" - - def iter_mounters( types: set[MountType], dataset_path: Path | None, From 1e8f23d78a162ea9846e79cca41653d60dc3ce94 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 23 Jan 2024 10:20:40 -0500 Subject: [PATCH 19/34] Implement davfs2 mounting --- code/src/healthstatus/mounts.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 748caee93..c26d3412b 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -179,7 +179,16 @@ def type(self) -> MountType: @contextmanager def mount_webdav(self, url: str) -> Iterator[None]: - raise NotImplementedError + log.debug("Mounting davfs2 mount ...") + subprocess.run( + ["sudo", "mount", "-t", "davfs", url, os.fspath(self.mount_path)], + check=True, + ) + try: + yield + finally: + log.debug("Unmounting davfs2 mount ...") + subprocess.run(["sudo", "umount", os.fspath(self.mount_path)], check=True) def iter_mounters( From 1aa1e73f4a30ee100193cbf6f2a9898b7ec824c5 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 23 Jan 2024 11:14:16 -0500 Subject: [PATCH 20/34] Get subdatasets of dandi/dandisets when first cloning repo --- code/src/healthstatus/mounts.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index c26d3412b..2323f80ec 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -10,6 +10,7 @@ from signal import SIGINT import subprocess from time import sleep +from typing import Any import click from ghreq import Client import requests @@ -74,6 +75,8 @@ def type(self) -> MountType: @contextmanager def mount(self) -> Iterator[None]: + from datalad.api import Dataset + if not self.dataset_path.exists(): log.info("Cloning dandi/dandisets to %s ...", self.dataset_path) subprocess.run( @@ -85,6 +88,7 @@ def mount(self) -> Iterator[None]: ], check=True, ) + get_dandisets(Dataset(self.dataset_path)) elif self.update: update_dandisets(self.dataset_path) with (self.logdir / "fuse.log").open("wb") as fp: @@ -219,6 +223,13 @@ def update_dandisets(dataset_path: Path) -> None: from datalad.api import Dataset log.info("Updating Dandisets dataset ...") + + ds = Dataset(dataset_path) + ds.update(follow="parentds", how="ff-only", recursive=True, recursion_limit=1) + get_dandisets(ds) + + +def get_dandisets(ds: Any) -> None: # Fetch just the public repositories from the dandisets org, and then get # or update just those subdatasets rather than trying to get all # subdatasets and failing on private/embargoed ones @@ -228,10 +239,8 @@ def update_dandisets(dataset_path: Path) -> None: name = repo["name"] if re.fullmatch("[0-9]{6}", name): datasets.add(name) - ds = Dataset(dataset_path) - ds.update(follow="parentds", how="ff-only", recursive=True, recursion_limit=1) for sub in ds.subdatasets(state="present"): - name = Path(sub["path"]).relative_to(dataset_path).as_posix() + name = Path(sub["path"]).relative_to(ds.pathobj).as_posix() datasets.discard(name) if datasets: ds.get(path=list(datasets), jobs=5, get_data=False) From 66b6406fa94885c47a74807de046cb6b9eae4660 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 23 Jan 2024 11:22:56 -0500 Subject: [PATCH 21/34] Don't require --dataset-path to exist --- code/src/healthstatus/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 2eae2247d..9db4033e5 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -66,7 +66,7 @@ def main() -> None: @click.option( "-d", "--dataset-path", - type=click.Path(file_okay=False, exists=True, path_type=Path), + type=click.Path(file_okay=False, path_type=Path), help="Directory containing a clone of dandi/dandisets", required=True, ) @@ -261,7 +261,7 @@ def test_files(testname: str, files: tuple[Path, ...], save_results: bool) -> No @click.option( "-d", "--dataset-path", - type=click.Path(file_okay=False, exists=True, path_type=Path), + type=click.Path(file_okay=False, path_type=Path), help=( "Directory containing a clone of dandi/dandisets. Required when using" " the fusefs mount." From 441ce9e829603472eff06ef9b795d2e966675cb0 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 23 Jan 2024 11:32:05 -0500 Subject: [PATCH 22/34] Pass `-o allow_other` to webdavfs mount command --- code/src/healthstatus/mounts.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 2323f80ec..f9f8fe906 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -166,7 +166,16 @@ def type(self) -> MountType: def mount_webdav(self, url: str) -> Iterator[None]: log.debug("Mounting webdavfs mount ...") subprocess.run( - ["sudo", "mount", "-t", "webdavfs", url, os.fspath(self.mount_path)], + [ + "sudo", + "mount", + "-t", + "webdavfs", + "-o", + "allow_other", + url, + os.fspath(self.mount_path), + ], check=True, ) try: From f28b18096fda83578f07300a44fb391d7bf51d14 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 27 Mar 2024 09:48:19 -0400 Subject: [PATCH 23/34] Use hosted dandidav service instead of running locally --- code/setup.cfg | 5 --- code/src/healthstatus/mounts.py | 65 ++++++++++----------------------- 2 files changed, 19 insertions(+), 51 deletions(-) diff --git a/code/setup.cfg b/code/setup.cfg index 42442d387..fd9549ed5 100644 --- a/code/setup.cfg +++ b/code/setup.cfg @@ -33,14 +33,9 @@ install_requires = requests ~= 2.20 [options.extras_require] -all = - %(dandi)s - %(dandidav)s dandi = # Needed for timing `dandi ls` runs dandi -dandidav = - dandidav @ git+https://github.com/dandi/dandi-webdav [options.packages.find] where = src diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index f9f8fe906..9521b32d8 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -13,9 +13,10 @@ from typing import Any import click from ghreq import Client -import requests from .core import AssetPath, log +DANDIDAV_URL = "https://webdav.dandiarchive.org" + @dataclass class AssetInDandiset: @@ -118,52 +119,15 @@ def resolve(self, asset: AssetInDandiset) -> Path: @dataclass -class DandiDavMounter(Mounter): +class WebDavFSMounter(Mounter): mount_path: Path - logdir: Path = field(default_factory=Path) - - @contextmanager - def dandidav(self) -> Iterator[str]: - with (self.logdir / "dandidav.log").open("wb") as fp: - log.debug("Starting `dandidav` process ...") - with subprocess.Popen(["dandidav"], stdout=fp, stderr=fp) as p: - try: - url = "http://127.0.0.1:8080" - for _ in range(10): - try: - requests.get(url, timeout=1) - except requests.RequestException: - sleep(1) - else: - break - else: - raise RuntimeError("WebDAV server did not start up time") - yield url - finally: - log.debug("Terminating `dandidav` process ...") - p.send_signal(SIGINT) - - @abstractmethod - def mount_webdav(self, url: str) -> AbstractContextManager[None]: - ... - - @contextmanager - def mount(self) -> Iterator[None]: - with self.dandidav() as url: - with self.mount_webdav(url): - yield - - def resolve(self, asset: AssetInDandiset) -> Path: - return self.mount_path / asset.dandiset_id / "draft" / asset.asset_path - -class WebDavFSMounter(DandiDavMounter): @property def type(self) -> MountType: return MountType.WEBDAVFS @contextmanager - def mount_webdav(self, url: str) -> Iterator[None]: + def mount(self) -> Iterator[None]: log.debug("Mounting webdavfs mount ...") subprocess.run( [ @@ -173,7 +137,7 @@ def mount_webdav(self, url: str) -> Iterator[None]: "webdavfs", "-o", "allow_other", - url, + DANDIDAV_URL, os.fspath(self.mount_path), ], check=True, @@ -184,17 +148,23 @@ def mount_webdav(self, url: str) -> Iterator[None]: log.debug("Unmounting webdavfs mount ...") subprocess.run(["sudo", "umount", os.fspath(self.mount_path)], check=True) + def resolve(self, asset: AssetInDandiset) -> Path: + return self.mount_path / asset.dandiset_id / "draft" / asset.asset_path + + +@dataclass +class DavFS2Mounter(Mounter): + mount_path: Path -class DavFS2Mounter(DandiDavMounter): @property def type(self) -> MountType: return MountType.DAVFS2 @contextmanager - def mount_webdav(self, url: str) -> Iterator[None]: + def mount(self) -> Iterator[None]: log.debug("Mounting davfs2 mount ...") subprocess.run( - ["sudo", "mount", "-t", "davfs", url, os.fspath(self.mount_path)], + ["sudo", "mount", "-t", "davfs", DANDIDAV_URL, os.fspath(self.mount_path)], check=True, ) try: @@ -203,6 +173,9 @@ def mount_webdav(self, url: str) -> Iterator[None]: log.debug("Unmounting davfs2 mount ...") subprocess.run(["sudo", "umount", os.fspath(self.mount_path)], check=True) + def resolve(self, asset: AssetInDandiset) -> Path: + return self.mount_path / asset.dandiset_id / "draft" / asset.asset_path + def iter_mounters( types: set[MountType], @@ -221,9 +194,9 @@ def iter_mounters( logdir=logdir, ) if MountType.WEBDAVFS in types: - yield WebDavFSMounter(mount_path=mount_path, logdir=logdir) + yield WebDavFSMounter(mount_path=mount_path) if MountType.DAVFS2 in types: - yield DavFS2Mounter(mount_path=mount_path, logdir=logdir) + yield DavFS2Mounter(mount_path=mount_path) def update_dandisets(dataset_path: Path) -> None: From 9a13d6e239a1213af3819813609ea22d939b63ac Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 27 Mar 2024 10:04:12 -0400 Subject: [PATCH 24/34] Shell script for running benchmarks --- tools/bench.sh | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100755 tools/bench.sh diff --git a/tools/bench.sh b/tools/bench.sh new file mode 100755 index 000000000..6cea937d8 --- /dev/null +++ b/tools/bench.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -ex + +PYTHON="$HOME"/miniconda3/bin/python +DANDISETS_PATH=/mnt/backup/dandi/dandisets-healthstatus/dandisets +MOUNT_PATH=/tmp/dandisets-fuse + +cd "$(dirname "$0")"/.. + +"$PYTHON" -m virtualenv --clear venv +. venv/bin/activate +pip install './code[dandi]' +pip install 'git+https://github.com/jwodder/filesystem_spec@rlock-cache' + +dandisets-healthstatus time-mounts \ + -d "$DANDISETS_PATH" \ + -m "$MOUNT_PATH" \ + --no-update-dataset \ + --mounts fusefs,davfs2 \ + 000016/sub-mouse1-fni16/sub-mouse1-fni16_ses-161228151100.nwb From 74e27291b91c559c660466c025c1e4729b7cc17f Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 27 Mar 2024 10:27:15 -0400 Subject: [PATCH 25/34] The weird import error seems to be gone now --- code/src/healthstatus/mounts.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 9521b32d8..6c3d2db51 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -12,6 +12,7 @@ from time import sleep from typing import Any import click +from datalad.api import Dataset from ghreq import Client from .core import AssetPath, log @@ -76,8 +77,6 @@ def type(self) -> MountType: @contextmanager def mount(self) -> Iterator[None]: - from datalad.api import Dataset - if not self.dataset_path.exists(): log.info("Cloning dandi/dandisets to %s ...", self.dataset_path) subprocess.run( @@ -200,12 +199,7 @@ def iter_mounters( def update_dandisets(dataset_path: Path) -> None: - # Importing this at the top of the file leads to some weird import error - # when running tests: - from datalad.api import Dataset - log.info("Updating Dandisets dataset ...") - ds = Dataset(dataset_path) ds.update(follow="parentds", how="ff-only", recursive=True, recursion_limit=1) get_dandisets(ds) From a3a5a776bc03046b936fbd1b9ad8162db1ae1c67 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 27 Mar 2024 10:44:06 -0400 Subject: [PATCH 26/34] Ensure dataset directory and mount point directory exist --- code/src/healthstatus/mounts.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index 6c3d2db51..b554072e6 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -79,6 +79,7 @@ def type(self) -> MountType: def mount(self) -> Iterator[None]: if not self.dataset_path.exists(): log.info("Cloning dandi/dandisets to %s ...", self.dataset_path) + self.dataset_path.mkdir(parents=True, exist_ok=True) subprocess.run( [ "datalad", @@ -91,6 +92,7 @@ def mount(self) -> Iterator[None]: get_dandisets(Dataset(self.dataset_path)) elif self.update: update_dandisets(self.dataset_path) + self.mount_path.mkdir(parents=True, exist_ok=True) with (self.logdir / "fuse.log").open("wb") as fp: log.debug("Starting `datalad fusefs` process ...") with subprocess.Popen( @@ -128,6 +130,7 @@ def type(self) -> MountType: @contextmanager def mount(self) -> Iterator[None]: log.debug("Mounting webdavfs mount ...") + self.mount_path.mkdir(parents=True, exist_ok=True) subprocess.run( [ "sudo", @@ -162,6 +165,7 @@ def type(self) -> MountType: @contextmanager def mount(self) -> Iterator[None]: log.debug("Mounting davfs2 mount ...") + self.mount_path.mkdir(parents=True, exist_ok=True) subprocess.run( ["sudo", "mount", "-t", "davfs", DANDIDAV_URL, os.fspath(self.mount_path)], check=True, From 695305a02c0a2f0d9e4948f52d3cb3c518b822d1 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 27 Mar 2024 11:36:10 -0400 Subject: [PATCH 27/34] Adjust bench.sh --- tools/bench.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/bench.sh b/tools/bench.sh index 6cea937d8..42721d914 100755 --- a/tools/bench.sh +++ b/tools/bench.sh @@ -2,15 +2,20 @@ set -ex PYTHON="$HOME"/miniconda3/bin/python -DANDISETS_PATH=/mnt/backup/dandi/dandisets-healthstatus/dandisets +DANDISETS_PATH="$HOME"/healthstatus-dandisets MOUNT_PATH=/tmp/dandisets-fuse cd "$(dirname "$0")"/.. -"$PYTHON" -m virtualenv --clear venv -. venv/bin/activate -pip install './code[dandi]' -pip install 'git+https://github.com/jwodder/filesystem_spec@rlock-cache' +if [ ! -e venv ] +then + "$PYTHON" -m virtualenv venv + . venv/bin/activate + pip install -e './code[dandi]' + pip install 'git+https://github.com/jwodder/filesystem_spec@rlock-cache' +else + . venv/bin/activate +fi dandisets-healthstatus time-mounts \ -d "$DANDISETS_PATH" \ From 98cdf1e0acbc6aacc991e8088fcc9c8c149c776d Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 27 Mar 2024 11:39:11 -0400 Subject: [PATCH 28/34] time-mounts: Prepare timed tests --- code/src/healthstatus/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/src/healthstatus/__main__.py b/code/src/healthstatus/__main__.py index 9db4033e5..7f2bd548a 100644 --- a/code/src/healthstatus/__main__.py +++ b/code/src/healthstatus/__main__.py @@ -302,7 +302,7 @@ def time_mounts( update_dataset: bool, ) -> None: """Run timed tests on Dandiset assets using various mounting technologies""" - for t in TESTS: + for t in TIMED_TESTS: t.prepare() results = [] for mounter in iter_mounters( From 965573351ea6e172a1860787cee04e8420bf2133 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 1 Apr 2024 08:40:36 -0400 Subject: [PATCH 29/34] Fix webdav mount paths --- code/src/healthstatus/mounts.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index b554072e6..f87871b47 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -151,7 +151,13 @@ def mount(self) -> Iterator[None]: subprocess.run(["sudo", "umount", os.fspath(self.mount_path)], check=True) def resolve(self, asset: AssetInDandiset) -> Path: - return self.mount_path / asset.dandiset_id / "draft" / asset.asset_path + return ( + self.mount_path + / "dandisets" + / asset.dandiset_id + / "draft" + / asset.asset_path + ) @dataclass @@ -177,7 +183,13 @@ def mount(self) -> Iterator[None]: subprocess.run(["sudo", "umount", os.fspath(self.mount_path)], check=True) def resolve(self, asset: AssetInDandiset) -> Path: - return self.mount_path / asset.dandiset_id / "draft" / asset.asset_path + return ( + self.mount_path + / "dandisets" + / asset.dandiset_id + / "draft" + / asset.asset_path + ) def iter_mounters( From 66cbeae8f3a4e687606665c8faaf8ef1b9625621 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 3 Jun 2024 15:47:06 -0400 Subject: [PATCH 30/34] Point `bench.sh` at the asset that actually worked --- tools/bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bench.sh b/tools/bench.sh index 42721d914..b01505531 100755 --- a/tools/bench.sh +++ b/tools/bench.sh @@ -22,4 +22,4 @@ dandisets-healthstatus time-mounts \ -m "$MOUNT_PATH" \ --no-update-dataset \ --mounts fusefs,davfs2 \ - 000016/sub-mouse1-fni16/sub-mouse1-fni16_ses-161228151100.nwb + 000005/sub-anm236462/sub-anm236462_ses-20140210_behavior+icephys.nwb From 1e83d3eea2bfeb360d8a0d8926977351affcf481 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 3 Jun 2024 15:51:20 -0400 Subject: [PATCH 31/34] Comment out webdavfs support --- code/src/healthstatus/mounts.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/code/src/healthstatus/mounts.py b/code/src/healthstatus/mounts.py index f87871b47..6151212e9 100644 --- a/code/src/healthstatus/mounts.py +++ b/code/src/healthstatus/mounts.py @@ -37,7 +37,7 @@ def parse(cls, s: str) -> AssetInDandiset: class MountType(Enum): FUSEFS = "fusefs" - WEBDAVFS = "webdavfs" + # WEBDAVFS = "webdavfs" DAVFS2 = "davfs2" @@ -119,6 +119,9 @@ def resolve(self, asset: AssetInDandiset) -> Path: return self.mount_path / asset.dandiset_id / asset.asset_path +# Currently disabled due to webdavfs not supporting redirects +# +""" @dataclass class WebDavFSMounter(Mounter): mount_path: Path @@ -158,6 +161,7 @@ def resolve(self, asset: AssetInDandiset) -> Path: / "draft" / asset.asset_path ) +""" @dataclass @@ -208,8 +212,8 @@ def iter_mounters( update=update_dataset, logdir=logdir, ) - if MountType.WEBDAVFS in types: - yield WebDavFSMounter(mount_path=mount_path) + # if MountType.WEBDAVFS in types: + # yield WebDavFSMounter(mount_path=mount_path) if MountType.DAVFS2 in types: yield DavFS2Mounter(mount_path=mount_path) From 5560a921c4e2041a9a9cf835622b125ef2fddfe2 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Mon, 3 Jun 2024 16:07:32 -0400 Subject: [PATCH 32/34] Document setup for WebDAV mounts --- WEBDAV.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 WEBDAV.md diff --git a/WEBDAV.md b/WEBDAV.md new file mode 100644 index 000000000..27571507e --- /dev/null +++ b/WEBDAV.md @@ -0,0 +1,67 @@ +Required Setup for WebDAV Mounts +================================ + +davfs2 +------ + +- Install [davfs2](https://savannah.nongnu.org/projects/davfs2/). This code + was initially tested against davfs2, version 1.6.1-1, installed via APT on + Debian 12 (bookworm). + +- Create a directory to use as the mount point; `tools/bench.sh` uses + `/tmp/dandisets-fuse` by default, and this path will be used throughout the + following instructions. + +- Add the following to `/etc/davfs2/davfs2.conf`: + + ``` + [/tmp/dandisets-fuse] + ask_auth 0 + follow_redirect 1 + ``` + +- Grant the user who will be running `dandisets-healthstatus` permission to run + the following commands with `sudo` without entering a password: + + ``` + mount -t davfs https://webdav.dandiarchive.org /tmp/dandisets-fuse + umount /tmp/dandisets-fuse + ``` + + This can be done by adding the following lines to `/etc/sudoers`, where + `username` is replaced by the name of the user account: + + ``` + username ALL=(ALL:ALL) NOPASSWD: /usr/bin/mount -t davfs https\://webdav.dandiarchive.org /tmp/dandisets-fuse + username ALL=(ALL:ALL) NOPASSWD: /usr/bin/umount /tmp/dandisets-fuse + ``` + + + From 870c4ecaac84a8580d554834354183bbac70a349 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 4 Jun 2024 12:13:08 -0400 Subject: [PATCH 33/34] WEBDAV.md: Mention need for `--prefer-s3-redirects` option --- WEBDAV.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/WEBDAV.md b/WEBDAV.md index 27571507e..ac1d9d051 100644 --- a/WEBDAV.md +++ b/WEBDAV.md @@ -36,6 +36,9 @@ davfs2 username ALL=(ALL:ALL) NOPASSWD: /usr/bin/umount /tmp/dandisets-fuse ``` +- Ensure that the `dandidav` instance at webdav.dandiarchive.org is being run + with the `--prefer-s3-redirects` option + diff --git a/tools/bench.sh b/tools/bench.sh index b01505531..ab67aebb4 100755 --- a/tools/bench.sh +++ b/tools/bench.sh @@ -3,7 +3,7 @@ set -ex PYTHON="$HOME"/miniconda3/bin/python DANDISETS_PATH="$HOME"/healthstatus-dandisets -MOUNT_PATH=/tmp/dandisets-fuse +MOUNT_PATH=/mnt/backup/dandi/dandisets-healthstatus/dandisets-fuse cd "$(dirname "$0")"/.. diff --git a/tools/run.sh b/tools/run.sh index 493911a71..603e69e4f 100755 --- a/tools/run.sh +++ b/tools/run.sh @@ -3,7 +3,7 @@ set -ex PYTHON="$HOME"/miniconda3/bin/python DANDISETS_PATH=/mnt/backup/dandi/dandisets-healthstatus/dandisets -MOUNT_PATH=/tmp/dandisets-fuse +MOUNT_PATH=/mnt/backup/dandi/dandisets-healthstatus/dandisets-fuse cd "$(dirname "$0")"/.. #git reset --hard HEAD