diff --git a/.github/workflows/validate_bids-examples.yml b/.github/workflows/validate_bids-examples.yml
new file mode 100644
index 0000000000..7911ad5c82
--- /dev/null
+++ b/.github/workflows/validate_bids-examples.yml
@@ -0,0 +1,116 @@
+name: validate_datasets
+
+on:
+  push:
+    branches: ['master']
+  pull_request:
+    branches: ['**']
+#  create:
+#    branches: [master]
+#    tags: ['**']
+#  schedule:
+#    - cron: "0 4 * * 1"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        platform: [ubuntu-latest] # , macos-latest, windows-latest]
+        bids-validator: [master-deno]
+        python-version: ["3.11"]
+
+    runs-on: ${{ matrix.platform }}
+
+    env:
+      TZ: Europe/Berlin
+      FORCE_COLOR: 1
+
+    steps:
+    - uses: actions/checkout@v4
+
+    # Setup Python with bst
+    - uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: "Install build dependencies"
+      run: pip install --upgrade build twine
+    - name: "Build source distribution and wheel"
+      run: python -m build tools/schemacode
+    - name: "Check distribution metadata"
+      run: twine check tools/schemacode/dist/*
+    - name: "Install bst tools from the build"
+      run: pip install $( ls tools/schemacode/dist/*.whl )[all]
+    - name: "Produce dump of the schema as schema.json"
+      run: bst -v export --output src/schema.json
+
+    - uses: denoland/setup-deno@v1.1.2
+      if: "matrix.bids-validator == 'master-deno'"
+      with:
+        deno-version: v1.x
+
+    - name: Install BIDS validator (master deno build)
+      if: "matrix.bids-validator == 'master-deno'"
+      run: |
+        pushd ..
+        # Pin the validator to a specific commit for now.
+        # TODO: advance the pinned commit once in a while
+        commit=a7b291b882a8c6184219ccb84faae255ba96203a
+        git clone --depth 1 https://github.com/bids-standard/bids-validator
+        cd bids-validator
+        git fetch --depth 1 origin $commit;
+        git checkout $commit
+        echo -e '#!/bin/sh\n'"$PWD/bids-validator/bids-validator-deno \"\$@\"" >| /usr/local/bin/bids-validator
+        chmod a+x /usr/local/bin/bids-validator
+        which -a bids-validator
+        bids-validator --help
+        popd
+
+    - name: Display versions and environment information
+      run: |
+        echo $TZ
+        date
+        echo -n "npm: "; npm --version
+        echo -n "node: "; node --version
+        echo -n "bids-validator: "; bids-validator --version
+        echo -n "python: "; python --version
+
+    # Checkout bids-examples
+    - uses: actions/checkout@v4
+      with:
+        # repository: bids-standard/bids-examples
+        # For now use the forked repository with support for deno validator
+        # from https://github.com/bids-standard/bids-examples/pull/435
+        repository: yarikoptic/bids-examples
+        ref: deno-validator
+        path: bids-examples
+
+    - name: Mark known not yet to be deno-legit BIDS datasets
+      run: touch {ds000117,ds000246,ds000247,ds000248,eeg_ds003645s_hed_demo,ieeg_motorMiller2007,ieeg_visual}/.SKIP_VALIDATION
+      shell: bash
+      working-directory: bids-examples
+
+    - name: Validate using bids-validator without migration
+      run: ./run_tests.sh
+      working-directory: bids-examples
+
+    - name: Migrate all BIDS datasets
+      run: /bin/ls */dataset_description.json | sed -e 's,/.*,,g' | xargs bst migrate-datasets
+      shell: bash
+      working-directory: bids-examples
+
+    - name: Show migrated datasets diff
+      run: git diff HEAD
+      working-directory: bids-examples
+
+    # TODO: commit as a merge from current state of bids-examples
+    # and prior bids-2.0 branch there, but overloading with new updated
+    # state and recording commit hash of bids-specification used.
+
+    - name: Validate all BIDS datasets using bids-validator after migration
+      run: VALIDATOR_ARGS="--schema file://$PWD/../src/schema.json" bash ./run_tests.sh
+      working-directory: bids-examples
diff --git a/tools/schemacode/bidsschematools/migrations.py b/tools/schemacode/bidsschematools/migrations.py
index 09848acbb9..1cf0441cf1 100644
--- a/tools/schemacode/bidsschematools/migrations.py
+++ b/tools/schemacode/bidsschematools/migrations.py
@@ -1,7 +1,11 @@
 import json
 import os
 import re
+import subprocess
+from functools import lru_cache
+from itertools import chain
 from pathlib import Path
+from typing import Optional
 
 import bidsschematools as bst
 import bidsschematools.utils
@@ -11,10 +15,14 @@
 TARGET_VERSION = "2.0.0"
 
 
+class NotBIDSDatasetError(Exception):
+    pass
+
+
 def get_bids_version(dataset_path: Path) -> str:
     dataset_description = dataset_path / "dataset_description.json"
     if not dataset_description.exists():
-        raise ValueError(f"dataset_description.json not found in {dataset_path}")
+        raise NotBIDSDatasetError(f"dataset_description.json not found in {dataset_path}")
     return json.loads(dataset_description.read_text())["BIDSVersion"]
 
 
@@ -39,19 +47,26 @@ def migrate_participants(dataset_path: Path):
         old_file = dataset_path / f"participants{ext}"
         new_file = dataset_path / f"subjects{ext}"
         if old_file.exists():
-            os.rename(old_file, new_file)
+            rename_path(old_file, new_file)
             lgr.info(f" - renamed {old_file} to {new_file}")
             if ext == ".tsv":
-                migrated = new_file.read_text().replace("participant_id", "subject_id", 1)
-                new_file.write_text(migrated)
+                # Do manual .decode() and .encode() to avoid changing line endings
+                migrated = (
+                    new_file.read_bytes().decode().replace("participant_id", "subject_id", 1)
+                )
+                new_file.write_bytes(migrated.encode())
                 lgr.info(f" - migrated content in {new_file}")
 
 
 def migrate_dataset(dataset_path):
     lgr.info(f"Migrating dataset at {dataset_path}")
     dataset_path = Path(dataset_path)
-    if get_bids_version(dataset_path) == TARGET_VERSION:
-        lgr.info(f"Dataset already at version {TARGET_VERSION}")
+    try:
+        if get_bids_version(dataset_path) == TARGET_VERSION:
+            lgr.info(f"Dataset already at version {TARGET_VERSION}")
+            return
+    except NotBIDSDatasetError:
+        lgr.warning("%s not a BIDS dataset, skipping", dataset_path)
         return
     # TODO: possibly add a check for BIDS version in dataset_description.json
     # and skip if already 2.0, although ideally transformations
@@ -62,3 +77,42 @@ def migrate_dataset(dataset_path):
     ]:
         lgr.info(f" - applying migration {migration.__name__}")
         migration(dataset_path)
+
+
+@lru_cache
+def path_has_git(path: Path) -> bool:
+    """Return True if path directly contains a .git entry (result is cached)."""
+    return (path / ".git").exists()
+
+
+def git_topdir(path: Path) -> Optional[Path]:
+    """Return top-level directory of a git repository containing path,
+    or None if not under git."""
+    path = path.absolute()
+    for p in chain([path] if path.is_dir() else [], path.parents):
+        if path_has_git(p):
+            return p
+    return None
+
+
+def rename_path(old_path: Path, new_path: Path):
+    """git aware rename. If under git, use git mv, otherwise just os.rename.
+
+    Raises NotImplementedError if the two paths have different git top-level directories.
+    """
+    # Moving across git repository boundaries is not supported
+    # (should just use DataLad and `mv` and it would do the right thing!)
+    if (old_git_top := git_topdir(old_path)) != (new_git_top := git_topdir(new_path)):
+        raise NotImplementedError(
+            f"Did not implement moving across git repo boundaries {old_git_top} -> {new_git_top}"
+        )
+    if old_git_top:
+        # git runs with cwd=old_git_top, so paths relative to the caller's
+        # working directory must be made absolute before being handed to it
+        subprocess.run(
+            ["git", "mv", str(old_path.absolute()), str(new_path.absolute())],
+            check=True,
+            cwd=old_git_top,
+        )
+    else:
+        os.rename(old_path, new_path)