From 50907ba7f42b7fae7caf0e5d3ab4986dbd05bd14 Mon Sep 17 00:00:00 2001 From: Alexandre Pron Date: Tue, 3 Sep 2024 16:48:16 +0200 Subject: [PATCH 1/3] [BUGFIX]: correct wrong run entity placing into BIDS filename --- shanoir2bids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shanoir2bids.py b/shanoir2bids.py index ecf5545..1f113b7 100755 --- a/shanoir2bids.py +++ b/shanoir2bids.py @@ -201,7 +201,7 @@ def generate_bids_heuristic_file( def create_bids_key(dataset): - template = create_key(subdir=dataset['bidsDir'],file_suffix=r"run-{{item:02d}}_" + dataset['bidsName'],outtype={outtype}) + template = create_key(subdir=dataset['bidsDir'],file_suffix="_".join(dataset['bidsName'].split('_')[:-1]) + '_' + r"run-{{item:02d}}_" + dataset['bidsName'].split('_')[-1],outtype={outtype}) return template def get_dataset_to_key_mapping(shanoir2bids): From 6bf2d7af1c2835ea18da1a8845bce57e8b0bde0a Mon Sep 17 00:00:00 2001 From: Alexandre Pron Date: Fri, 25 Oct 2024 15:42:57 +0200 Subject: [PATCH 2/3] [ENH]: added documentation about shanoir2bids --- docs/shanoir-bids-datalad.md | 450 +++++++++++++++++++++++++++++++++++ 1 file changed, 450 insertions(+) create mode 100644 docs/shanoir-bids-datalad.md diff --git a/docs/shanoir-bids-datalad.md b/docs/shanoir-bids-datalad.md new file mode 100644 index 0000000..4bfba0e --- /dev/null +++ b/docs/shanoir-bids-datalad.md @@ -0,0 +1,450 @@ +# How to process data stored in Shanoir with shanoir-downloader and datalad? + +[toc] + +## Context + +In this tutorial we will learn how a study hosted on a Shanoir +instance can be retrieved for local processing. Below are two +typical use cases: + ++ **Case 1: Processing datasets from a study for which the acquisition is in progress** + +Study data images are retrieved as they are added into the Shanoir +instance and, after passing an initial visual quality check, are +preprocessed by a predefined processing pipeline. +Datasets that do not pass quality control are reacquired. 
+Both raw and processed datasets are shared with collaborators. + ++ **Case 2: Development of a new segmentation method** + +Multi-modal brain datasets (e.g. T2 and T1 weighted MRI scans) from a +study hosted on a Shanoir instance are used to develop a new method for +segmenting brain structures. Only subjects for which both contrast scans per +acquisition session are available are being retrieved. +The new segmentation method is implemented and corrected iteratively +according to the quality of the generated segmentation. The +segmentation method is then tested on a held out study (test set). + +With these two example use-cases, we will show how to: + 1. Retrieve data from Shanoir in a standardized manner + 2. Track (manual) changes to data (additions, deletions, tags + stemming from quality check) + 3. Track changes to both the code and the data generated by the code + +Accordingly, the workflow delineated in this tutorial entails the +downloading of data from a Shanoir study +in [BIDS](https://bids.neuroimaging.io/) +format. + + +> [!NOTE] +> The Brain Imaging Data Structure [[BIDS](https://bids.neuroimaging.io/), +> [BIDS article](https://www.nature.com/articles/sdata201644)] +> is a framework for organizing data that standardizes file organization +> and dataset description. BIDS supports multiple biomedical imaging modalities +> (e.g. MRI, EEG, PET, microscopy), species (e.g. human, primate non humans, mouse) +> and body parts (e.g. brain, spinal cord, samples). BIDS has been massively adopted by +> the human neuroimaging scientific community and is still in very active development. +> See [The past, present and future of BIDS article](https://direct.mit.edu/imag/article/doi/10.1162/imag_a_00103/119672) + +The downloaded data is then stored as +datalad datasets, thereby facilitating the tracking of data versions +and any associated code. The remainder of this tutorial delineates: + +1. 
the installation of `shanoir2bids.py`, the python script utilized for +the downloading of data, +2. the datalad commands essential for the effective tracking of data versions. + +## Data Download + +### shanoir2bids.py installation + +1. Go + to [GitHub repository of shanoir-downloader](https://github.com/Inria-Empenn/shanoir_downloader) + +2. Follow + the [Installation with conda instructions](https://github.com/Inria-Empenn/shanoir_downloader?tab=readme-ov-file#installation-with-conda) + +3. Test the installation by typing the following commands + in the obtained *conda virtual environment*: + + > Click on the dark arrows to display the expected results + + 1.
+ + datalad -h + + ```bash + Usage: datalad [global-opts] command [command-opts] + + Comprehensive data management solution + + DataLad provides a unified data distribution system built on + the Git and Git-annex. DataLad command line tools allow to + manipulate (obtain,create, update, publish, etc.) datasets + and provide a comprehensive toolbox for joint management of + data and code. Compared to Git/annex it primarily extends their + functionality to transparently andsimultaneously work with + multiple inter-related repositories. + + + *Essential* + + create + Create a new dataset from scratch + save + Save the current state of a dataset + status + Report on the state of dataset content + clone + Obtain a dataset (copy) from a URL or local directory + get + Get any dataset content (files/directories/subdatasets) + push + Push a dataset to a known sibling + run + Run an arbitrary shell command and record its impact on a dataset + diff + Report differences between two states of a dataset (hierarchy) + + ``` +
+ + + 2.
+ + + python shanoir2bids.py -h + + ```bash + shanoir2bids.py is a script that allows to download a Shanoir + dataset and organise it as a BIDS data structure. + The script is made to run for every project given some + information provided by the user into a ".json" configuration file. + More details regarding the configuration file in the Readme.md + + options: + -h, --help show this help message and exit + -u USERNAME, --username USERNAME + Your shanoir username. (default: None) + -d DOMAIN, --domain DOMAIN + The shanoir domain to query. + (default: shanoir.irisa.fr) + --outformat {nifti,dicom,both} + The format to download. (default: both) + -of OUTPUT_FOLDER, --output_folder OUTPUT_FOLDER + The destination folder where files will be downloaded. (default: None) + -j CONFIG_FILE, --config_file CONFIG_FILE + Path to the .json configuration file specifying parameters for shanoir downloading. (default: None) + -L, --longitudinal Toggle longitudinal approach. (default: False) + --debug Toggle debug mode (keep temporary directories) (default: False) + ``` + + +
+ + + + + +If both your terminal outputs are similar you are ready to continue. + +### Initial Download + +#### Select the data of interest and map to BIDS + +The selection of datasets of interest within a Shanoir study, as well +as the correspondence between the Shanoir dataset and the BIDS file, +are configured using a JSON file. A template configuration file is +available [here](https://github.com/Inria-Empenn/shanoir_downloader/blob/main/s2b_example_config.json) +for reference. The content of this file is reproduced below (Click to display) + +
+ Example configuration file + +```json +{ + "study_name": "Aneravimm", + "subjects": ["VS_Aneravimm_010","VS_Aneravimm_011"], + "data_to_bids": [ + {"datasetName": "t1_mprage_sag_p2_iso", "bidsDir": "anat", + "bidsName": "acq-mprage_T1w"}, + {"datasetName": "t2_tse_HR_cor_MTL", "bidsDir": "anat", + "bidsName": "acq-hr_T2w"}, + {"datasetName": "Resting State_bold AP 1.6mm HIPP", "bidsDir": "func", + "bidsName": "task-restingState_dir-AP_bold"}, + {"datasetName": "Diff cusp66 b3000 AP 1.5mm", "bidsDir": "dwi", + "bidsName": "acq-b3000_dir-AP_dwi"}, + {"datasetName": "Diff cusp66 b0 PA 1.5mm", "bidsDir": "dwi", + "bidsName": "acq-b0_dir-AP_dwi"}, + {"datasetName": "Diff cusp66 b0 PA 1.5mm", "bidsDir": "dwi", + "bidsName": "acq-b0_dir-PA_dwi"} + ], + "dcm2niix": "/home/alpron/softs/miniconda3/envs/test-env/bin/dcm2niix", + "dcm2niix_comment": "change the dcm2niix path above to match your + local installation" + "dcm2niix_options_comment": "dcm2niix configuration options in + the nipype format (see https://nipype.readthedocs.io/en/latest/api/generated/nipype.interfaces.dcm2nii.html)", + "dcm2niix_options": { + "bids_format": true, + "anon_bids": true, + "compress": "y", + "compression": 5, + "crop": false, + "has_private": false, + "ignore_deriv": false, + "single_file": false, + "verbose": false + }, + "find_and_replace_subject": [ + {"find": "VS_Aneravimm_", "replace": "VS"}, + {"find": "Vs_Aneravimm_", "replace": "VS"} + ] +} +``` +

+ In this file the ```dcm2niix``` entry key should contain the path
  location of the ```dcm2niix``` instance. If the path does not
  correspond to a valid ```dcm2niix``` executable,
  the script will return an error.

+ BIDS mapping is configured in the `data_to_bids` dictionary
  + `datasetName`: the name of the Shanoir dataset (scan) of interest,
    a dataset per entry
  + `bidsDir`: the BIDS datatype (e.g. anat, dwi, func), please
    refer to the [BIDS starter kit](https://bids-standard.github.io/bids-starter-kit/folders_and_files/files.html)
    for an exhaustive list of BIDS datatypes
  + `bidsName`: name of the output file, extension should not be
    included, please refer to the [BIDS starter kit](https://bids-standard.github.io/bids-starter-kit/folders_and_files/files.html)
    for filename templates according to the BIDS datatype
  + the `run` entities can be set explicitly or omitted. In this
    case they are added automatically if two files have identical
    filenames
  + compliance of the provided BIDS mapping in the configuration
    file is checked before any download using `bids-validator`. In
    case of error the download is not initiated

#### Launch Download

Below is an example of a command to launch the download:

```sh
python shanoir2bids.py -u $SHANOIR_USERNAME -d $SHANOIR_INSTANCE \
--outformat nifti -of $OUTPUT_PATH -j $PATH_CONFIG_FILE
```
Where `$SHANOIR_USERNAME`, `$SHANOIR_INSTANCE`, `$OUTPUT_PATH` and `$PATH_CONFIG_FILE`
should be replaced by values adapted to the user (e.g.
$SHANOIR_USERNAME by the actual Shanoir user name).
The download may now be initiated. Once the download is complete, a
datalad dataset should be obtained, containing the selected Shanoir
datasets for the subjects of interest and organized in BIDS format.
Hereunder is the result corresponding to the example configuration
file.
+ BIDS datalad dataset corresponding to config file + + ```bash +├── CHANGES +├── dataset_description.json +├── participants.json +├── participants.tsv +├── README +├── scans.json +├── sub-VS010 +│   ├── anat +│   │   ├── sub-VS010_acq-hr_T2w.json +│   │   ├── sub-VS010_acq-hr_T2w.nii.gz -> ../../.git/annex/objects/4f/4P/MD5E-s8243750--ef1c843f9ed9add054c7019c1745a96b.nii.gz/MD5E-s8243750--ef1c843f9ed9add054c7019c1745a96b.nii.gz +│   │   ├── sub-VS010_acq-mprage_T1w.json +│   │   └── sub-VS010_acq-mprage_T1w.nii.gz -> ../../.git/annex/objects/z7/gg/MD5E-s11346920--0203b00fae3304f9b3a9d3862a46408d.nii.gz/MD5E-s11346920--0203b00fae3304f9b3a9d3862a46408d.nii.gz +│   ├── dwi +│   │   ├── sub-VS010_acq-b0_dir-PA_dwi.bval +│   │   ├── sub-VS010_acq-b0_dir-PA_dwi.bvec +│   │   ├── sub-VS010_acq-b0_dir-PA_dwi.json +│   │   ├── sub-VS010_acq-b0_dir-PA_dwi.nii.gz -> ../../.git/annex/objects/fq/4x/MD5E-s3281565--34362c006007468b9a2e1aa88361cd20.nii.gz/MD5E-s3281565--34362c006007468b9a2e1aa88361cd20.nii.gz +│   │   ├── sub-VS010_acq-b3000_dir-AP_dwi.bval +│   │   ├── sub-VS010_acq-b3000_dir-AP_dwi.bvec +│   │   ├── sub-VS010_acq-b3000_dir-AP_dwi.json +│   │   └── sub-VS010_acq-b3000_dir-AP_dwi.nii.gz -> ../../.git/annex/objects/3z/6J/MD5E-s209523683--c78c69c86b35599f35197065c365dce0.nii.gz/MD5E-s209523683--c78c69c86b35599f35197065c365dce0.nii.gz +│   ├── func +│   │   ├── sub-VS010_task-restingState_dir-AP_bold.json +│   │   ├── sub-VS010_task-restingState_dir-AP_bold.nii.gz -> ../../.git/annex/objects/Gf/m5/MD5E-s2092935--d05a45930f912fc41ade1758d208c31f.nii.gz/MD5E-s2092935--d05a45930f912fc41ade1758d208c31f.nii.gz +│   │   └── sub-VS010_task-restingState_dir-AP_events.tsv +│   └── sub-VS010_scans.tsv -> ../.git/annex/objects/x9/2Q/MD5E-s444--259fa1a5b951c01ae4dec95ecaa12f77.tsv/MD5E-s444--259fa1a5b951c01ae4dec95ecaa12f77.tsv +├── sub-VS011 +│   ├── anat +│   │   ├── sub-VS011_acq-hr_T2w.json +│   │   ├── sub-VS011_acq-hr_T2w.nii.gz -> 
../../.git/annex/objects/ww/PX/MD5E-s8189478--a1ed4a2b38ca387a3f660e1dd8745319.nii.gz/MD5E-s8189478--a1ed4a2b38ca387a3f660e1dd8745319.nii.gz +│   │   ├── sub-VS011_acq-mprage_T1w.json +│   │   └── sub-VS011_acq-mprage_T1w.nii.gz -> ../../.git/annex/objects/KG/2Z/MD5E-s10205871--e6943189bb481ee33374b1d5c26794c7.nii.gz/MD5E-s10205871--e6943189bb481ee33374b1d5c26794c7.nii.gz +│   ├── dwi +│   │   ├── sub-VS011_acq-b0_dir-PA_dwi.bval +│   │   ├── sub-VS011_acq-b0_dir-PA_dwi.bvec +│   │   ├── sub-VS011_acq-b0_dir-PA_dwi.json +│   │   ├── sub-VS011_acq-b0_dir-PA_dwi.nii.gz -> ../../.git/annex/objects/vJ/Qj/MD5E-s3257438--6041de88b7e1c3d120e1255e1c82447d.nii.gz/MD5E-s3257438--6041de88b7e1c3d120e1255e1c82447d.nii.gz +│   │   ├── sub-VS011_acq-b3000_dir-AP_dwi.bval +│   │   ├── sub-VS011_acq-b3000_dir-AP_dwi.bvec +│   │   ├── sub-VS011_acq-b3000_dir-AP_dwi.json +│   │   └── sub-VS011_acq-b3000_dir-AP_dwi.nii.gz -> ../../.git/annex/objects/3Q/V6/MD5E-s208386898--5dc2191259cc2483e29668aab7712b70.nii.gz/MD5E-s208386898--5dc2191259cc2483e29668aab7712b70.nii.gz +│   ├── func +│   │   ├── sub-VS011_task-restingState_dir-AP_bold.json +│   │   ├── sub-VS011_task-restingState_dir-AP_bold.nii.gz -> ../../.git/annex/objects/9w/KW/MD5E-s2084455--d8495d3dde9ef498001e02282bbf841f.nii.gz/MD5E-s2084455--d8495d3dde9ef498001e02282bbf841f.nii.gz +│   │   └── sub-VS011_task-restingState_dir-AP_events.tsv +│   └── sub-VS011_scans.tsv -> ../.git/annex/objects/4K/4z/MD5E-s444--f0ac4f6b16438aaf83bdf527d6648376.tsv/MD5E-s444--f0ac4f6b16438aaf83bdf527d6648376.tsv +└── task-restingState_bold.json +``` +

#### Enrich the obtained BIDS dataset

##### Add configuration file into the dataset

To ensure reproducibility of the study, the `configuration.json` file
used to parameterize the download should be included in the obtained
datalad dataset. This can be achieved using the following commands:

```sh
 cp $PATH_CONFIG_FILE $OUTPUT_PATH
 cd $OUTPUT_PATH
 datalad save -m "include download configuration file"
```

##### Supplement BIDS dataset files

As illustrated below, the BIDS dataset files generated (
i.e. `dataset_description.json`, `participants.json`, `README`) are
not particularly detailed. It may be beneficial to supplement these
with additional information in order to enhance the level of data
documentation and facilitate sharing with other users.

 Default dataset_description.json to be enhanced 

```json
{
"Acknowledgements": "TODO: whom you want to acknowledge",
"Authors": [
"TODO:",
"First1 Last1",
"First2 Last2",
"..."
],
"BIDSVersion": "1.8.0",
"DatasetDOI": "TODO: eventually a DOI for the dataset",
"Funding": [
"TODO",
"GRANT #1",
"GRANT #2"
],
"HowToAcknowledge": "TODO: describe how to acknowledge -- either cite a corresponding paper, or just in acknowledgement section",
"License": "TODO: choose a license, e.g. PDDL (http://opendatacommons.org/licenses/pddl/)",
"Name": "TODO: name of the dataset",
"ReferencesAndLinks": [
"TODO",
"List of papers or websites"
]
}
```

### Downloading data from several Shanoir studies

Create a new study in Shanoir that contains either all the studies
from which the data originate or the datasets of interest only
following instructions available [here](https://github.com/fli-iam/shanoir-ng/wiki/Copy-of-datasets)
Once this has been done, refer to the [Download section](#initial-download)

### Update the BIDS datalad dataset

In the event that additional subjects or examinations are required (
e.g. continuous patient inclusion), the download procedure can be
restarted by incorporating all the subjects into the `.json`
configuration file. This will result in the re-downloading of all
subjects, with only the new or modified subjects being integrated. The
integration of modifications is managed internally by datalad.

## Processing the data in a reproducible manner

### Getting a versioned local processing environment

Once the data has been downloaded in BIDS format, it needs to be
processed in a reproducible way, i.e. the data and the tools that
modify the data (e.g. code, containers) need to be versioned together.
This can be achieved, for example, by:

+ creating a datalad super dataset for the study
+ adding to it the git repo containing the data processing code as a
  code sub-directory
+ also add the containers involved in the processing, either by
  including the Dockerfile with dependencies on the exact version, or
  by using
  the [datalad-container extension](https://docs.datalad.org/projects/container/en/latest/).


> [!NOTE]
> Software such as [nipoppy](https://github.com/nipoppy/nipoppy)
> extends the BIDS data organisation structure to include code and metadata.
> Nipoppy does not support datalad yet but datalad can still be used
> on the side to track (meta)data modifications. 
+ + +The super dataset simultaneously versions the data, the code and the +modifications to the data generated by the code, making it possible to +track changes in these components over time and guarantee a high level +of reproducibility for the study. + +### Modifying the dataset (code or data) + ++ Manual or scripted modifications of the code can be done using + classic git commands (i.e. git commit) ++ Manual modifications of the files using datalad unlock and datalad + save mechanism indicating explicit commit message using the -m + option ++ Command based/scripted modifications of the dataset should be + performed using *datalad run* and *datalad-containers-run* commands + indicating explicit commit message using the `-m` option + This way, the provenance of the commands is captured. + ++ derivatives organization according to BIDS standard if applicable at + least for files of interest + + +## Resources + +### Why using BIDS + +Even if `shanoir2bids.py` can download Shanoir datasets both as DICOM +and NIFTI files it is recommended that, unless there is a specific +requirement to do otherwise (e.g. failed NIFTI conversion, specific +DICOM field lost during the conversion, software taking DICOM only as +input), the data be retrieved in NIFTI BIDS format. Indeed, adherence +to the BIDS standard will facilitate: + ++ the utilization of existing state-of-the-art tools + ++ (e.g. 
[pyBIDS](https://bids-standard.github.io/pybids/) for the + browsing of study subjects without the necessity for the writing of + specific code) ++ [BIDS-APPS](https://bids-apps.neuroimaging.io/apps/) + ++ code development + ++ the sharing with collaborators + + + standardised filenames + + documentation of the dataset + + + + + + + + + + + + \ No newline at end of file From 00de5035d3f5f1a9890f5bf4f6b9d1790d43b1fa Mon Sep 17 00:00:00 2001 From: Alexandre Pron Date: Fri, 25 Oct 2024 16:14:23 +0200 Subject: [PATCH 3/3] [ENH]: added documentation structure and github action --- .github/workflows/sphinx.yml | 24 +++++++++++++++++++ docs/conf.py | 46 ++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 .github/workflows/sphinx.yml create mode 100644 docs/conf.py diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml new file mode 100644 index 0000000..1e63e92 --- /dev/null +++ b/.github/workflows/sphinx.yml @@ -0,0 +1,24 @@ +name: "Sphinx: Render docs" + +on: push + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + - name: Build HTML + uses: ammaraskar/sphinx-action@master + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: html-docs + path: docs/build/html/ + - name: Deploy + uses: peaceiris/actions-gh-pages@v3 + if: github.ref == 'refs/heads/main' + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: docs/build/html \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..fbcad34 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,46 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Shanoir Downloader' +author = 'Shanoir developer team' +release = '0.0.0' # TODO: parse github tag + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration +# root_doc='contents/README' +extensions = ['myst_parser', "sphinx_design"] +myst_enable_extensions = [ + "colon_fence", # Enables ::: block syntax + "tasklist" # Enables task list - [] +] +# sphinxemoji_style = 'twemoji' +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_book_theme' +html_static_path = ['_static'] +# html_css_files = ["theme.css"] +html_sourcelink_suffix = ".md" # our sources are in markdown; but this only has an effect if "use_download_button": True +# html_favicon = "_static/favicon_dark.png" +html_theme_options = { + "toc_title": "Page Contents", + "search_bar_text": "Search...", + "repository_branch": "main", + "use_fullscreen_button": True, + "use_source_button": True, + "use_edit_page_button": True, + "use_download_button": False, + "use_issues_button": True, + "use_repository_button": True, + "default_mode": "light", + "repository_provider": "github", + "repository_url": "https://github.com/empenn/username.github.io" +}