From 9b67a6cbbc953d0767a49532da53787ad182eaa3 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 14:59:15 -0600 Subject: [PATCH 1/9] cwlfeature_extraction --- README.md | 84 ++++++ bbbc_json/bbbc_config.json | 17 ++ cwl_adapters/basic-flatfield-estimation.cwl | 115 -------- cwl_adapters/bbbcdownload.cwl | 61 ---- cwl_adapters/file-renaming.cwl | 85 ------ cwl_adapters/image_assembler.cwl | 101 ------- cwl_adapters/montage.cwl | 123 -------- cwl_adapters/ome-converter.cwl | 85 ------ cwl_adapters/precompute_slide.cwl | 81 ------ cwl_workflows/__init__.py | 2 + cwl_workflows/__main__.py | 54 ++++ .../cwl_features_extraction.cpython-310.pyc | Bin 0 -> 8762 bytes .../__pycache__/utils.cpython-310.pyc | Bin 0 -> 1923 bytes {workflows => cwl_workflows}/bbbc.py | 0 {workflows => cwl_workflows}/bbbc.yml | 0 cwl_workflows/cwl_features_extraction.py | 265 ++++++++++++++++++ cwl_workflows/utils.py | 45 +++ 17 files changed, 467 insertions(+), 651 deletions(-) create mode 100644 README.md create mode 100644 bbbc_json/bbbc_config.json delete mode 100644 cwl_adapters/basic-flatfield-estimation.cwl delete mode 100644 cwl_adapters/bbbcdownload.cwl delete mode 100644 cwl_adapters/file-renaming.cwl delete mode 100644 cwl_adapters/image_assembler.cwl delete mode 100644 cwl_adapters/montage.cwl delete mode 100644 cwl_adapters/ome-converter.cwl delete mode 100644 cwl_adapters/precompute_slide.cwl create mode 100644 cwl_workflows/__init__.py create mode 100644 cwl_workflows/__main__.py create mode 100644 cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc create mode 100644 cwl_workflows/__pycache__/utils.cpython-310.pyc rename {workflows => cwl_workflows}/bbbc.py (100%) rename {workflows => cwl_workflows}/bbbc.yml (100%) create mode 100644 cwl_workflows/cwl_features_extraction.py create mode 100644 cwl_workflows/utils.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..c735f02 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +# Common 
Workflow Language (CWL) Feature Extraction workflow + +CWL feature extraction workflow for imaging datasets + +## Workflow Steps: + +Create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#activating-an-environment) environment using python = ">=3.9,<3.12" + +#### 1. Install polus-plugins. + +- clone the image-tools repository +`git clone https://github.com/camilovelezr/image-tools.git` +- cd `image-tools` +- `pip install .` + +#### 2. Install workflow-inference-compiler. +- clone the workflow-inference-compiler repository +`git clone https://github.com/camilovelezr/workflow-inference-compiler.git` +- cd `workflow-inference-compiler` +- `pip install -e ".[all]"` + +## Details +This workflow integrates eight distinct plugins, starting from data retrieval from [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), renaming files, correcting uneven illumination, segmenting nuclear objects, and culminating in the extraction of features from identified objects + +Below are the specifics of the plugins employed in the workflow +1. [bbbc-download-plugin](https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin) +2. [file-renaming-tool](https://github.com/PolusAI/image-tools/tree/master/formats/file-renaming-tool) +3. [ome-converter-tool](https://github.com/PolusAI/image-tools/tree/master/formats/ome-converter-tool) +4. [basic-flatfield-estimation-tool](https://github.com/PolusAI/image-tools/tree/master/regression/basic-flatfield-estimation-tool) +5. [apply-flatfield-tool](https://github.com/PolusAI/image-tools/tree/master/transforms/images/apply-flatfield-tool) +6. [kaggle-nuclei-segmentation](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation) +7. [polus-ftl-label-plugin](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/transforms/images/polus-ftl-label-plugin) +8. 
[nyxus-plugin](https://github.com/PolusAI/image-tools/tree/kaggle-nuclei_seg/features/nyxus-plugin) + +## Execute CWL feature extraction workflow + +The parameters for each imaging dataset are pre-defined and stored in JSON format. A Pydantic model in a utils Python file can be utilized to store parameters for any new dataset + +`python cwl_workflows/__main__.py --name="BBBC039" --workflow=CWLFeatureWorkflow` + +A directory named `workflows` is generated, containing the CLTs for each plugin and the YAML files; all outputs are stored within the `outdir` directory. +``` +workflows +├── experiment +│ └── cwl_adapters +| experiment.cwl +| experiment.yml +| +└── outdir + └── experiment + ├── step 1 BbbcDownload + │ └── outDir + │ └── bbbc.outDir + │ └── BBBC + │ └── BBBC039 + │ └── raw + │ ├── Ground_Truth + │ │ ├── masks + │ │ └── metadata + │ └── Images + │ └── images + ├── step 2 FileRenaming + │ └── outDir + │ └── rename.outDir + ├── step 3 OmeConverter + │ └── outDir + │ └── ome_converter.outDir + ├── step 4 BasicFlatfieldEstimation + │ └── outDir + │ └── estimate_flatfield.outDir + ├── step 5 ApplyFlatfield + │ └── outDir + │ └── apply_flatfield.outDir + ├── step 6 KaggleNucleiSegmentation + │ └── outDir + │ └── kaggle_nuclei_segmentation.outDir + ├── step 7 FtlLabel + │ └── outDir + │ └── ftl_plugin.outDir + └── step 8 NyxusPlugin + └── outDir + └── nyxus_plugin.outDir + +``` diff --git a/bbbc_json/bbbc_config.json b/bbbc_json/bbbc_config.json new file mode 100644 index 0000000..c5d55c6 --- /dev/null +++ b/bbbc_json/bbbc_config.json @@ -0,0 +1,17 @@ +{ + "data": { + "BBBC039": { + "name": "BBBC039", + "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", + "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", + "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", + "map_directory": "raw", + "ff_pattern": 
"images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", + "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", + "group_by": "c", + "features": "ALL_INTENSITY,ALL_MORPHOLOGY", + "file_extension": "pandas" + } + } +} \ No newline at end of file diff --git a/cwl_adapters/basic-flatfield-estimation.cwl b/cwl_adapters/basic-flatfield-estimation.cwl deleted file mode 100644 index 3893ae8..0000000 --- a/cwl_adapters/basic-flatfield-estimation.cwl +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: BaSiC Flatfield Estimation - -doc: |- - This WIPP plugin will take a collection of images and use the BaSiC flatfield correction algorithm to generate a flatfield image, a darkfield image, and a photobleach offset. - https://github.com/PolusAI/polus-plugins/tree/master/regression/basic-flatfield-estimation-plugin - -requirements: - DockerRequirement: - dockerPull: polusai/basic-flatfield-estimation-plugin:2.1.1 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.regression.basic_flatfield_estimation"] - -# "jax._src.xla_bridge - WARNING - An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu." 
-hints: - cwltool:CUDARequirement: - cudaVersionMin: "11.4" - cudaComputeCapabilityMin: "3.0" - cudaDeviceCountMin: 1 - cudaDeviceCountMax: 1 - -inputs: - inpDir: - label: Path to input images - doc: |- - Path to input images - type: Directory - inputBinding: - prefix: --inpDir - - getDarkfield: - label: If 'true', will calculate darkfield image - doc: |- - If 'true', will calculate darkfield image - type: boolean? - inputBinding: - prefix: --getDarkfield - - # photobleach: - # label: If 'true', will calculate photobleach scalar - # doc: |- - # If 'true', will calculate photobleach scalar - # type: boolean? - # inputBinding: - # prefix: --photobleach - - filePattern: - label: File pattern to subset data - doc: |- - File pattern to subset data - type: string? - inputBinding: - prefix: --filePattern - - groupBy: - label: Variables to group together - doc: |- - Variables to group together - type: string? - inputBinding: - prefix: --groupBy - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? - inputBinding: - prefix: --preview - - outDir: - label: Output image collection - doc: |- - Output image collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output image collection - doc: |- - Output image collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? 
# if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - cwltool: http://commonwl.org/cwltool# - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/regression/basic-flatfield-estimation-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/bbbcdownload.cwl b/cwl_adapters/bbbcdownload.cwl deleted file mode 100644 index 252514a..0000000 --- a/cwl_adapters/bbbcdownload.cwl +++ /dev/null @@ -1,61 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.1 - -label: BBBC Download - -doc: |- - Downloads the datasets on the Broad Bioimage Benchmark Collection website - https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.utils.bbbc_download"] - -requirements: - DockerRequirement: - dockerPull: polusai/bbbc-download-plugin:0.1.0-dev1 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - # NOTE: By default, "tools must not assume network access, except for localhost" - # See https://www.commonwl.org/v1.1/CommandLineTool.html#NetworkAccess - NetworkAccess: - networkAccess: true - -inputs: - name: - label: The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003) - doc: |- - The name of the dataset(s) to be downloaded (separate the datasets with a comma. 
eg BBBC001,BBBC002,BBBC003) - inputBinding: - prefix: --name - type: string - # default: BBBC001 - - outDir: - label: Output collection - doc: |- - Output collection - inputBinding: - prefix: --outDir - type: Directory - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: "https://raw.githubusercontent.com/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json" \ No newline at end of file diff --git a/cwl_adapters/file-renaming.cwl b/cwl_adapters/file-renaming.cwl deleted file mode 100644 index a2df113..0000000 --- a/cwl_adapters/file-renaming.cwl +++ /dev/null @@ -1,85 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.0 - -label: File Renaming - -doc: |- - Rename and store image collection files in a new image collection - https://github.com/PolusAI/polus-plugins/tree/master/formats/file-renaming-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.formats.file_renaming"] - -requirements: - DockerRequirement: - dockerPull: polusai/file-renaming-plugin:0.2.1-dev0 # NOTE: 0.2.3 not pushed yet - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - inputBinding: - prefix: --inpDir - type: Directory - - filePattern: - inputBinding: - prefix: --filePattern - type: string - - mapDirectory: - inputBinding: - prefix: --mapDirectory - type: string? 
# enum: raw, map, default - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - inputBinding: - prefix: --preview - type: boolean? - - outFilePattern: - inputBinding: - prefix: --outFilePattern - type: string - - outDir: - label: Output collection - doc: |- - Output collection - inputBinding: - prefix: --outDir - type: Directory - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/file-renaming-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/image_assembler.cwl b/cwl_adapters/image_assembler.cwl deleted file mode 100644 index 5b9eca3..0000000 --- a/cwl_adapters/image_assembler.cwl +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Image Assembler - -doc: |- - This plugin assembles images into a stitched image using an image stitching vector. 
- https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/image-assembler-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.transforms.images.image_assembler"] - -requirements: - DockerRequirement: - dockerPull: polusai/image-assembler-plugin:1.4.0-dev0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - $(inputs.stitchPath) # Must stage inputs for tools which do not accept full paths. - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - stitchPath: - label: Path to directory containing "stitching vector" file img-global-positions-0.txt - doc: |- - Path to directory containing "stitching vector" file img-global-positions-0.txt - type: Directory - inputBinding: - prefix: --stitchPath - - imgPath: - label: Path to input image collection - doc: |- - Path to input image collection - type: Directory - inputBinding: - prefix: --imgPath - - timesliceNaming: - label: Label images by timeslice rather than analyzing input image names - doc: |- - Label images by timeslice rather than analyzing input image names - inputBinding: - prefix: --timesliceNaming - type: boolean? - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? - inputBinding: - prefix: --preview - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - assembled_image: - label: The assembled montage image - doc: |- - JSON file with outputs - type: File? 
# if not --preview - # See https://bioportal.bioontology.org/ontologies/EDAM?p=classes&conceptid=format_3727 - format: edam:format_3727 - outputBinding: - glob: "*.ome.tif" - - preview_json: - label: JSON file with outputs - doc: |- - JSON file with outputs - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/image-assembler-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/montage.cwl b/cwl_adapters/montage.cwl deleted file mode 100644 index ac4007f..0000000 --- a/cwl_adapters/montage.cwl +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Montage - -doc: |- - This plugin generates a stitching vector that will montage images together. 
- https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/montage-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.transforms.images.montage"] - -requirements: - DockerRequirement: - dockerPull: polusai/montage-plugin:0.5.0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - label: Input image collection to be processed by this plugin - doc: |- - Input image collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - filePattern: - label: Filename pattern used to parse data - doc: |- - Filename pattern used to parse data - type: string - inputBinding: - prefix: --filePattern - - layout: - label: Specify montage organization - doc: |- - Specify montage organization - type: string? - # optional array of strings? - inputBinding: - prefix: --layout - - gridSpacing: - label: Specify spacing between images in the lowest grid - doc: |- - Specify spacing between images in the lowest grid - inputBinding: - prefix: --gridSpacing - type: int? - - imageSpacing: - label: Specify spacing multiplier between grids - doc: |- - Specify spacing multiplier between grids - inputBinding: - prefix: --imageSpacing - type: int? - - flipAxis: - label: Axes to flip when laying out images - doc: |- - Axes to flip when laying out images - inputBinding: - prefix: --flipAxis - type: string? - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? 
- inputBinding: - prefix: --preview - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - global_positions: - label: The "stitching vector", i.e. the positions of the individual images in the montage - doc: |- - The "stitching vector", i.e. the positions of the individual images in the montage - type: File? # if not --preview - outputBinding: - glob: $(inputs.outDir.basename)/img-global-positions-0.txt - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/montage-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/ome-converter.cwl b/cwl_adapters/ome-converter.cwl deleted file mode 100644 index af846a5..0000000 --- a/cwl_adapters/ome-converter.cwl +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: OME Zarr Converter - -doc: |- - This WIPP plugin converts BioFormats supported data types to the OME Zarr file format. 
- https://github.com/PolusAI/polus-plugins/tree/master/formats/ome-converter-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.formats.ome_converter"] - -requirements: - DockerRequirement: - dockerPull: jakefennick/ome-converter-plugin:0.3.2 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} -# NOTE: polusai/ome-converter-plugin:0.3.1 uses the base image -# polusai/bfio:2.3.2 which now un-bundles the java maven package -# ome:formats-gpl:7.1.0 due to licensing reasons. -# To avoid requiring network access at runtime, in the bfio Dockerfile -# it is pre-installed and saved in ~/.m2/ However, by default -# CWL hides all environment variables (including HOME), so we need to -# set HOME here so that at runtime we get a cache hit on the maven install. 
- EnvVarRequirement: -# See https://www.commonwl.org/user_guide/topics/environment-variables.html - envDef: - HOME: /home/polusai - -inputs: - inpDir: - label: Input generic data collection to be processed by this plugin - doc: |- - Input generic data collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - filePattern: - label: A filepattern, used to select data for conversion - doc: |- - A filepattern, used to select data for conversion - type: string - inputBinding: - prefix: --filePattern - - fileExtension: - label: The file extension - doc: |- - The file extension - type: string - inputBinding: - prefix: --fileExtension - default: "default" # enum: .ome.tiff, .ome.zarr, default - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/ome-converter-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/precompute_slide.cwl b/cwl_adapters/precompute_slide.cwl deleted file mode 100644 index 44753d2..0000000 --- a/cwl_adapters/precompute_slide.cwl +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Precompute Slide - -doc: |- - This plugin generates image pyramids in multiple viewing formats. 
- https://github.com/PolusAI/polus-plugins/tree/master/visualization/polus-precompute-slide-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.visualization.precompute_slide"] - -requirements: - DockerRequirement: - dockerPull: polusai/precompute-slide-plugin:1.7.0-dev0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - label: Input generic data collection to be processed by this plugin - doc: |- - Input generic data collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - pyramidType: - label: Build a DeepZoom, Neuroglancer, Zarr pyramid - doc: |- - Build a DeepZoom, Neuroglancer, Zarr pyramid - type: string # enum: DeepZoom, Neuroglancer, Zarr - inputBinding: - prefix: --pyramidType - - imageType: - label: Image is either Segmentation or Image - doc: |- - Image is either Segmentation or Image - inputBinding: - prefix: --imageType - type: string - - filePattern: - label: Filename pattern used to parse data - doc: |- - Filename pattern used to parse data - type: string? 
- inputBinding: - prefix: --filePattern - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# \ No newline at end of file diff --git a/cwl_workflows/__init__.py b/cwl_workflows/__init__.py new file mode 100644 index 0000000..b2e9ca5 --- /dev/null +++ b/cwl_workflows/__init__.py @@ -0,0 +1,2 @@ +import cwl_features_extraction as cwl_features_extraction +import utils as utils \ No newline at end of file diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py new file mode 100644 index 0000000..971173e --- /dev/null +++ b/cwl_workflows/__main__.py @@ -0,0 +1,54 @@ +"""Ome Converter.""" +import logging +from typing import Any +from typing import Optional +import typer +from utils import JSON_FILENAME +from utils import get_params +from cwl_features_extraction import CWLFeatureWorkflow + + +app = typer.Typer() + +# Initialize the logger +logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) +logger = logging.getLogger("WIC Python API") +logger.setLevel(logging.INFO) + + +@app.command() +def main( + name: str = typer.Option( + ..., + "--name", + "-n", + help="Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets)" + ), + workflow: str = typer.Option( + ..., + "--workflow", + "-w", + help="Name of cwl workflow" + ) +) -> None: + + """Execute CWL Workflow.""" + + logger.info(f"name = {name}") + logger.info(f"workflow = {workflow}") + + params = get_params(JSON_FILENAME, name) + + if workflow == "CWLFeatureWorkflow": + logger.info(f"Executing {workflow}!!!") + model = 
CWLFeatureWorkflow(**params) + model.workflow() + + logger.info("Completed CWL workflow!!!") + + +if __name__ == "__main__": + app() \ No newline at end of file diff --git a/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc b/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5f83f499631cc14de5d0c8d4ff9374f7cac7cc4 GIT binary patch literal 8762 zcma)B%ahwidIvy|1TiG%q0#$c$WKwbL)xM(eu^vlTQp2zs;~4^O;MGeW~*vClI!VqUfyed zp;xqvlX1f~Bt*$B$=dh9-&Hql|;itHw ze?r3*TV;x^G1bnMv{H`Mn9hM8R{>WMmw~H<%fz*WtBk9{^IModsjzBx*FMIU*~%Ar zyT~rGBW(Q(9dCP)9c9OGSHj(Kb^>>%#J|e*w$4tnQy5udr`ee=6ukRcb`E`oJ%=4u zd39?UYbY7(`k!8jfLBay)dx)UZ4A3!0TqZE{=uQ9K<}jBA*F1aC_Xk zVr^0vwk@yccD$ftF*kN29@EhKB6OMcz8BIQx8CPLtJ`zMBkNk|`#jy~LN|{4(UnG{ z*=)AzO@i=(DE4A(Ru4s|Az>Y0jB54i-fhq4PT!4VE&`kZjFxxpP*{CeM05_}ZdH}!&;2ay-=NOJHQ7RznpgqEP=(CN0j zJg^4xpgp(mFi(IUq1c)37V*v@FF1&}6^HU2xrHS?9>lI>SW4pQ3s3PZU!yOO`XNWkl&}#c`-1fN7tXA0ThXD>0S#1&a zte7yguX|!fd^X$Q|BIhNUVZ@o^!yzW4*E`W=ZbZc?ugD$T;aJ*pW{purcPvK3N#V^ z=*mP%Ca2%rc{+%!sdRR{xI1Xp@s5pqp+AVO-f9H2qEW+pLSzCCs?79fhMfcz<0KVf zaK*Yii2DOj4Z{;@lS`A_)!Vld!xbXj_WG?vC%z=*c|lE94vKD4SrEx&X-;O6$^zpP zbH>}moUtJ>GMXhuMu?;;x1AokRfcKIesr(nx`rznp&6+o#Zg8YT6L5|tBne1bE6_! 
zePp1`k4k6@adC^DO0hXown|$|&(v`_Hsi`Ob*swMlgb`pS{^Cml|AJl;akPEHqy}^ z8RgNgj{xZ$9W9|f#uQjLZS(6t?C4fCdO9i1vH!(PHHKfB#*f`fZ&*{>K~fYv3jHU% zmJ{b_jwmF~QbP<%8;kBvqoLP}NI1({6;vy$hB|!tFp7Owx)x}b2MW5R2L2;%;t9nh zjkIx&X+%Ai+tp&|&Q^Y0*dyu{seh(jSAKr#p|+db)knE4V_X_xwRj~q$4ev77C7|j z=%v}{;}av5-~n9$q+G+WURnVIQjS^mY2Nxvui$g1G3VH z72tjc0ds?Rfpr}oZ~+HLYXVCQ0=XPi8y=qm&$0Ua`SXd%pZ2-%ND*qfU2z9-IKjf1 zZM580mjlAqKmK&{6Wi>I@Ckf!(Bk3+&~md@NeX`0>2Q(gUI3pV*(sTx8+v_l6pNCg z2gw1FR3gaqKym^WZNo_m4Qvv-eEO9a}jEiLSw+zWlljXonefaKu;RX@ns7%dfK>#f7l&L!u zI#6Vg=@-Qo-Jx{{M4Cck$-*?)Y8RlrAk}8ABx*RRxJV6gLm)9xQd7D?cXeuBrG{8n z(^A#EL<5UjnN%ENjWaO|HdaJrs7>f+S*;Q!)gi^llfdA?#IDyDz*__(nTv9h@ z1Z5f|KR4F)6w*$X+tWxt$Iu6ctYMsoeyS|b3cu7&Djg6R1TDgng~1apVyo{D5Ll<- zHYn3VWZcjel`o-EXSHD;nZvm%x6Adgckr_(IUD4_$ zc^S@3>~W$#g{5}mR`($1*PV0^NABUIdvt&(B2tt!U0u_BpM;doQ@lah;ql3-Q0*2tTcT6Dq~jD zZ9R&7H|mlaWo8XgHrW!H0l5K^#OdR(bm8HJ+UUZ=S`7qz`0%xdmxkIyW@`vxNK%GH zr}fvZD{NO1Q<8N;QSlC@lYG?oy?A20#8nL0rO8gtYvj_SfgjI?E*+%vdAM2TOb_ie zIV>tAZWL*CdBmeWa7OEB^jsBWFKe2r?H|*$KWh7Wet7vX2K*4pBsUO?@<=LK#3fns zF>WL~D9F%a3M6vR)ZHA$;4a5`JQom(LP=rgVCCQ29DGY12D?hqEvfk~lS&>u1$ z6v#_vZQ>?YQnJfl`wF^*=iH>%Cw+d>7bbmi(i>lCtn?2CnVk`G3^2+ZzVPYCwEUzU%=h=o)Uhq)eyV`yFike4s1*-1RE6l;3Y7%K_^-u(1Y(i zHe6lo$V@kCAUi>j0a8Ti?#O=neSc?ekMAJ-Ek~i=5F;O!kxNoePn$jo((f!` zk{(5exsDE+7L+eiJLu5t-vLH?2}3uKH_iTqZw>R%7;OQ3A?PLlOlg?3FaM02n6e`k z-WX>6nTp)y7l_VaEw*w`6ww`i6eILSdk@`N%sY+nHrbO4bk$chqGhE-9{bX_W zv7sw*X{VDykRxJM%H2Lw9vj=ZSOs=&d3f_q$h`KBEb$^-@Pbw-`XSOOZt>uWC&EDT z%bzQfQTTvpsOL!qw%k?=syxhJBwu9b&?Ks`D*=xwKEEnw@7ra;9}iH#l(h%DvN=C% z7qHcJ-cIxn@80=fSZQ=2XsJPUJ$v+ZG0Qh5fl#92b!elc0F7yToy0&6A!|hk#n|jg z7LXtQdu$zDK(h`9sUg2GRb4CZ8+v)aoU5v5_O;K?e5-$6Hj#PkYv1VA;n_o6=M3M1 zP*dq>8g#yiw?xcBmn@}FIRGLKk#@8g3CJ^)R#2MDRv-$eNC+zs`#31xZ57d@RJVoV zfMY@tbo5Jr)B&l@LFOeSihfA$0Yy=hQ(y(O6+}@8aV1m{P`NoMBaNR>en26?K{0Qu zik|jeM!&*JXjieD2{?TYZb`!BC0rhbL4wPpG^hhl9>qar>nQqT=#Qg60Sske$UFF8 zgZAX~eJc`AVU!14VX}V#<;fhYVtrw5$7Q*)I4WRe(OF_EXiu?Kw5PFp4XcZDtMNe! 
zbTlMh19*#oGMqE=oQ882{W&~8hu%VOp+Aql!q(9~&yJ#f0a%X#tKsPJi{qEraaoW7 z&B<(_1X@5*)#w9mYC-jJipxK6#?=M3$`&cbM%8(yB4uCM;^w;^lt6BMTnm*giLsKRxWM()x`Mfw+f z$4STT(U?((1*pefd$_*fbFxK$fkpR+C%@}}vIP{t504#iM%m;iGda+jt3Jid*W@rJtN#7H;gNY~Hd!~^xP>yQm)5DEwAQ3zkbP`Hz++|A z1Jp7ESpj^XETQ@!wgrkwH)@()LUjvijrrllYxthxbFz0ofU>?>Ylu655kI4b z?0_t3%bE%Cg2HKgi7eK=G)DMT`fKoVNAZKaIkl};u zTZWM3YO1W;MVWQKzhjq?id@e=*Z{cK0kCbNpX_1h|yp9=h8BJ{|t^Mhw9+WgwwNH>P2$FR{@+?Shg%nDXIV9dlsZft5H%nh^5BL?a zg+;%^B_pGiya8vjlFPy(y04-vKR;SnSV1!dRR@H=Z=g+~sjxqfhw!CiriLG!*6go; zyJD6#1OKX)VnD8U*av{@4Nz;MG+a;~j{E1)ZPwQ569bXMaT3!(U5yR!(ZWf}j`Min z`jeTWgRr&bIN}z7l0v*gU-lB?Zl5vf_a5Mw|IlADmmYe`y}jhn^vzlZA1YOy(agMg^X7fO7Zr;Zf^pXQo&A`pIk1`32ZxAOoE${qZxjf;jJBjMRJyOW`W>@4#|YuV86Bb;N->H+@(1 zK0Od3=Bu?@;&iG_UmUa>ZO#%m3Iz*A)s0%N`Zzwo!9!<$4gf>SfwuM>fk{TX3{RUy zYOxH(Yv5cEuglz~Bb+A@Wq~v(fb~jNnuLT-ixDC%LRt}R2MR6_@~Z6w!}NEKe$ie*kgmK5;nJ$h5+6zY<6E4R;M4H20FboS0-p=WjQBxW@l<}8LA^Vm z2uYRT)wLjUoq#_8U__cIohpVjqL_u!P&P;-0E=YC7p%oAnM74-Dop0oGf^fc%Cj;% zJIvD6s%G@ zAiU34;pp_;14u0f$U$|XDtxcEc%0HIqV6SmtjGc5j?*jZr8KKJIqn~)1l@i&J>z~n z`k+`xEtWE`{1^Gkh*auOm%Xn?^!s<0mzP$)+OLoHY5r-JRX7mnuRbb$BH| z3}8XU4zfW&K8A8CYKt_QmzZK|(d9MIS@ZHzY&kKZe!^T4C0%KG-sO`;uVAy8MD3U~ zy3+KRBiae$vXqKt$I!btG>HmM)90N^Jv-NEG+d$*Yp0VDx4rYQzW(H4GOYxtYKLyyZpBx%Jyyu(+OX$7Q2ybou$joj i1(=3b*2{)wY53o!@t5(znl$l@R>Br8X=VJ`hkpRqt`#@{ literal 0 HcmV?d00001 diff --git a/workflows/bbbc.py b/cwl_workflows/bbbc.py similarity index 100% rename from workflows/bbbc.py rename to cwl_workflows/bbbc.py diff --git a/workflows/bbbc.yml b/cwl_workflows/bbbc.yml similarity index 100% rename from workflows/bbbc.yml rename to cwl_workflows/bbbc.yml diff --git a/cwl_workflows/cwl_features_extraction.py b/cwl_workflows/cwl_features_extraction.py new file mode 100644 index 0000000..6232604 --- /dev/null +++ b/cwl_workflows/cwl_features_extraction.py @@ -0,0 +1,265 @@ +import wic.api.pythonapi as api +import polus.plugins as pp +from pathlib import Path +import yaml +import logging +import re +import shutil 
+import typing +from utils import GITHUB_TAG + +# Initialize the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class CWLFeatureWorkflow: + """ + A CWL feature extraction pipeline. + + Attributes: + name : Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets). + file_pattern : Pattern for parsing raw filenames. + out_file_pattern : Preferred format for filenames + image_pattern : Pattern for parsing intensity image filenames after renaming when using map_directory + seg_pattern : Pattern use to parse segmentation image filenames + map_directory : Extract folder name + ff_pattern: The filename pattern employed to select flatfield components from the ffDir. + df_pattern:The filename pattern employed to select darkfield components from the ffDir + group_by: Grouping variables for filePattern + features:Features from Nyxus (https://github.com/PolusAI/nyxus/) that need extraction + file_extension: Output file format + """ + def __init__( + self, + name: str, + file_pattern: str, + out_file_pattern: str, + image_pattern: str, + seg_pattern: str, + map_directory: str, + ff_pattern: str, + df_pattern: str, + group_by: str, + features: typing.Optional[str]="ALL", + file_extension: typing.Optional[str]="arrowipc" + ): + self.name = name + self.file_pattern = file_pattern + self.out_file_pattern = out_file_pattern + self.map_directory = map_directory + self.ff_pattern = ff_pattern + self.df_pattern = df_pattern + self.group_by = group_by + self.wic_path = api._WIC_PATH + self.PATH = Path(self.wic_path.parent).joinpath(Path(__file__).parts[-3]) + self.cwl_path, self.workflow_path = self._create_directories() + self.image_pattern = image_pattern + self.seg_pattern = seg_pattern + self.features = features + self.file_extension = file_extension + + def _create_directories(self) -> None: + """Create directories for CWL outputs""" + cwl_path = self.PATH.joinpath("cwl_adapters") + 
cwl_path.mkdir(parents=True, exist_ok=True) + workflow_path = self.PATH.joinpath("workflows").resolve() + workflow_path.mkdir(exist_ok=True) + return cwl_path, workflow_path + + def _clean(self) -> None: + """Cleaning of redundant directories generating on running CWL""" + logger.info("Cleaning directories!!!") + destination_path = self.workflow_path.joinpath("experiment") + dir_names = ("autogenerated", "cachedir", "RUNS", "provenance") + for i, d in zip(self.wic_path.iterdir(), self.PATH.iterdir()): + if i.name.endswith(dir_names): + shutil.rmtree(d) + if d.name.endswith(dir_names): + shutil.rmtree(d) + + for d in destination_path.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.rmtree(d) + for d in self.PATH.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.move(d, destination_path) + + return + + def _move_outputs(self) -> None: + """Transfer outputs from the WIC directory to the workflow path""" + logger.info("Move outputs to workflow path!!!") + for d in self.wic_path.iterdir(): + if d.name.endswith("outdir"): + shutil.move(d, self.workflow_path) + return + + def _camel(self, name: str) -> str: + """Convert plugin name to camel case.""" + name = re.sub(r"(_|-)+", " ", name).title().replace(" ", "") + return "".join([name[0].upper(), name[1:]]) + + def _string_after_period(self, x): + """Get a string after period.""" + match = re.search(r"\.(.*)", x) + if match: + # Get the part after the period + return f".*.{match.group(1)}" + else: + return "" + + def _add_backslash_before_parentheses(self, x): + """Add backslash to generate ff_pattern and df_pattern""" + # Define the regular expression pattern to match parenthesis + pattern_1 = r"(\()|(\))" + # Use re.sub() to add a backslash before starting and finishing parenthesis + result = re.sub(pattern_1, r"\\\1\2", x) + pattern_2 = r"\d" + result = ( + result.split("_c")[0] + + "_c{c:d}" + + re.sub(pattern_2, "", result.split("_c")[1]) + ) + return result + + def create_step(self, url: str) -> 
api.Step: + """Generate the plugin class name from the plugin name specified in the manifest""" + manifest = pp.submit_plugin(url) + plugin_version = str(manifest.version) + cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( + self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") + ) + step = api.Step(cwl_tool) + return step + + def manifest_urls(self, x: str) -> str: + """URLs on GitHub for plugin manifests""" + + urls = { + "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/PolusAI/polus-plugins/f20a2f75264d59af78cfb40b4c3cec118309f7ec/formats/file-renaming-plugin/plugin.json", + "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", + "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", + "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation/plugin.json", + "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json", + "nyxus_plugin": f"{GITHUB_TAG}/hamshkhawar/image-tools/nyxus_manifest/features/nyxus-plugin/plugin.json", + } + return urls[x] + + def modify_cwl(self) -> None: + """Modify CWL to incorporate environmental variables and permission access""" + for f in list(self.cwl_path.rglob("*.cwl")): + if "cwl" in f.name: + try: + with Path.open(f, "r") as file: + config = yaml.safe_load(file) + config["requirements"]["NetworkAccess"] = { + "networkAccess": True + } + config["requirements"]["EnvVarRequirement"] = { + "envDef": {"HOME": "/home/polusai"} + } + with open(f, "w") as out_file: + yaml.dump(config, out_file) + except 
FileNotFoundError: + logger.info("Error: There was an unexpected error while processing the file.") + return + + def workflow(self) -> None: + """ + A CWL feature extraction pipeline. + """ + # BBBCDownload + bbbc = self.create_step(self.manifest_urls("bbbc_download")) + bbbc.name = self.name + bbbc.outDir = Path("bbbc.outDir") + + # Renaming plugin + rename = self.create_step(self.manifest_urls("file_renaming")) + rename.filePattern = self.file_pattern + rename.outFilePattern = self.out_file_pattern + rename.mapDirectory = self.map_directory + rename.inpDir = bbbc.outDir + rename.outDir = Path("rename.outDir") + + # OMEConverter + ome_converter = self.create_step(self.manifest_urls("ome_converter")) + ome_converter.filePattern = self._string_after_period(self.out_file_pattern) + ome_converter.fileExtension = ".ome.tif" + ome_converter.inpDir = rename.outDir + ome_converter.outDir = Path("ome_converter.outDir") + + # Estimate Flatfield + estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) + estimate_flatfield.inpDir = ome_converter.outDir + estimate_flatfield.filePattern = self.image_pattern + estimate_flatfield.groupBy = self.group_by + estimate_flatfield.getDarkfield = True + estimate_flatfield.outDir = Path("estimate_flatfield.outDir") + + # # Apply Flatfield + apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) + apply_flatfield.imgDir = ome_converter.outDir + apply_flatfield.imgPattern = self.image_pattern + apply_flatfield.ffDir = estimate_flatfield.outDir + apply_flatfield.ffPattern = self.ff_pattern + apply_flatfield.dfPattern = self.df_pattern + apply_flatfield.outDir = Path("apply_flatfield.outDir") + apply_flatfield.dataType = True + + ## Kaggle Nuclei Segmentation + kaggle_nuclei_segmentation = self.create_step( + self.manifest_urls("kaggle_nuclei_segmentation") + ) + kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + kaggle_nuclei_segmentation.filePattern = self.image_pattern + 
kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") + + ## FTL Label Plugin + ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) + ftl_plugin.inpDir = kaggle_nuclei_segmentation.outDir + ftl_plugin.connectivity = 1 + ftl_plugin.binarizationThreshold = 0.5 + ftl_plugin.outDir = Path("ftl_plugin.outDir") + + # ## Nyxus Plugin + nyxus_plugin = self.create_step(self.manifest_urls("nyxus_plugin")) + nyxus_plugin.inpDir = apply_flatfield.outDir + nyxus_plugin.segDir = ftl_plugin.outDir + nyxus_plugin.intPattern = self.image_pattern + nyxus_plugin.segPattern = self.seg_pattern + nyxus_plugin.features = self.features + nyxus_plugin.fileExtension = self.file_extension + nyxus_plugin.neighborDist = 5 + nyxus_plugin.pixelPerMicron = 1.0 + nyxus_plugin.outDir = Path("nyxus_plugin.outDir") + + logger.info("Initiating CWL Feature Extraction Workflow!!!") + steps = [ + bbbc, + rename, + ome_converter, + estimate_flatfield, + apply_flatfield, + kaggle_nuclei_segmentation, + ftl_plugin, + nyxus_plugin + ] + workflow = api.Workflow(steps, "experiment", self.workflow_path) + # # Saving CLT for plugins + workflow._save_all_cwl(overwrite=True) + # # Adding environmental variables for bbbc_download and ome_converter plugin + self.modify_cwl() + # # # Save yaml to run CWL tool + workflow._save_yaml() + # Compile and run using WIC python API + workflow.compile(run_local=True, overwrite=False) + # # print(workflow.yml_path) + # # clean autognerated directories + self._clean() + self._move_outputs() + logger.info("Completed CWL Feature Extraction Workflow.") + return + \ No newline at end of file diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py new file mode 100644 index 0000000..2da1a30 --- /dev/null +++ b/cwl_workflows/utils.py @@ -0,0 +1,45 @@ +import json +import pydantic +from pathlib import Path +from typing import Dict + +GITHUB_TAG = "https://raw.githubusercontent.com" + + +class DataModel(pydantic.BaseModel): + data: Dict[str, 
Dict[str, str]] + + +def get_params(path: Path, name: str): + """Loading json file for getting parameters""" + with open(path) as json_file: + # Read the JSON data + data = json.load(json_file) + params = [v[name] for k, v in data.items()][0] + return params + + +params = { + "BBBC039": { + "name": "BBBC039", + "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", + "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", + "seg_pattern":"images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", + "map_directory": "raw", + "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", + "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", + "group_by": "c", + "features": "ALL_INTENSITY,ALL_MORPHOLOGY", + "file_extension": "pandas" + } +} +model = DataModel(data=params) +model_dict = model.dict() + +json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json") +json_dir.mkdir(parents=True, exist_ok=True) +JSON_FILENAME = json_dir.joinpath("bbbc_config.json") + +with Path.open(JSON_FILENAME, "w") as json_file: + json.dump(model_dict, json_file, indent=2) From 1d589e6b2e117eb6888e1a3e617be312adbe37b1 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 15:02:40 -0600 Subject: [PATCH 2/9] remove hidden files --- .../cwl_features_extraction.cpython-310.pyc | Bin 8762 -> 0 bytes cwl_workflows/__pycache__/utils.cpython-310.pyc | Bin 1923 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc delete mode 100644 cwl_workflows/__pycache__/utils.cpython-310.pyc diff --git a/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc b/cwl_workflows/__pycache__/cwl_features_extraction.cpython-310.pyc deleted file mode 100644 index a5f83f499631cc14de5d0c8d4ff9374f7cac7cc4..0000000000000000000000000000000000000000 GIT binary patch literal 
0 HcmV?d00001 literal 8762 zcma)B%ahwidIvy|1TiG%q0#$c$WKwbL)xM(eu^vlTQp2zs;~4^O;MGeW~*vClI!VqUfyed zp;xqvlX1f~Bt*$B$=dh9-&Hql|;itHw ze?r3*TV;x^G1bnMv{H`Mn9hM8R{>WMmw~H<%fz*WtBk9{^IModsjzBx*FMIU*~%Ar zyT~rGBW(Q(9dCP)9c9OGSHj(Kb^>>%#J|e*w$4tnQy5udr`ee=6ukRcb`E`oJ%=4u zd39?UYbY7(`k!8jfLBay)dx)UZ4A3!0TqZE{=uQ9K<}jBA*F1aC_Xk zVr^0vwk@yccD$ftF*kN29@EhKB6OMcz8BIQx8CPLtJ`zMBkNk|`#jy~LN|{4(UnG{ z*=)AzO@i=(DE4A(Ru4s|Az>Y0jB54i-fhq4PT!4VE&`kZjFxxpP*{CeM05_}ZdH}!&;2ay-=NOJHQ7RznpgqEP=(CN0j zJg^4xpgp(mFi(IUq1c)37V*v@FF1&}6^HU2xrHS?9>lI>SW4pQ3s3PZU!yOO`XNWkl&}#c`-1fN7tXA0ThXD>0S#1&a zte7yguX|!fd^X$Q|BIhNUVZ@o^!yzW4*E`W=ZbZc?ugD$T;aJ*pW{purcPvK3N#V^ z=*mP%Ca2%rc{+%!sdRR{xI1Xp@s5pqp+AVO-f9H2qEW+pLSzCCs?79fhMfcz<0KVf zaK*Yii2DOj4Z{;@lS`A_)!Vld!xbXj_WG?vC%z=*c|lE94vKD4SrEx&X-;O6$^zpP zbH>}moUtJ>GMXhuMu?;;x1AokRfcKIesr(nx`rznp&6+o#Zg8YT6L5|tBne1bE6_! zePp1`k4k6@adC^DO0hXown|$|&(v`_Hsi`Ob*swMlgb`pS{^Cml|AJl;akPEHqy}^ z8RgNgj{xZ$9W9|f#uQjLZS(6t?C4fCdO9i1vH!(PHHKfB#*f`fZ&*{>K~fYv3jHU% zmJ{b_jwmF~QbP<%8;kBvqoLP}NI1({6;vy$hB|!tFp7Owx)x}b2MW5R2L2;%;t9nh zjkIx&X+%Ai+tp&|&Q^Y0*dyu{seh(jSAKr#p|+db)knE4V_X_xwRj~q$4ev77C7|j z=%v}{;}av5-~n9$q+G+WURnVIQjS^mY2Nxvui$g1G3VH z72tjc0ds?Rfpr}oZ~+HLYXVCQ0=XPi8y=qm&$0Ua`SXd%pZ2-%ND*qfU2z9-IKjf1 zZM580mjlAqKmK&{6Wi>I@Ckf!(Bk3+&~md@NeX`0>2Q(gUI3pV*(sTx8+v_l6pNCg z2gw1FR3gaqKym^WZNo_m4Qvv-eEO9a}jEiLSw+zWlljXonefaKu;RX@ns7%dfK>#f7l&L!u zI#6Vg=@-Qo-Jx{{M4Cck$-*?)Y8RlrAk}8ABx*RRxJV6gLm)9xQd7D?cXeuBrG{8n z(^A#EL<5UjnN%ENjWaO|HdaJrs7>f+S*;Q!)gi^llfdA?#IDyDz*__(nTv9h@ z1Z5f|KR4F)6w*$X+tWxt$Iu6ctYMsoeyS|b3cu7&Djg6R1TDgng~1apVyo{D5Ll<- zHYn3VWZcjel`o-EXSHD;nZvm%x6Adgckr_(IUD4_$ zc^S@3>~W$#g{5}mR`($1*PV0^NABUIdvt&(B2tt!U0u_BpM;doQ@lah;ql3-Q0*2tTcT6Dq~jD zZ9R&7H|mlaWo8XgHrW!H0l5K^#OdR(bm8HJ+UUZ=S`7qz`0%xdmxkIyW@`vxNK%GH zr}fvZD{NO1Q<8N;QSlC@lYG?oy?A20#8nL0rO8gtYvj_SfgjI?E*+%vdAM2TOb_ie zIV>tAZWL*CdBmeWa7OEB^jsBWFKe2r?H|*$KWh7Wet7vX2K*4pBsUO?@<=LK#3fns zF>WL~D9F%a3M6vR)ZHA$;4a5`JQom(LP=rgVCCQ29DGY12D?hqEvfk~lS&>u1$ 
z6v#_vZQ>?YQnJfl`wF^*=iH>%Cw+d>7bbmi(i>lCtn?2CnVk`G3^2+ZzVPYCwEUzU%=h=o)Uhq)eyV`yFike4s1*-1RE6l;3Y7%K_^-u(1Y(i zHe6lo$V@kCAUi>j0a8Ti?#O=neSc?ekMAJ-Ek~i=5F;O!kxNoePn$jo((f!` zk{(5exsDE+7L+eiJLu5t-vLH?2}3uKH_iTqZw>R%7;OQ3A?PLlOlg?3FaM02n6e`k z-WX>6nTp)y7l_VaEw*w`6ww`i6eILSdk@`N%sY+nHrbO4bk$chqGhE-9{bX_W zv7sw*X{VDykRxJM%H2Lw9vj=ZSOs=&d3f_q$h`KBEb$^-@Pbw-`XSOOZt>uWC&EDT z%bzQfQTTvpsOL!qw%k?=syxhJBwu9b&?Ks`D*=xwKEEnw@7ra;9}iH#l(h%DvN=C% z7qHcJ-cIxn@80=fSZQ=2XsJPUJ$v+ZG0Qh5fl#92b!elc0F7yToy0&6A!|hk#n|jg z7LXtQdu$zDK(h`9sUg2GRb4CZ8+v)aoU5v5_O;K?e5-$6Hj#PkYv1VA;n_o6=M3M1 zP*dq>8g#yiw?xcBmn@}FIRGLKk#@8g3CJ^)R#2MDRv-$eNC+zs`#31xZ57d@RJVoV zfMY@tbo5Jr)B&l@LFOeSihfA$0Yy=hQ(y(O6+}@8aV1m{P`NoMBaNR>en26?K{0Qu zik|jeM!&*JXjieD2{?TYZb`!BC0rhbL4wPpG^hhl9>qar>nQqT=#Qg60Sske$UFF8 zgZAX~eJc`AVU!14VX}V#<;fhYVtrw5$7Q*)I4WRe(OF_EXiu?Kw5PFp4XcZDtMNe! zbTlMh19*#oGMqE=oQ882{W&~8hu%VOp+Aql!q(9~&yJ#f0a%X#tKsPJi{qEraaoW7 z&B<(_1X@5*)#w9mYC-jJipxK6#?=M3$`&cbM%8(yB4uCM;^w;^lt6BMTnm*giLsKRxWM()x`Mfw+f z$4STT(U?((1*pefd$_*fbFxK$fkpR+C%@}}vIP{t504#iM%m;iGda+jt3Jid*W@rJtN#7H;gNY~Hd!~^xP>yQm)5DEwAQ3zkbP`Hz++|A z1Jp7ESpj^XETQ@!wgrkwH)@()LUjvijrrllYxthxbFz0ofU>?>Ylu655kI4b z?0_t3%bE%Cg2HKgi7eK=G)DMT`fKoVNAZKaIkl};u zTZWM3YO1W;MVWQKzhjq?id@e=*Z{cK0kCbNpX_1h|yp9=h8BJ{|t^Mhw9+WgwwNH>P2$FR{@+?Shg%nDXIV9dlsZft5H%nh^5BL?a zg+;%^B_pGiya8vjlFPy(y04-vKR;SnSV1!dRR@H=Z=g+~sjxqfhw!CiriLG!*6go; zyJD6#1OKX)VnD8U*av{@4Nz;MG+a;~j{E1)ZPwQ569bXMaT3!(U5yR!(ZWf}j`Min z`jeTWgRr&bIN}z7l0v*gU-lB?Zl5vf_a5Mw|IlADmmYe`y}jhn^vzlZA1YOy(agMg^X7fO7Zr;Zf^pXQo&A`pIk1`32ZxAOoE${qZxjf;jJBjMRJyOW`W>@4#|YuV86Bb;N->H+@(1 zK0Od3=Bu?@;&iG_UmUa>ZO#%m3Iz*A)s0%N`Zzwo!9!<$4gf>SfwuM>fk{TX3{RUy zYOxH(Yv5cEuglz~Bb+A@Wq~v(fb~jNnuLT-ixDC%LRt}R2MR6_@~Z6w!}NEKe$ie*kgmK5;nJ$h5+6zY<6E4R;M4H20FboS0-p=WjQBxW@l<}8LA^Vm z2uYRT)wLjUoq#_8U__cIohpVjqL_u!P&P;-0E=YC7p%oAnM74-Dop0oGf^fc%Cj;% zJIvD6s%G@ zAiU34;pp_;14u0f$U$|XDtxcEc%0HIqV6SmtjGc5j?*jZr8KKJIqn~)1l@i&J>z~n z`k+`xEtWE`{1^Gkh*auOm%Xn?^!s<0mzP$)+OLoHY5r-JRX7mnuRbb$BH| 
z3}8XU4zfW&K8A8CYKt_QmzZK|(d9MIS@ZHzY&kKZe!^T4C0%KG-sO`;uVAy8MD3U~ zy3+KRBiae$vXqKt$I!btG>HmM)90N^Jv-NEG+d$*Yp0VDx4rYQzW(H4GOYxtYKLyyZpBx%Jyyu(+OX$7Q2ybou$joj i1(=3b*2{)wY53o!@t5(znl$l@R>Br8X=VJ`hkpRqt`#@{ From 81af0325eedf8a834c259f665ad63f0d541ecd5d Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 15:03:50 -0600 Subject: [PATCH 3/9] fix typo error --- cwl_workflows/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py index 971173e..115895f 100644 --- a/cwl_workflows/__main__.py +++ b/cwl_workflows/__main__.py @@ -1,4 +1,4 @@ -"""Ome Converter.""" +"""CWL Workflow.""" import logging from typing import Any from typing import Optional From 42131e4800daa58f5a5ed79e03619d29f7993952 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Wed, 28 Feb 2024 15:50:22 -0600 Subject: [PATCH 4/9] cwl segmentation workflow --- README.md | 12 +++----- ...bbc_config.json => bbbc_segmentation.json} | 4 +-- cwl_workflows/__main__.py | 13 ++++----- ...raction.py => cwl_nuclear_segmentation.py} | 29 +++---------------- cwl_workflows/utils.py | 12 ++++---- 5 files changed, 19 insertions(+), 51 deletions(-) rename bbbc_json/{bbbc_config.json => bbbc_segmentation.json} (84%) rename cwl_workflows/{cwl_features_extraction.py => cwl_nuclear_segmentation.py} (89%) diff --git a/README.md b/README.md index c735f02..056d400 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Common Workflow Language (CWL) Feature Extraction worflow +# Common Workflow Language (CWL) Nuclear Segmentation worflow CWL feature extraction workflow for imaging dataset @@ -20,7 +20,7 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana - `pip install -e ".[all]"` ## Details -This workflow integrates eight distinct plugins, starting from data retrieval from [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), renaming files, correcting uneven illumination, segmenting nuclear 
objects, and culminating in the extraction of features from identified objects +This workflow integrates seven distinct plugins, starting from data retrieval from [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), renaming files, correcting uneven illumination, segmenting nuclear objects. Below are the specifics of the plugins employed in the workflow 1. [bbbc-download-plugin](https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin) @@ -30,13 +30,12 @@ Below are the specifics of the plugins employed in the workflow 5. [apply-flatfield-tool](https://github.com/PolusAI/image-tools/tree/master/transforms/images/apply-flatfield-tool) 6. [kaggle-nuclei-segmentation](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation) 7. [polus-ftl-label-plugin](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/transforms/images/polus-ftl-label-plugin) -8. [nyxus-plugin](https://github.com/PolusAI/image-tools/tree/kaggle-nuclei_seg/features/nyxus-plugin) -## Execute CWL feature extraction workflow +## Execute CWL Segmentation workflow The parameters for each imaging dataset are pre-defined and stored in JSON format. A Pydantic model in a utils Python file can be utilized to store parameters for any new dataset -`python cwl_workflows/__main__.py --name="BBBC039" --workflow=CWLFeatureWorkflow` +`python cwl_workflows/__main__.py --name="BBBC039" --workflow=segmentation` A directory named `workflow` is generated, encompassing CLTs for each plugin, YAML files, and all outputs are stored within the `outdir` directory. 
``` @@ -77,8 +76,5 @@ workflows ├── step 7 FtlLabel │ └── outDir │ └── ftl_plugin.outDir - └── step 8 NyxusPlugin - └── outDir - └── nyxus_plugin.outDir ``` diff --git a/bbbc_json/bbbc_config.json b/bbbc_json/bbbc_segmentation.json similarity index 84% rename from bbbc_json/bbbc_config.json rename to bbbc_json/bbbc_segmentation.json index c5d55c6..9eec985 100644 --- a/bbbc_json/bbbc_config.json +++ b/bbbc_json/bbbc_segmentation.json @@ -9,9 +9,7 @@ "map_directory": "raw", "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", - "group_by": "c", - "features": "ALL_INTENSITY,ALL_MORPHOLOGY", - "file_extension": "pandas" + "group_by": "c" } } } \ No newline at end of file diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py index 115895f..c0b8893 100644 --- a/cwl_workflows/__main__.py +++ b/cwl_workflows/__main__.py @@ -1,11 +1,9 @@ """CWL Workflow.""" import logging -from typing import Any -from typing import Optional import typer -from utils import JSON_FILENAME +from utils import SEG_JSON_FILENAME from utils import get_params -from cwl_features_extraction import CWLFeatureWorkflow +from cwl_nuclear_segmentation import CWLSegmentationWorkflow app = typer.Typer() @@ -40,11 +38,10 @@ def main( logger.info(f"name = {name}") logger.info(f"workflow = {workflow}") - params = get_params(JSON_FILENAME, name) - - if workflow == "CWLFeatureWorkflow": + if workflow == "segmentation": + params = get_params(SEG_JSON_FILENAME, name) logger.info(f"Executing {workflow}!!!") - model = CWLFeatureWorkflow(**params) + model = CWLSegmentationWorkflow(**params) model.workflow() logger.info("Completed CWL workflow!!!") diff --git a/cwl_workflows/cwl_features_extraction.py b/cwl_workflows/cwl_nuclear_segmentation.py similarity index 89% rename from cwl_workflows/cwl_features_extraction.py rename to cwl_workflows/cwl_nuclear_segmentation.py index 
6232604..8b53d73 100644 --- a/cwl_workflows/cwl_features_extraction.py +++ b/cwl_workflows/cwl_nuclear_segmentation.py @@ -5,7 +5,6 @@ import logging import re import shutil -import typing from utils import GITHUB_TAG # Initialize the logger @@ -13,9 +12,9 @@ logger.setLevel(logging.INFO) -class CWLFeatureWorkflow: +class CWLSegmentationWorkflow: """ - A CWL feature extraction pipeline. + A CWL Nuclear Segmentation pipeline. Attributes: name : Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets). @@ -27,8 +26,6 @@ class CWLFeatureWorkflow: ff_pattern: The filename pattern employed to select flatfield components from the ffDir. df_pattern:The filename pattern employed to select darkfield components from the ffDir group_by: Grouping variables for filePattern - features:Features from Nyxus (https://github.com/PolusAI/nyxus/) that need extraction - file_extension: Output file format """ def __init__( self, @@ -40,9 +37,7 @@ def __init__( map_directory: str, ff_pattern: str, df_pattern: str, - group_by: str, - features: typing.Optional[str]="ALL", - file_extension: typing.Optional[str]="arrowipc" + group_by: str ): self.name = name self.file_pattern = file_pattern @@ -56,8 +51,6 @@ def __init__( self.cwl_path, self.workflow_path = self._create_directories() self.image_pattern = image_pattern self.seg_pattern = seg_pattern - self.features = features - self.file_extension = file_extension def _create_directories(self) -> None: """Create directories for CWL outputs""" @@ -144,7 +137,6 @@ def manifest_urls(self, x: str) -> str: "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation/plugin.json", "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json", - 
"nyxus_plugin": f"{GITHUB_TAG}/hamshkhawar/image-tools/nyxus_manifest/features/nyxus-plugin/plugin.json", } return urls[x] @@ -224,18 +216,6 @@ def workflow(self) -> None: ftl_plugin.binarizationThreshold = 0.5 ftl_plugin.outDir = Path("ftl_plugin.outDir") - # ## Nyxus Plugin - nyxus_plugin = self.create_step(self.manifest_urls("nyxus_plugin")) - nyxus_plugin.inpDir = apply_flatfield.outDir - nyxus_plugin.segDir = ftl_plugin.outDir - nyxus_plugin.intPattern = self.image_pattern - nyxus_plugin.segPattern = self.seg_pattern - nyxus_plugin.features = self.features - nyxus_plugin.fileExtension = self.file_extension - nyxus_plugin.neighborDist = 5 - nyxus_plugin.pixelPerMicron = 1.0 - nyxus_plugin.outDir = Path("nyxus_plugin.outDir") - logger.info("Initiating CWL Feature Extraction Workflow!!!") steps = [ bbbc, @@ -244,8 +224,7 @@ def workflow(self) -> None: estimate_flatfield, apply_flatfield, kaggle_nuclei_segmentation, - ftl_plugin, - nyxus_plugin + ftl_plugin ] workflow = api.Workflow(steps, "experiment", self.workflow_path) # # Saving CLT for plugins diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py index 2da1a30..e14b9c3 100644 --- a/cwl_workflows/utils.py +++ b/cwl_workflows/utils.py @@ -19,7 +19,7 @@ def get_params(path: Path, name: str): return params -params = { +seg_params = { "BBBC039": { "name": "BBBC039", "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", @@ -29,17 +29,15 @@ def get_params(path: Path, name: str): "map_directory": "raw", "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", - "group_by": "c", - "features": "ALL_INTENSITY,ALL_MORPHOLOGY", - "file_extension": "pandas" + "group_by": "c" } } -model = DataModel(data=params) +model = DataModel(data=seg_params) model_dict = model.dict() json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json") json_dir.mkdir(parents=True, exist_ok=True) 
-JSON_FILENAME = json_dir.joinpath("bbbc_config.json") +SEG_JSON_FILENAME = json_dir.joinpath("bbbc_segmentation.json") -with Path.open(JSON_FILENAME, "w") as json_file: +with Path.open(SEG_JSON_FILENAME, "w") as json_file: json.dump(model_dict, json_file, indent=2) From 0f0c98403e585c6b640e742b268fc229d2fc5bd3 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 4 Mar 2024 12:33:10 -0600 Subject: [PATCH 5/9] updating urls for plugins and updating data ingestion model --- bbbc_json/bbbc_segmentation.json | 6 +-- cwl_workflows/__main__.py | 13 +++--- cwl_workflows/cwl_nuclear_segmentation.py | 14 +++---- cwl_workflows/utils.py | 48 ++++++++++++++++------- 4 files changed, 50 insertions(+), 31 deletions(-) diff --git a/bbbc_json/bbbc_segmentation.json b/bbbc_json/bbbc_segmentation.json index 9eec985..09b4c95 100644 --- a/bbbc_json/bbbc_segmentation.json +++ b/bbbc_json/bbbc_segmentation.json @@ -2,14 +2,14 @@ "data": { "BBBC039": { "name": "BBBC039", - "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", - "map_directory": "raw", "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", - "group_by": "c" + "group_by": "c", + "map_directory": false } } } \ No newline at end of file diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py index c0b8893..0b6df25 100644 --- a/cwl_workflows/__main__.py +++ b/cwl_workflows/__main__.py @@ -1,8 +1,8 @@ """CWL Workflow.""" import logging import typer -from utils import SEG_JSON_FILENAME -from utils import get_params +from utils import FEAT_JSON_FILENAME +from utils import LoadData from cwl_nuclear_segmentation 
import CWLSegmentationWorkflow @@ -39,10 +39,11 @@ def main( logger.info(f"workflow = {workflow}") if workflow == "segmentation": - params = get_params(SEG_JSON_FILENAME, name) - logger.info(f"Executing {workflow}!!!") - model = CWLSegmentationWorkflow(**params) - model.workflow() + model = LoadData(path=FEAT_JSON_FILENAME, name= name) + params = model.parse_json() + logger.info(f"Executing {workflow}!!!") + model = CWLSegmentationWorkflow(**params) + model.workflow() logger.info("Completed CWL workflow!!!") diff --git a/cwl_workflows/cwl_nuclear_segmentation.py b/cwl_workflows/cwl_nuclear_segmentation.py index 8b53d73..43ff054 100644 --- a/cwl_workflows/cwl_nuclear_segmentation.py +++ b/cwl_workflows/cwl_nuclear_segmentation.py @@ -3,6 +3,7 @@ from pathlib import Path import yaml import logging +import typing import re import shutil from utils import GITHUB_TAG @@ -22,7 +23,7 @@ class CWLSegmentationWorkflow: out_file_pattern : Preferred format for filenames image_pattern : Pattern for parsing intensity image filenames after renaming when using map_directory seg_pattern : Pattern use to parse segmentation image filenames - map_directory : Extract folder name + map_directory : Mapping of folder name ff_pattern: The filename pattern employed to select flatfield components from the ffDir. 
df_pattern:The filename pattern employed to select darkfield components from the ffDir group_by: Grouping variables for filePattern @@ -34,10 +35,10 @@ def __init__( out_file_pattern: str, image_pattern: str, seg_pattern: str, - map_directory: str, ff_pattern: str, df_pattern: str, - group_by: str + group_by: str, + map_directory: typing.Optional[bool] = False, ): self.name = name self.file_pattern = file_pattern @@ -128,15 +129,14 @@ def create_step(self, url: str) -> api.Step: def manifest_urls(self, x: str) -> str: """URLs on GitHub for plugin manifests""" - urls = { "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", - "file_renaming": f"{GITHUB_TAG}/PolusAI/polus-plugins/f20a2f75264d59af78cfb40b4c3cec118309f7ec/formats/file-renaming-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/hamshkhawar/image-tools/filepattern_filerenaming/formats/file-renaming-tool/plugin.json", "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", - "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation/plugin.json", - "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nucleiseg/segmentation/kaggle-nuclei-segmentation-tool/plugin.json", + "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json" } return urls[x] diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py index e14b9c3..100b1c7 100644 --- a/cwl_workflows/utils.py 
+++ b/cwl_workflows/utils.py @@ -2,34 +2,52 @@ import pydantic from pathlib import Path from typing import Dict +from typing import Union GITHUB_TAG = "https://raw.githubusercontent.com" class DataModel(pydantic.BaseModel): - data: Dict[str, Dict[str, str]] - - -def get_params(path: Path, name: str): - """Loading json file for getting parameters""" - with open(path) as json_file: - # Read the JSON data - data = json.load(json_file) - params = [v[name] for k, v in data.items()][0] - return params + data: Dict[str, Dict[str, Union[str, bool]]] + + +class LoadData(pydantic.BaseModel): + path: Union[str, Path] + name:str + + @pydantic.validator("path", pre=True) + @classmethod + def validate_path(cls, value: Union[str, Path]) -> Union[str, Path]: + """Validation of Paths.""" + if not Path(value).exists(): + msg = f"{value} do not exist! Please do check it again" + raise ValueError(msg) + if isinstance(value, str): + return Path(value) + return value + + def parse_json(self) -> Dict[str, Union[str, bool]]: + with open(self.path) as json_file: + # Read the JSON data + data = json.load(json_file) + params = [v[self.name] for k, v in data.items()][0] + if len(params) == 0: + msg = f"{self.name} dataset donot exist! 
Please do check it again" + raise ValueError(msg) + return params seg_params = { "BBBC039": { "name": "BBBC039", - "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", "seg_pattern":"images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", - "map_directory": "raw", "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", - "group_by": "c" + "group_by": "c", + "map_directory": False } } model = DataModel(data=seg_params) @@ -37,7 +55,7 @@ def get_params(path: Path, name: str): json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json") json_dir.mkdir(parents=True, exist_ok=True) -SEG_JSON_FILENAME = json_dir.joinpath("bbbc_segmentation.json") +FEAT_JSON_FILENAME = json_dir.joinpath("bbbc_segmentation.json") -with Path.open(SEG_JSON_FILENAME, "w") as json_file: +with Path.open(FEAT_JSON_FILENAME, "w") as json_file: json.dump(model_dict, json_file, indent=2) From 5c6df0a81b3857c1ce6918c757c5ed76af90803d Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 4 Mar 2024 12:36:38 -0600 Subject: [PATCH 6/9] fix typo error --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 056d400..d7ef105 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,13 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana #### 1. Install polus-plugins. -- clone a image-tools reporsitory +- clone a image-tools repository `git clone https://github.com/camilovelezr/image-tools.git` - cd `image-tools` - `pip install .` #### 2. Install workflow-inference-compiler. 
-- clone a workflow-inference-compiler reporsitory +- clone a workflow-inference-compiler repository `git clone https://github.com/camilovelezr/workflow-inference-compiler.git` - cd `workflow-inference-compiler` - `pip install -e ".[all]"` From 032cd6ee35e57da0be49b5a5a6708202a8c04411 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Mon, 11 Mar 2024 08:51:56 -0500 Subject: [PATCH 7/9] added more documentation --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d7ef105..336bfb7 100644 --- a/README.md +++ b/README.md @@ -9,16 +9,21 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana #### 1. Install polus-plugins. - clone a image-tools repository -`git clone https://github.com/camilovelezr/image-tools.git` +`git clone https://github.com/camilovelezr/image-tools.git ../` - cd `image-tools` - `pip install .` #### 2. Install workflow-inference-compiler. - clone a workflow-inference-compiler repository -`git clone https://github.com/camilovelezr/workflow-inference-compiler.git` +`git clone https://github.com/camilovelezr/workflow-inference-compiler.git ../` - cd `workflow-inference-compiler` - `pip install -e ".[all]"` +#### Note: +Ensure that the [docker-desktop](https://www.docker.com/products/docker-desktop/) is running in the background. To verify that it's operational, you can use the following command: +`docker run -d -p 80:80 docker/getting-started` +This command will launch the `docker/getting-started container` in detached mode (-d flag), exposing port 80 on your local machine (-p 80:80). It's a simple way to test if Docker Desktop is functioning correctly. + ## Details This workflow integrates seven distinct plugins, starting from data retrieval from [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), renaming files, correcting uneven illumination, segmenting nuclear objects. 
From 33f71c9f8ab90ed67714df34d789acf4ada98262 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Fri, 15 Mar 2024 17:12:17 -0500 Subject: [PATCH 8/9] fix docstring and updated documentation --- README.md | 2 ++ cwl_workflows/cwl_nuclear_segmentation.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 336bfb7..392a80b 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana - clone a image-tools repository `git clone https://github.com/camilovelezr/image-tools.git ../` +- create a new branch +`git checkout -b hd2 remotes/origin/hd2` - cd `image-tools` - `pip install .` diff --git a/cwl_workflows/cwl_nuclear_segmentation.py b/cwl_workflows/cwl_nuclear_segmentation.py index 43ff054..20d0687 100644 --- a/cwl_workflows/cwl_nuclear_segmentation.py +++ b/cwl_workflows/cwl_nuclear_segmentation.py @@ -161,7 +161,7 @@ def modify_cwl(self) -> None: def workflow(self) -> None: """ - A CWL feature extraction pipeline. + A CWL nuclear segmentation pipeline. 
""" # BBBCDownload bbbc = self.create_step(self.manifest_urls("bbbc_download")) @@ -239,6 +239,6 @@ def workflow(self) -> None: # # clean autognerated directories self._clean() self._move_outputs() - logger.info("Completed CWL Feature Extraction Workflow.") + logger.info("Completed CWL nuclear segmentation workflow.") return \ No newline at end of file From 47e839cfb1ca840df6e76c2fba6d94cc8fab94c1 Mon Sep 17 00:00:00 2001 From: hamshkhawar Date: Fri, 15 Mar 2024 17:14:13 -0500 Subject: [PATCH 9/9] fix install documentation --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 392a80b..cca3f9a 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,17 @@ create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/mana - clone a image-tools repository `git clone https://github.com/camilovelezr/image-tools.git ../` +- cd `image-tools` - create a new branch `git checkout -b hd2 remotes/origin/hd2` -- cd `image-tools` - `pip install .` #### 2. Install workflow-inference-compiler. - clone a workflow-inference-compiler repository `git clone https://github.com/camilovelezr/workflow-inference-compiler.git ../` - cd `workflow-inference-compiler` +- create a new branch +`git checkout -b hd2 remotes/origin/hd2` - `pip install -e ".[all]"` #### Note: