diff --git a/README.md b/README.md
new file mode 100644
index 0000000..cca3f9a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,89 @@
+# Common Workflow Language (CWL) Nuclear Segmentation Workflow
+
+A CWL nuclear segmentation workflow for imaging datasets.
+
+## Workflow Steps:
+
+Create a [Conda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#activating-an-environment) environment using Python `>=3.9,<3.12`.
+
+#### 1. Install polus-plugins.
+
+- clone the image-tools repository
+`git clone https://github.com/camilovelezr/image-tools.git ../`
+- `cd image-tools`
+- create a new branch
+`git checkout -b hd2 remotes/origin/hd2`
+- `pip install .`
+
+#### 2. Install workflow-inference-compiler.
+- clone the workflow-inference-compiler repository
+`git clone https://github.com/camilovelezr/workflow-inference-compiler.git ../`
+- `cd workflow-inference-compiler`
+- create a new branch
+`git checkout -b hd2 remotes/origin/hd2`
+- `pip install -e ".[all]"`
+
+#### Note:
+Ensure that [Docker Desktop](https://www.docker.com/products/docker-desktop/) is running in the background. To verify that it is operational, you can use the following command:
+`docker run -d -p 80:80 docker/getting-started`
+This command launches the `docker/getting-started` container in detached mode (`-d` flag), exposing port 80 on your local machine (`-p 80:80`). It is a simple way to test whether Docker Desktop is functioning correctly.
+
+## Details
+This workflow integrates seven distinct plugins, covering data retrieval from the [Broad Bioimage Benchmark Collection](https://bbbc.broadinstitute.org/), file renaming, correction of uneven illumination, and segmentation of nuclear objects.
+
+Below are the specifics of the plugins employed in the workflow:
+1. [bbbc-download-plugin](https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin)
+2. [file-renaming-tool](https://github.com/PolusAI/image-tools/tree/master/formats/file-renaming-tool)
+3. [ome-converter-tool](https://github.com/PolusAI/image-tools/tree/master/formats/ome-converter-tool)
+4. [basic-flatfield-estimation-tool](https://github.com/PolusAI/image-tools/tree/master/regression/basic-flatfield-estimation-tool)
+5. [apply-flatfield-tool](https://github.com/PolusAI/image-tools/tree/master/transforms/images/apply-flatfield-tool)
+6. [kaggle-nuclei-segmentation](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/segmentation/kaggle-nuclei-segmentation)
+7. [polus-ftl-label-plugin](https://github.com/hamshkhawar/image-tools/tree/kaggle-nuclei_seg/transforms/images/polus-ftl-label-plugin)
+
+## Execute CWL Segmentation Workflow
+
+The parameters for each imaging dataset are pre-defined and stored in JSON format. A Pydantic model in the utils Python file can be used to store parameters for any new dataset.
+
+`python cwl_workflows/__main__.py --name="BBBC039" --workflow=segmentation`
+
+A directory named `workflows` is generated, containing the CLTs for each plugin and the YAML files; all outputs are stored within the `outdir` directory, as shown in the tree below.
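+As noted above, parameters for a new dataset can be registered by extending the Pydantic parameter model in `cwl_workflows/utils.py`. A minimal sketch is shown here; the `BBBC000` entry and its pattern values are hypothetical placeholders, and the import assumes `cwl_workflows/utils.py` is on the Python path:
+
+```python
+from utils import DataModel  # defined in cwl_workflows/utils.py
+
+new_params = {
+    "BBBC000": {  # hypothetical dataset name, not a real configuration
+        "name": "BBBC000",
+        "file_pattern": ".*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif",  # placeholder
+        "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif",
+        "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif",
+        "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif",
+        "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif",
+        "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif",
+        "group_by": "c",
+        "map_directory": False,
+    }
+}
+
+# DataModel validates the nested structure before it is serialized to JSON.
+DataModel(data=new_params)
+```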
+``` +workflows +├── experiment +│ └── cwl_adapters +| experiment.cwl +| experiment.yml +| +└── outdir + └── experiment + ├── step 1 BbbcDownload + │ └── outDir + │ └── bbbc.outDir + │ └── BBBC + │ └── BBBC039 + │ └── raw + │ ├── Ground_Truth + │ │ ├── masks + │ │ └── metadata + │ └── Images + │ └── images + ├── step 2 FileRenaming + │ └── outDir + │ └── rename.outDir + ├── step 3 OmeConverter + │ └── outDir + │ └── ome_converter.outDir + ├── step 4 BasicFlatfieldEstimation + │ └── outDir + │ └── estimate_flatfield.outDir + ├── step 5 ApplyFlatfield + │ └── outDir + │ └── apply_flatfield.outDir + ├── step 6 KaggleNucleiSegmentation + │ └── outDir + │ └── kaggle_nuclei_segmentation.outDir + ├── step 7 FtlLabel + │ └── outDir + │ └── ftl_plugin.outDir + +``` diff --git a/bbbc_json/bbbc_segmentation.json b/bbbc_json/bbbc_segmentation.json new file mode 100644 index 0000000..09b4c95 --- /dev/null +++ b/bbbc_json/bbbc_segmentation.json @@ -0,0 +1,15 @@ +{ + "data": { + "BBBC039": { + "name": "BBBC039", + "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif", + "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif", + "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif", + "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif", + "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif", + "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif", + "group_by": "c", + "map_directory": false + } + } +} \ No newline at end of file diff --git a/cwl_adapters/basic-flatfield-estimation.cwl b/cwl_adapters/basic-flatfield-estimation.cwl deleted file mode 100644 index 3893ae8..0000000 --- a/cwl_adapters/basic-flatfield-estimation.cwl +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: BaSiC Flatfield Estimation - -doc: |- - This WIPP plugin will take a collection of images and use the BaSiC flatfield correction algorithm to generate a flatfield image, a darkfield image, and a photobleach offset. - https://github.com/PolusAI/polus-plugins/tree/master/regression/basic-flatfield-estimation-plugin - -requirements: - DockerRequirement: - dockerPull: polusai/basic-flatfield-estimation-plugin:2.1.1 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.regression.basic_flatfield_estimation"] - -# "jax._src.xla_bridge - WARNING - An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu." -hints: - cwltool:CUDARequirement: - cudaVersionMin: "11.4" - cudaComputeCapabilityMin: "3.0" - cudaDeviceCountMin: 1 - cudaDeviceCountMax: 1 - -inputs: - inpDir: - label: Path to input images - doc: |- - Path to input images - type: Directory - inputBinding: - prefix: --inpDir - - getDarkfield: - label: If 'true', will calculate darkfield image - doc: |- - If 'true', will calculate darkfield image - type: boolean? - inputBinding: - prefix: --getDarkfield - - # photobleach: - # label: If 'true', will calculate photobleach scalar - # doc: |- - # If 'true', will calculate photobleach scalar - # type: boolean? 
- # inputBinding: - # prefix: --photobleach - - filePattern: - label: File pattern to subset data - doc: |- - File pattern to subset data - type: string? - inputBinding: - prefix: --filePattern - - groupBy: - label: Variables to group together - doc: |- - Variables to group together - type: string? - inputBinding: - prefix: --groupBy - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? - inputBinding: - prefix: --preview - - outDir: - label: Output image collection - doc: |- - Output image collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output image collection - doc: |- - Output image collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - cwltool: http://commonwl.org/cwltool# - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/regression/basic-flatfield-estimation-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/bbbcdownload.cwl b/cwl_adapters/bbbcdownload.cwl deleted file mode 100644 index 252514a..0000000 --- a/cwl_adapters/bbbcdownload.cwl +++ /dev/null @@ -1,61 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.1 - -label: BBBC Download - -doc: |- - Downloads the datasets on the Broad Bioimage Benchmark Collection website - https://github.com/saketprem/polus-plugins/tree/bbbc_download/utils/bbbc-download-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.utils.bbbc_download"] - -requirements: - DockerRequirement: - dockerPull: polusai/bbbc-download-plugin:0.1.0-dev1 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - # NOTE: By default, "tools must not assume network access, except for localhost" - # See https://www.commonwl.org/v1.1/CommandLineTool.html#NetworkAccess - NetworkAccess: - networkAccess: true - -inputs: - name: - label: The name of the dataset(s) to be downloaded (separate the datasets with a comma. eg BBBC001,BBBC002,BBBC003) - doc: |- - The name of the dataset(s) to be downloaded (separate the datasets with a comma. 
eg BBBC001,BBBC002,BBBC003) - inputBinding: - prefix: --name - type: string - # default: BBBC001 - - outDir: - label: Output collection - doc: |- - Output collection - inputBinding: - prefix: --outDir - type: Directory - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: "https://raw.githubusercontent.com/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json" \ No newline at end of file diff --git a/cwl_adapters/file-renaming.cwl b/cwl_adapters/file-renaming.cwl deleted file mode 100644 index a2df113..0000000 --- a/cwl_adapters/file-renaming.cwl +++ /dev/null @@ -1,85 +0,0 @@ -class: CommandLineTool -cwlVersion: v1.0 - -label: File Renaming - -doc: |- - Rename and store image collection files in a new image collection - https://github.com/PolusAI/polus-plugins/tree/master/formats/file-renaming-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.formats.file_renaming"] - -requirements: - DockerRequirement: - dockerPull: polusai/file-renaming-plugin:0.2.1-dev0 # NOTE: 0.2.3 not pushed yet - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - inputBinding: - prefix: --inpDir - type: Directory - - filePattern: - inputBinding: - prefix: --filePattern - type: string - - mapDirectory: - inputBinding: - prefix: --mapDirectory - type: string? # enum: raw, map, default - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - inputBinding: - prefix: --preview - type: boolean? - - outFilePattern: - inputBinding: - prefix: --outFilePattern - type: string - - outDir: - label: Output collection - doc: |- - Output collection - inputBinding: - prefix: --outDir - type: Directory - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/file-renaming-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/image_assembler.cwl b/cwl_adapters/image_assembler.cwl deleted file mode 100644 index 5b9eca3..0000000 --- a/cwl_adapters/image_assembler.cwl +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Image Assembler - -doc: |- - This plugin assembles images into a stitched image using an image stitching vector. 
- https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/image-assembler-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.transforms.images.image_assembler"] - -requirements: - DockerRequirement: - dockerPull: polusai/image-assembler-plugin:1.4.0-dev0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - $(inputs.stitchPath) # Must stage inputs for tools which do not accept full paths. - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - stitchPath: - label: Path to directory containing "stitching vector" file img-global-positions-0.txt - doc: |- - Path to directory containing "stitching vector" file img-global-positions-0.txt - type: Directory - inputBinding: - prefix: --stitchPath - - imgPath: - label: Path to input image collection - doc: |- - Path to input image collection - type: Directory - inputBinding: - prefix: --imgPath - - timesliceNaming: - label: Label images by timeslice rather than analyzing input image names - doc: |- - Label images by timeslice rather than analyzing input image names - inputBinding: - prefix: --timesliceNaming - type: boolean? - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? - inputBinding: - prefix: --preview - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - assembled_image: - label: The assembled montage image - doc: |- - JSON file with outputs - type: File? # if not --preview - # See https://bioportal.bioontology.org/ontologies/EDAM?p=classes&conceptid=format_3727 - format: edam:format_3727 - outputBinding: - glob: "*.ome.tif" - - preview_json: - label: JSON file with outputs - doc: |- - JSON file with outputs - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/image-assembler-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/montage.cwl b/cwl_adapters/montage.cwl deleted file mode 100644 index ac4007f..0000000 --- a/cwl_adapters/montage.cwl +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Montage - -doc: |- - This plugin generates a stitching vector that will montage images together. 
- https://github.com/PolusAI/polus-plugins/tree/master/transforms/images/montage-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.transforms.images.montage"] - -requirements: - DockerRequirement: - dockerPull: polusai/montage-plugin:0.5.0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - label: Input image collection to be processed by this plugin - doc: |- - Input image collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - filePattern: - label: Filename pattern used to parse data - doc: |- - Filename pattern used to parse data - type: string - inputBinding: - prefix: --filePattern - - layout: - label: Specify montage organization - doc: |- - Specify montage organization - type: string? - # optional array of strings? - inputBinding: - prefix: --layout - - gridSpacing: - label: Specify spacing between images in the lowest grid - doc: |- - Specify spacing between images in the lowest grid - inputBinding: - prefix: --gridSpacing - type: int? - - imageSpacing: - label: Specify spacing multiplier between grids - doc: |- - Specify spacing multiplier between grids - inputBinding: - prefix: --imageSpacing - type: int? - - flipAxis: - label: Axes to flip when laying out images - doc: |- - Axes to flip when laying out images - inputBinding: - prefix: --flipAxis - type: string? - - preview: - label: Generate a JSON file describing what the outputs should be - doc: |- - Generate a JSON file describing what the outputs should be - type: boolean? - inputBinding: - prefix: --preview - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - - global_positions: - label: The "stitching vector", i.e. the positions of the individual images in the montage - doc: |- - The "stitching vector", i.e. the positions of the individual images in the montage - type: File? # if not --preview - outputBinding: - glob: $(inputs.outDir.basename)/img-global-positions-0.txt - - preview_json: - label: JSON file describing what the outputs should be - doc: |- - JSON file describing what the outputs should be - type: File? # if --preview - format: edam:format_3464 - outputBinding: - glob: preview.json - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/transforms/images/montage-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/ome-converter.cwl b/cwl_adapters/ome-converter.cwl deleted file mode 100644 index af846a5..0000000 --- a/cwl_adapters/ome-converter.cwl +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: OME Zarr Converter - -doc: |- - This WIPP plugin converts BioFormats supported data types to the OME Zarr file format. 
- https://github.com/PolusAI/polus-plugins/tree/master/formats/ome-converter-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.formats.ome_converter"] - -requirements: - DockerRequirement: - dockerPull: jakefennick/ome-converter-plugin:0.3.2 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} -# NOTE: polusai/ome-converter-plugin:0.3.1 uses the base image -# polusai/bfio:2.3.2 which now un-bundles the java maven package -# ome:formats-gpl:7.1.0 due to licensing reasons. -# To avoid requiring network access at runtime, in the bfio Dockerfile -# it is pre-installed and saved in ~/.m2/ However, by default -# CWL hides all environment variables (including HOME), so we need to -# set HOME here so that at runtime we get a cache hit on the maven install. - EnvVarRequirement: -# See https://www.commonwl.org/user_guide/topics/environment-variables.html - envDef: - HOME: /home/polusai - -inputs: - inpDir: - label: Input generic data collection to be processed by this plugin - doc: |- - Input generic data collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - filePattern: - label: A filepattern, used to select data for conversion - doc: |- - A filepattern, used to select data for conversion - type: string - inputBinding: - prefix: --filePattern - - fileExtension: - label: The file extension - doc: |- - The file extension - type: string - inputBinding: - prefix: --fileExtension - default: "default" # enum: .ome.tiff, .ome.zarr, default - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# manifest: https://raw.githubusercontent.com/PolusAI/polus-plugins/master/formats/ome-converter-plugin/plugin.json \ No newline at end of file diff --git a/cwl_adapters/precompute_slide.cwl b/cwl_adapters/precompute_slide.cwl deleted file mode 100644 index 44753d2..0000000 --- a/cwl_adapters/precompute_slide.cwl +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env cwl-runner -class: CommandLineTool -cwlVersion: v1.0 - -label: Precompute Slide - -doc: |- - This plugin generates image pyramids in multiple viewing formats. 
- https://github.com/PolusAI/polus-plugins/tree/master/visualization/polus-precompute-slide-plugin - -# See https://github.com/PolusAI/workflow-inference-compiler/blob/master/docker_remove_entrypoints.py -baseCommand: python3 -arguments: ["-m", "polus.plugins.visualization.precompute_slide"] - -requirements: - DockerRequirement: - dockerPull: polusai/precompute-slide-plugin:1.7.0-dev0 - # See https://www.commonwl.org/v1.0/CommandLineTool.html#InitialWorkDirRequirement - InitialWorkDirRequirement: - listing: - - entry: $(inputs.outDir) - writable: true # Output directories must be writable - InlineJavascriptRequirement: {} - -inputs: - inpDir: - label: Input generic data collection to be processed by this plugin - doc: |- - Input generic data collection to be processed by this plugin - type: Directory - inputBinding: - prefix: --inpDir - - pyramidType: - label: Build a DeepZoom, Neuroglancer, Zarr pyramid - doc: |- - Build a DeepZoom, Neuroglancer, Zarr pyramid - type: string # enum: DeepZoom, Neuroglancer, Zarr - inputBinding: - prefix: --pyramidType - - imageType: - label: Image is either Segmentation or Image - doc: |- - Image is either Segmentation or Image - inputBinding: - prefix: --imageType - type: string - - filePattern: - label: Filename pattern used to parse data - doc: |- - Filename pattern used to parse data - type: string? - inputBinding: - prefix: --filePattern - - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - inputBinding: - prefix: --outDir - -outputs: - outDir: - label: Output collection - doc: |- - Output collection - type: Directory - outputBinding: - glob: $(inputs.outDir.basename) - -$namespaces: - edam: https://edamontology.org/ - -$schemas: -- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl - -# \ No newline at end of file diff --git a/cwl_workflows/__init__.py b/cwl_workflows/__init__.py new file mode 100644 index 0000000..b2e9ca5 --- /dev/null +++ b/cwl_workflows/__init__.py @@ -0,0 +1,2 @@ +import cwl_features_extraction as cwl_features_extraction +import utils as utils \ No newline at end of file diff --git a/cwl_workflows/__main__.py b/cwl_workflows/__main__.py new file mode 100644 index 0000000..0b6df25 --- /dev/null +++ b/cwl_workflows/__main__.py @@ -0,0 +1,52 @@ +"""CWL Workflow.""" +import logging +import typer +from utils import FEAT_JSON_FILENAME +from utils import LoadData +from cwl_nuclear_segmentation import CWLSegmentationWorkflow + + +app = typer.Typer() + +# Initialize the logger +logging.basicConfig( + format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) +logger = logging.getLogger("WIC Python API") +logger.setLevel(logging.INFO) + + +@app.command() +def main( + name: str = typer.Option( + ..., + "--name", + "-n", + help="Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets)" + ), + workflow: str = typer.Option( + ..., + "--workflow", + "-w", + help="Name of cwl workflow" + ) +) -> None: + + """Execute CWL Workflow.""" + + logger.info(f"name = {name}") + logger.info(f"workflow = {workflow}") + + if workflow == "segmentation": + model = LoadData(path=FEAT_JSON_FILENAME, name= name) + params = model.parse_json() + logger.info(f"Executing {workflow}!!!") + model = CWLSegmentationWorkflow(**params) + model.workflow() + + logger.info("Completed CWL workflow!!!") + + +if __name__ == "__main__": + app() \ No newline at end of file diff --git a/workflows/bbbc.py b/cwl_workflows/bbbc.py 
similarity index 100% rename from workflows/bbbc.py rename to cwl_workflows/bbbc.py diff --git a/workflows/bbbc.yml b/cwl_workflows/bbbc.yml similarity index 100% rename from workflows/bbbc.yml rename to cwl_workflows/bbbc.yml diff --git a/cwl_workflows/cwl_nuclear_segmentation.py b/cwl_workflows/cwl_nuclear_segmentation.py new file mode 100644 index 0000000..20d0687 --- /dev/null +++ b/cwl_workflows/cwl_nuclear_segmentation.py @@ -0,0 +1,244 @@ +import wic.api.pythonapi as api +import polus.plugins as pp +from pathlib import Path +import yaml +import logging +import typing +import re +import shutil +from utils import GITHUB_TAG + +# Initialize the logger +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + + +class CWLSegmentationWorkflow: + """ + A CWL Nuclear Segmentation pipeline. + + Attributes: + name : Name of imaging dataset of Broad Bioimage Benchmark Collection (https://bbbc.broadinstitute.org/image_sets). + file_pattern : Pattern for parsing raw filenames. + out_file_pattern : Preferred format for filenames + image_pattern : Pattern for parsing intensity image filenames after renaming when using map_directory + seg_pattern : Pattern use to parse segmentation image filenames + map_directory : Mapping of folder name + ff_pattern: The filename pattern employed to select flatfield components from the ffDir. + df_pattern:The filename pattern employed to select darkfield components from the ffDir + group_by: Grouping variables for filePattern + """ + def __init__( + self, + name: str, + file_pattern: str, + out_file_pattern: str, + image_pattern: str, + seg_pattern: str, + ff_pattern: str, + df_pattern: str, + group_by: str, + map_directory: typing.Optional[bool] = False, + ): + self.name = name + self.file_pattern = file_pattern + self.out_file_pattern = out_file_pattern + self.map_directory = map_directory + self.ff_pattern = ff_pattern + self.df_pattern = df_pattern + self.group_by = group_by + self.wic_path = api._WIC_PATH + self.PATH = Path(self.wic_path.parent).joinpath(Path(__file__).parts[-3]) + self.cwl_path, self.workflow_path = self._create_directories() + self.image_pattern = image_pattern + self.seg_pattern = seg_pattern + + def _create_directories(self) -> None: + """Create directories for CWL outputs""" + cwl_path = self.PATH.joinpath("cwl_adapters") + cwl_path.mkdir(parents=True, exist_ok=True) + workflow_path = self.PATH.joinpath("workflows").resolve() + workflow_path.mkdir(exist_ok=True) + return cwl_path, workflow_path + + def _clean(self) -> None: + """Cleaning of redundant directories generating on running CWL""" + logger.info("Cleaning directories!!!") + destination_path = self.workflow_path.joinpath("experiment") + dir_names = ("autogenerated", "cachedir", "RUNS", "provenance") + for i, d in zip(self.wic_path.iterdir(), self.PATH.iterdir()): + if i.name.endswith(dir_names): + shutil.rmtree(d) + if d.name.endswith(dir_names): + shutil.rmtree(d) + + for d in destination_path.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.rmtree(d) + for d in self.PATH.iterdir(): + if d.name.endswith("cwl_adapters"): + shutil.move(d, destination_path) + + return + + def _move_outputs(self) -> None: + """Transfer outputs from the WIC directory to the workflow path""" + logger.info("Move outputs to workflow path!!!") + for d in self.wic_path.iterdir(): + if d.name.endswith("outdir"): + shutil.move(d, self.workflow_path) + return + + def _camel(self, name: str) -> str: + """Convert plugin name to camel case.""" + name = re.sub(r"(_|-)+", " ", 
name).title().replace(" ", "") + return "".join([name[0].upper(), name[1:]]) + + def _string_after_period(self, x): + """Get a string after period.""" + match = re.search(r"\.(.*)", x) + if match: + # Get the part after the period + return f".*.{match.group(1)}" + else: + return "" + + def _add_backslash_before_parentheses(self, x): + """Add backslash to generate ff_pattern and df_pattern""" + # Define the regular expression pattern to match parenthesis + pattern_1 = r"(\()|(\))" + # Use re.sub() to add a backslash before starting and finishing parenthesis + result = re.sub(pattern_1, r"\\\1\2", x) + pattern_2 = r"\d" + result = ( + result.split("_c")[0] + + "_c{c:d}" + + re.sub(pattern_2, "", result.split("_c")[1]) + ) + return result + + def create_step(self, url: str) -> api.Step: + """Generate the plugin class name from the plugin name specified in the manifest""" + manifest = pp.submit_plugin(url) + plugin_version = str(manifest.version) + cwl_tool = pp.get_plugin(self._camel(manifest.name), plugin_version).save_cwl( + self.cwl_path.joinpath(f"{self._camel(manifest.name)}.cwl") + ) + step = api.Step(cwl_tool) + return step + + def manifest_urls(self, x: str) -> str: + """URLs on GitHub for plugin manifests""" + urls = { + "bbbc_download": f"{GITHUB_TAG}/saketprem/polus-plugins/bbbc_download/utils/bbbc-download-plugin/plugin.json", + "file_renaming": f"{GITHUB_TAG}/hamshkhawar/image-tools/filepattern_filerenaming/formats/file-renaming-tool/plugin.json", + "ome_converter": f"{GITHUB_TAG}/hamshkhawar/image-tools/basecontainer_omecontainer/formats/ome-converter-plugin/plugin.json", + "estimate_flatfield": f"{GITHUB_TAG}/nishaq503/image-tools/fix/basic/regression/basic-flatfield-estimation-tool/plugin.json", + "apply_flatfield": f"{GITHUB_TAG}/hamshkhawar/image-tools/cast_images/transforms/images/apply-flatfield-tool/plugin.json", + "kaggle_nuclei_segmentation": f"{GITHUB_TAG}/hamshkhawar/image-tools/kaggle-nucleiseg/segmentation/kaggle-nuclei-segmentation-tool/plugin.json", + "ftl_plugin": f"{GITHUB_TAG}/nishaq503/image-tools/fix/ftl-label/transforms/images/polus-ftl-label-plugin/plugin.json" + } + return urls[x] + + def modify_cwl(self) -> None: + """Modify CWL to incorporate environmental variables and permission access""" + for f in list(self.cwl_path.rglob("*.cwl")): + if "cwl" in f.name: + try: + with Path.open(f, "r") as file: + config = yaml.safe_load(file) + config["requirements"]["NetworkAccess"] = { + "networkAccess": True + } + config["requirements"]["EnvVarRequirement"] = { + "envDef": {"HOME": "/home/polusai"} + } + with open(f, "w") as out_file: + yaml.dump(config, out_file) + except FileNotFoundError: + logger.info("Error: There was an unexpected error while processing the file.") + return + + def workflow(self) -> None: + """ + A CWL nuclear segmentation pipeline. 
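+
+        Example (hypothetical direct usage, mirroring cwl_workflows/__main__.py;
+        ``LoadData`` and ``FEAT_JSON_FILENAME`` live in utils.py and are not
+        imported by this module)::
+
+            from utils import LoadData, FEAT_JSON_FILENAME
+            params = LoadData(path=FEAT_JSON_FILENAME, name="BBBC039").parse_json()
+            CWLSegmentationWorkflow(**params).workflow()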
+ """ + # BBBCDownload + bbbc = self.create_step(self.manifest_urls("bbbc_download")) + bbbc.name = self.name + bbbc.outDir = Path("bbbc.outDir") + + # Renaming plugin + rename = self.create_step(self.manifest_urls("file_renaming")) + rename.filePattern = self.file_pattern + rename.outFilePattern = self.out_file_pattern + rename.mapDirectory = self.map_directory + rename.inpDir = bbbc.outDir + rename.outDir = Path("rename.outDir") + + # OMEConverter + ome_converter = self.create_step(self.manifest_urls("ome_converter")) + ome_converter.filePattern = self._string_after_period(self.out_file_pattern) + ome_converter.fileExtension = ".ome.tif" + ome_converter.inpDir = rename.outDir + ome_converter.outDir = Path("ome_converter.outDir") + + # Estimate Flatfield + estimate_flatfield = self.create_step(self.manifest_urls("estimate_flatfield")) + estimate_flatfield.inpDir = ome_converter.outDir + estimate_flatfield.filePattern = self.image_pattern + estimate_flatfield.groupBy = self.group_by + estimate_flatfield.getDarkfield = True + estimate_flatfield.outDir = Path("estimate_flatfield.outDir") + + # # Apply Flatfield + apply_flatfield = self.create_step(self.manifest_urls("apply_flatfield")) + apply_flatfield.imgDir = ome_converter.outDir + apply_flatfield.imgPattern = self.image_pattern + apply_flatfield.ffDir = estimate_flatfield.outDir + apply_flatfield.ffPattern = self.ff_pattern + apply_flatfield.dfPattern = self.df_pattern + apply_flatfield.outDir = Path("apply_flatfield.outDir") + apply_flatfield.dataType = True + + ## Kaggle Nuclei Segmentation + kaggle_nuclei_segmentation = self.create_step( + self.manifest_urls("kaggle_nuclei_segmentation") + ) + kaggle_nuclei_segmentation.inpDir = apply_flatfield.outDir + kaggle_nuclei_segmentation.filePattern = self.image_pattern + kaggle_nuclei_segmentation.outDir = Path("kaggle_nuclei_segmentation.outDir") + + ## FTL Label Plugin + ftl_plugin = self.create_step(self.manifest_urls("ftl_plugin")) + ftl_plugin.inpDir = kaggle_nuclei_segmentation.outDir + ftl_plugin.connectivity = 1 + ftl_plugin.binarizationThreshold = 0.5 + ftl_plugin.outDir = Path("ftl_plugin.outDir") + + logger.info("Initiating CWL Feature Extraction Workflow!!!") + steps = [ + bbbc, + rename, + ome_converter, + estimate_flatfield, + apply_flatfield, + kaggle_nuclei_segmentation, + ftl_plugin + ] + workflow = api.Workflow(steps, "experiment", self.workflow_path) + # # Saving CLT for plugins + workflow._save_all_cwl(overwrite=True) + # # Adding environmental variables for bbbc_download and ome_converter plugin + self.modify_cwl() + # # # Save yaml to run CWL tool + workflow._save_yaml() + # Compile and run using WIC python API + workflow.compile(run_local=True, overwrite=False) + # # print(workflow.yml_path) + # # clean autognerated directories + self._clean() + self._move_outputs() + logger.info("Completed CWL nuclear segmentation workflow.") + return + \ No newline at end of file diff --git a/cwl_workflows/utils.py b/cwl_workflows/utils.py new file mode 100644 index 0000000..100b1c7 --- /dev/null +++ b/cwl_workflows/utils.py @@ -0,0 +1,61 @@ +import json +import pydantic +from pathlib import Path +from typing import Dict +from typing import Union + +GITHUB_TAG = "https://raw.githubusercontent.com" + + +class DataModel(pydantic.BaseModel): + data: Dict[str, Dict[str, Union[str, bool]]] + + +class LoadData(pydantic.BaseModel): + path: Union[str, Path] + name:str + + @pydantic.validator("path", pre=True) + @classmethod + def validate_path(cls, value: Union[str, Path]) -> Union[str, 
Path]:
+        """Validate that the given path exists."""
+        if not Path(value).exists():
+            msg = f"{value} does not exist! Please check it again"
+            raise ValueError(msg)
+        if isinstance(value, str):
+            return Path(value)
+        return value
+
+    def parse_json(self) -> Dict[str, Union[str, bool]]:
+        """Read the JSON file and return the parameters of the named dataset."""
+        with open(self.path) as json_file:
+            # Read the JSON data
+            data = json.load(json_file)
+        params = data.get("data", {}).get(self.name, {})
+        if len(params) == 0:
+            msg = f"{self.name} dataset does not exist! Please check it again"
+            raise ValueError(msg)
+        return params
+
+
+seg_params = {
+    "BBBC039": {
+        "name": "BBBC039",
+        "file_pattern": "/.*/.*/.*/Images/(?P.*)/.*_{row:c}{col:dd}_s{s:d}_w{channel:d}.*.tif",
+        "out_file_pattern": "x{row:dd}_y{col:dd}_p{s:dd}_c{channel:d}.tif",
+        "image_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c{c:d}.ome.tif",
+        "seg_pattern": "images_x{x:dd}_y{y:dd}_p{p:dd}_c1.ome.tif",
+        "ff_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_flatfield.ome.tif",
+        "df_pattern": "images_x\\(00-15\\)_y\\(01-24\\)_p0\\(1-9\\)_c{c:d}_darkfield.ome.tif",
+        "group_by": "c",
+        "map_directory": False
+    }
+}
+model = DataModel(data=seg_params)
+model_dict = model.dict()
+
+json_dir = Path(Path(__file__).parents[1]).joinpath("bbbc_json")
+json_dir.mkdir(parents=True, exist_ok=True)
+FEAT_JSON_FILENAME = json_dir.joinpath("bbbc_segmentation.json")
+
+with Path.open(FEAT_JSON_FILENAME, "w") as json_file:
+    json.dump(model_dict, json_file, indent=2)
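+
+# Minimal usage sketch (assumption: run as a script). It loads the per-dataset
+# parameters back from the JSON written above, mirroring how
+# cwl_workflows/__main__.py consumes them.
+if __name__ == "__main__":
+    example = LoadData(path=FEAT_JSON_FILENAME, name="BBBC039")
+    print(example.parse_json())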