From 2a9468b69919901e5f29023ffb876a2a2c9fba32 Mon Sep 17 00:00:00 2001 From: Ivan William Harsono Date: Tue, 9 Apr 2024 22:03:25 +0700 Subject: [PATCH] rename folder --- iderare.yml | 90 -- .../ETL LOINC HPO Subset.ipynb | 0 .../Generate HPO Subset.ipynb | 0 .../Generate ORDO Subset.ipynb | 0 .../Generate SNOMED ORPHA Subset.ipynb | 0 .../Phenotype Data Source.md | 0 .../rawdl_20240310/ORDO.csv | 0 .../rawdl_20240310/genes_to_phenotype.txt | 0 .../rawdl_20240310/hp.obo | 0 .../rawdl_20240310/loinc2hpo-annotations.tsv | 0 .../rawdl_20240310/phenotype.hpoa | 0 .../subset/icd102omim_subset.tsv | 0 .../subset/loinc2hpo_standardized.tsv | 0 .../subset/omim2hpo_subset.tsv | 0 .../subset/orpha2hpo_subset.tsv | 0 .../subset/orpha2omim_subset.tsv | 0 .../subset/snomed2hpo_subset.tsv | 0 .../subset/snomed2orpha_subset.tsv | 0 pipeline.sh | 778 ------------------ 19 files changed, 868 deletions(-) delete mode 100644 iderare.yml rename {db_used => iderare_pheno_db}/ETL LOINC HPO Subset.ipynb (100%) rename {db_used => iderare_pheno_db}/Generate HPO Subset.ipynb (100%) rename {db_used => iderare_pheno_db}/Generate ORDO Subset.ipynb (100%) rename {db_used => iderare_pheno_db}/Generate SNOMED ORPHA Subset.ipynb (100%) rename {db_used => iderare_pheno_db}/Phenotype Data Source.md (100%) rename {db_used => iderare_pheno_db}/rawdl_20240310/ORDO.csv (100%) rename {db_used => iderare_pheno_db}/rawdl_20240310/genes_to_phenotype.txt (100%) rename {db_used => iderare_pheno_db}/rawdl_20240310/hp.obo (100%) rename {db_used => iderare_pheno_db}/rawdl_20240310/loinc2hpo-annotations.tsv (100%) rename {db_used => iderare_pheno_db}/rawdl_20240310/phenotype.hpoa (100%) rename {db_used => iderare_pheno_db}/subset/icd102omim_subset.tsv (100%) rename {db_used => iderare_pheno_db}/subset/loinc2hpo_standardized.tsv (100%) rename {db_used => iderare_pheno_db}/subset/omim2hpo_subset.tsv (100%) rename {db_used => iderare_pheno_db}/subset/orpha2hpo_subset.tsv (100%) rename {db_used => iderare_pheno_db}/subset/orpha2omim_subset.tsv (100%) rename {db_used => iderare_pheno_db}/subset/snomed2hpo_subset.tsv (100%) rename {db_used => iderare_pheno_db}/subset/snomed2orpha_subset.tsv (100%) delete mode 100644 pipeline.sh diff --git a/iderare.yml b/iderare.yml deleted file mode 100644 index 0bc790f..0000000 --- a/iderare.yml +++ /dev/null @@ -1,90 +0,0 @@ -analysis: - # Full Path of trio data - data_dir: /home/ivanwilliamharsono/Downloads/trio_patient - - # Note : - # 1. Ensure all proband, father, mother saved on the same path - # 2. Ensure file name is cleaned .fq.gz, if your file is raw untrimmed fastq, adjust pipeline_template.sh, uncomment Step 0 - # 3. Insert proband, mother, father file name (not full path) without .fq.gz file - # 4. 
File should be prepared on {data_dir}/input/A_FASTQ - # This data source follow the original data sourced from this link : https://www.ncbi.nlm.nih.gov/sra?linkname=bioproject_sra_all&from_uid=1077459 - proband: V350145665_L04_B5EHOMdmhwXAAAA-515 - mother: V350145665_L04_B5EHOMdmhwXAABA-517 - father: V350145665_L04_B5EHOMdmhwXAACA-519 - - # Gender setting 1=male, 2=female, 0=unknown - # Proband Gender - proband_gender: 1 - - # Phenotype -9=missing, 0=missing; 1=unaffected; 2=affected - proband_phen: 2 - mother_phen: 1 - father_phen: 1 - - # HPO IDs of patient - hpo_ids: - [ - "HP:0002366", - "HP:0005561", - "HP:0001903", - "HP:0010972", - "HP:0001541", - "HP:0004333", - "HP:0001396", - "HP:0002910", - "HP:0001531", - "HP:0001399", - "HP:0001433", - "HP:0003073", - "HP:0003233", - "HP:0006568", - "HP:0002151", - "HP:0200114", - "HP:0001653", - "HP:0000938", - "HP:0001873", - ] - - # Library Name - Any kind of sequencing method (e.g. AgilentV6) - library: AgilentV6 - # Method - Any kind of sequencing method (e.g. DNBSeq) - method: DNBSeq - -setup: - # deep variant version #1.5.0 - tested for GPU <8GB | v1.6.0 need minimum GPU memory of 16 GB - dv_version: 1.5.0 - - # glnexus and tiddit docker version - glnexus_version: 1.4.1 - tiddit_version: 3.6.1--py38h24c8ff8_0 - - # deep variant model (WES/WGS) - dv_model: WES - - # max memory allowed (G = Gigabyte) - max_mem: 60G - - # refseq directory (full folder), assuming you save it on Downloads/Database - ref_dir: /home/ivanwilliamharsono/Downloads/Database - # refseq fasta file name (file name) - ref_fasta: hg38.p14.fa - - # SnpEff data directory & version - snpEff_dir: /home/ivanwilliamharsono/Downloads/Sandbox/snpEff/data - snpEff_ver: GRCh38.p14 - - # Exomiser data directory & version - exomiser_dir: /home/ivanwilliamharsono/Downloads/Sandbox/exomiser/data/exomiser-data - exomiser_data_ver: 2309 - - #dbNSFP file - dbNSFP_file: /home/ivanwilliamharsono/Downloads/Database/dbNSFPv4.5a_custombuild.gz - - #dbSNP file - dbSNP_file: /home/ivanwilliamharsono/Downloads/Database/Homo_sapiens_assembly38.dbsnp156_snpsift.fixed.vcf.gz - - #ClinVar file - ClinVar_file: /home/ivanwilliamharsono/Downloads/Database/clinvar_20240215.vcf.gz - - # SnpSift adjuster - chr_rename: /home/ivanwilliamharsono/Downloads/IDeRare/convert/vcf_rename.chrnames diff --git a/db_used/ETL LOINC HPO Subset.ipynb b/iderare_pheno_db/ETL LOINC HPO Subset.ipynb similarity index 100% rename from db_used/ETL LOINC HPO Subset.ipynb rename to iderare_pheno_db/ETL LOINC HPO Subset.ipynb diff --git a/db_used/Generate HPO Subset.ipynb b/iderare_pheno_db/Generate HPO Subset.ipynb similarity index 100% rename from db_used/Generate HPO Subset.ipynb rename to iderare_pheno_db/Generate HPO Subset.ipynb diff --git a/db_used/Generate ORDO Subset.ipynb b/iderare_pheno_db/Generate ORDO Subset.ipynb similarity index 100% rename from db_used/Generate ORDO Subset.ipynb rename to iderare_pheno_db/Generate ORDO Subset.ipynb diff --git a/db_used/Generate SNOMED ORPHA Subset.ipynb b/iderare_pheno_db/Generate SNOMED ORPHA Subset.ipynb similarity index 100% rename from db_used/Generate SNOMED ORPHA Subset.ipynb rename to iderare_pheno_db/Generate SNOMED ORPHA Subset.ipynb diff --git a/db_used/Phenotype Data Source.md b/iderare_pheno_db/Phenotype Data Source.md similarity index 100% rename from db_used/Phenotype Data Source.md rename to iderare_pheno_db/Phenotype Data Source.md diff --git a/db_used/rawdl_20240310/ORDO.csv b/iderare_pheno_db/rawdl_20240310/ORDO.csv similarity index 100% rename from 
db_used/rawdl_20240310/ORDO.csv rename to iderare_pheno_db/rawdl_20240310/ORDO.csv diff --git a/db_used/rawdl_20240310/genes_to_phenotype.txt b/iderare_pheno_db/rawdl_20240310/genes_to_phenotype.txt similarity index 100% rename from db_used/rawdl_20240310/genes_to_phenotype.txt rename to iderare_pheno_db/rawdl_20240310/genes_to_phenotype.txt diff --git a/db_used/rawdl_20240310/hp.obo b/iderare_pheno_db/rawdl_20240310/hp.obo similarity index 100% rename from db_used/rawdl_20240310/hp.obo rename to iderare_pheno_db/rawdl_20240310/hp.obo diff --git a/db_used/rawdl_20240310/loinc2hpo-annotations.tsv b/iderare_pheno_db/rawdl_20240310/loinc2hpo-annotations.tsv similarity index 100% rename from db_used/rawdl_20240310/loinc2hpo-annotations.tsv rename to iderare_pheno_db/rawdl_20240310/loinc2hpo-annotations.tsv diff --git a/db_used/rawdl_20240310/phenotype.hpoa b/iderare_pheno_db/rawdl_20240310/phenotype.hpoa similarity index 100% rename from db_used/rawdl_20240310/phenotype.hpoa rename to iderare_pheno_db/rawdl_20240310/phenotype.hpoa diff --git a/db_used/subset/icd102omim_subset.tsv b/iderare_pheno_db/subset/icd102omim_subset.tsv similarity index 100% rename from db_used/subset/icd102omim_subset.tsv rename to iderare_pheno_db/subset/icd102omim_subset.tsv diff --git a/db_used/subset/loinc2hpo_standardized.tsv b/iderare_pheno_db/subset/loinc2hpo_standardized.tsv similarity index 100% rename from db_used/subset/loinc2hpo_standardized.tsv rename to iderare_pheno_db/subset/loinc2hpo_standardized.tsv diff --git a/db_used/subset/omim2hpo_subset.tsv b/iderare_pheno_db/subset/omim2hpo_subset.tsv similarity index 100% rename from db_used/subset/omim2hpo_subset.tsv rename to iderare_pheno_db/subset/omim2hpo_subset.tsv diff --git a/db_used/subset/orpha2hpo_subset.tsv b/iderare_pheno_db/subset/orpha2hpo_subset.tsv similarity index 100% rename from db_used/subset/orpha2hpo_subset.tsv rename to iderare_pheno_db/subset/orpha2hpo_subset.tsv diff --git a/db_used/subset/orpha2omim_subset.tsv b/iderare_pheno_db/subset/orpha2omim_subset.tsv similarity index 100% rename from db_used/subset/orpha2omim_subset.tsv rename to iderare_pheno_db/subset/orpha2omim_subset.tsv diff --git a/db_used/subset/snomed2hpo_subset.tsv b/iderare_pheno_db/subset/snomed2hpo_subset.tsv similarity index 100% rename from db_used/subset/snomed2hpo_subset.tsv rename to iderare_pheno_db/subset/snomed2hpo_subset.tsv diff --git a/db_used/subset/snomed2orpha_subset.tsv b/iderare_pheno_db/subset/snomed2orpha_subset.tsv similarity index 100% rename from db_used/subset/snomed2orpha_subset.tsv rename to iderare_pheno_db/subset/snomed2orpha_subset.tsv diff --git a/pipeline.sh b/pipeline.sh deleted file mode 100644 index 7f6ce5c..0000000 --- a/pipeline.sh +++ /dev/null @@ -1,778 +0,0 @@ -#!/bin/bash -set -euo pipefail - -echo "STEP 0 : Setup Variable, Directory, and Ensure NVIDIA Docker Supported" -### WARNING : Don't use tilde (~) in the path - -# ------------------- -## Step 0a: Set variable necessary with sample name for RG -# ------------------- -echo "STEP 0a: Set variable necessary" - -BIN_VERSION=1.5.0 -memory=60G -DEEPTRIO_MODEL=WES - -glnexus_version=1.4.1 -tiddit_version=3.6.1--py38h24c8ff8_0 - -# Father -father_name=V350145665_L04_B5EHOMdmhwXAACA-519 - -# Mother -mother_name=V350145665_L04_B5EHOMdmhwXAABA-517 - -# Proband -proband_name=V350145665_L04_B5EHOMdmhwXAAAA-515 - -proband_SM=Proband -proband_PU=AgilentV6 -proband_PL=DNBSeq -proband_LB=WXS - -exomiser_solo=V350145665_L04_B5EHOMdmhwXAAAA-515_exomiser_solo.yml 
-exomiser_solo_sv=V350145665_L04_B5EHOMdmhwXAAAA-515_tiddit_exomiser_solo.yml -exomiser_trio=V350145665_L04_B5EHOMdmhwXAAAA-515_exomiser_trio.yml - -mother_SM=Mother -mother_PU=AgilentV6 -mother_PL=DNBSeq -mother_LB=WXS - -father_SM=Father -father_PU=AgilentV6 -father_PL=DNBSeq -father_LB=WXS - -trimming=false -solo_analysis=true -trio_analysis=true - -# Check if BIN_VERSION is null -if [ -z "$BIN_VERSION" ]; then - echo "BIN_VERSION is null, stopping the pipeline." - exit 1 -fi - -# Check if memory is null -if [ -z "$memory" ]; then - echo "memory is null, stopping the pipeline." - exit 1 -fi - -# Check if DEEPTRIO_MODEL is null -if [ -z "$DEEPTRIO_MODEL" ]; then - echo "DEEPTRIO_MODEL is null, stopping the pipeline." - exit 1 -fi - -# Check if glnexus_version is null -if [ -z "$glnexus_version" ]; then - echo "glnexus_version is null, stopping the pipeline." - exit 1 -fi - -# Check if tiddit_version is null -if [ -z "$tiddit_version" ]; then - echo "tiddit_version is null, stopping the pipeline." - exit 1 -fi - -# Check if proband_name is null -if [ -z "$proband_name" ]; then - echo "Input the filename for proband." - exit 1 -fi - -# If trio_analysis is not null, check if mother_name and father_name is null -if [ "$trio_analysis" = true ]; then - if [ -z "$mother_name" ]; then - echo "Input the filename for mother." - exit 1 - fi - - if [ -z "$father_name" ]; then - echo "Input the filename for father." - exit 1 - fi -fi - -# ------------------- -## Step 0b: Prepare directory and pedigree file -# ------------------- -echo "STEP 0b: Prepare directory and pedigree file" - -## Pedigree file on INPUT_DIR (Just file name only) -INPUT_DIR="/home/ivanwilliamharsono/Downloads/trio_patient/input" -PEDIGREE="trio.ped" - -## Subfolder of INPUT_DIR -FASTQ_DIR=${INPUT_DIR}/A_FASTQ -SAM_DIR=${INPUT_DIR}/B_RAW_SAM_BAM - -## Output Dir -OUTPUT_DIR="/home/ivanwilliamharsono/Downloads/trio_patient/output" -ANNOTATED_DIR="/home/ivanwilliamharsono/Downloads/trio_patient/annotated" -SV_DIR="/home/ivanwilliamharsono/Downloads/trio_patient/sv_tiddit" -EXOMISER_DIR="/home/ivanwilliamharsono/Downloads/trio_patient/exomiser" - -## FASTA file on REFERENCE_DIR (Just file name only) -REFERENCE_DIR=/home/ivanwilliamharsono/Downloads/Database -REFSEQ_FASTA=hg38.p14.fa - -snpEff_dir=/home/ivanwilliamharsono/Downloads/Sandbox/snpEff/data -snpEff_ver=GRCh38.p14 -dbnsfp=/home/ivanwilliamharsono/Downloads/Database/dbNSFPv4.5a_custombuild.gz -dbSNP=/home/ivanwilliamharsono/Downloads/Database/Homo_sapiens_assembly38.dbsnp156_snpsift.fixed.vcf.gz -ClinVar=/home/ivanwilliamharsono/Downloads/Database/clinvar_20240215.vcf.gz -chr_rename=/home/ivanwilliamharsono/Downloads/IDeRare/convert/vcf_rename.chrnames - -cd /home/ivanwilliamharsono/Downloads/trio_patient - -## Make Directory -mkdir -p ${INPUT_DIR} ${FASTQ_DIR} ${SAM_DIR} ${OUTPUT_DIR} ${OUTPUT_DIR}/intermediate_results_dir_proband ${OUTPUT_DIR}/intermediate_results_dir_trio ${ANNOTATED_DIR} ${SV_DIR} ${EXOMISER_DIR} - -# ------------------- -# STEP 1: QC - Run fastqp -# ------------------- -echo "STEP 1: QC - Run fastqp - Optional (if the input is raw untrimmed fastq)" - -# Check if proband_name is null and trimming is true -if [ ! 
-z "$proband_name" ] && [ "$trimming" = true ]; then - echo "STEP 1a : Proband" - - if [ -f "${FASTQ_DIR}/${proband_name}_1.fastq" ] && [ -f "${FASTQ_DIR}/${proband_name}_2.fastq" ]; then - echo "Raw fastq exist, continue to Trimming" - else - echo "File not exist, please check the input directory" - exit 1 - fi - - if [ -f "${FASTQ_DIR}/${proband_name}_1.fq.gz" ] && [ -f "${FASTQ_DIR}/${proband_name}_2.fq.gz" ]; then - echo "Output fq.qz exist, skipping Trimming" - else - echo "File not exist, continue to Trimming" - fastp -g -x -w $(nproc) \ - -D --dup_calc_accuracy 6 \ - --in1 ${FASTQ_DIR}/${proband_name}_1.fastq \ - --in2 ${FASTQ_DIR}/${proband_name}_2.fastq \ - --out1 ${FASTQ_DIR}/${proband_name}_1.fq.gz \ - --out2 ${FASTQ_DIR}/${proband_name}_2.fq.gz \ - -h ${FASTQ_DIR}/${proband_name}.html \ - -j ${FASTQ_DIR}/${proband_name}.json \ - -R ${proband_name}-${proband_SM} - fi -fi - - -# Check if mother_name is null and trimming is true -if [ ! -z "$mother_name" ] && [ "$trimming" = true ] && [ "$trio_analysis" = true ]; then - echo "STEP 1b : Mother" - - if [ -f "${FASTQ_DIR}/${mother_name}_1.fastq" ] && [ -f "${FASTQ_DIR}/${mother_name}_2.fastq" ]; then - echo "Raw fastq exist, continue to Trimming" - else - echo "File not exist, please check the input directory" - exit 1 - fi - - if [ -f "${FASTQ_DIR}/${mother_name}_1.fq.gz" ] && [ -f "${FASTQ_DIR}/${mother_name}_2.fq.gz" ]; then - echo "Output fq.qz exist, skipping Trimming" - else - echo "File not exist, continue to Trimming" - - fastp -g -x -w $(nproc) \ - -D --dup_calc_accuracy 6 \ - --in1 ${FASTQ_DIR}/${mother_name}_1.fastq \ - --in2 ${FASTQ_DIR}/${mother_name}_2.fastq \ - --out1 ${FASTQ_DIR}/${mother_name}_1.fq.gz \ - --out2 ${FASTQ_DIR}/${mother_name}_2.fq.gz \ - -h ${FASTQ_DIR}/${mother_name}.html \ - -j ${FASTQ_DIR}/${mother_name}.json \ - -R ${mother_name}-${mother_SM} - fi -fi - -# Check if father_name is null and trimming is true -if [ ! -z "$father_name" ] && [ "$trimming" = true ] && [ "$trio_analysis" = true ]; then - echo "STEP 1C : Father" - - if [ -f "${FASTQ_DIR}/${father_name}_1.fastq" ] && [ -f "${FASTQ_DIR}/${father_name}_2.fastq" ]; then - echo "Raw fastq exist, continue to Trimming" - else - echo "File not exist, please check the input directory" - exit 1 - fi - - if [ -f "${FASTQ_DIR}/${father_name}_1.fq.gz" ] && [ -f "${FASTQ_DIR}/${father_name}_2.fq.gz" ]; then - echo "Output fq.qz exist, skipping Trimming" - else - echo "File not exist, continue to Trimming" - fastp -g -x -w $(nproc) \ - -D --dup_calc_accuracy 6 \ - --in1 ${FASTQ_DIR}/${father_name}_1.fastq \ - --in2 ${FASTQ_DIR}/${father_name}_2.fastq \ - --out1 ${FASTQ_DIR}/${father_name}_1.fq.gz \ - --out2 ${FASTQ_DIR}/${father_name}_2.fq.gz \ - -h ${FASTQ_DIR}/${father_name}.html \ - -j ${FASTQ_DIR}/${father_name}.json \ - -R ${father_name}-${father_SM} - fi -fi - -# -------------------------------------- -# STEP 2: Map to reference using BWA-MEM2 -# -------------------------------------- -echo "STEP 2: Map to reference using BWA-MEM2" - - -# Check if proband_name is not null -if [ ! 
-z "$proband_name" ]; then - echo "STEP 2a : Proband" - - if [ -f "${FASTQ_DIR}/${proband_name}_1.fq.gz" ] && [ -f "${FASTQ_DIR}/${proband_name}_2.fq.gz" ]; then - echo "Paired end fq.qz exist, continue to BWA-MEM2" - else - echo "File not exist, please check the input directory" - exit 1 - fi - - if [-f "${SAM_DIR}/${proband_name}_raw.sam" ] || [ -f "${INPUT_DIR}/${proband_name}.bam" ]; then - echo "SAM file or final BAM file exist, skipping BWA-MEM2" - else - # BWA MEM2 Alignment - bwa-mem2 mem -R "@RG\tID:${proband_name}\tSM:${proband_SM}\tPU:${proband_PU}\tPL:${proband_PL}\tLB:${proband_LB}" \ - -t $(nproc) ${REFERENCE_DIR}/${REFSEQ_FASTA} \ - ${FASTQ_DIR}/${proband_name}_1.fq.gz \ - ${FASTQ_DIR}/${proband_name}_2.fq.gz \ - > ${SAM_DIR}/${proband_name}_raw.sam - fi - -fi - - -# Check if mother_name is not null and trio_analysis is true -if [ ! -z "$mother_name" ] && [ "$trio_analysis" = true ]; then - echo "STEP 2b : Mother" - - if [ -f "${FASTQ_DIR}/${mother_name}_1.fq.gz" ] && [ -f "${FASTQ_DIR}/${mother_name}_2.fq.gz" ]; then - echo "Paired end fq.qz exist, continue to BWA-MEM2" - else - echo "File not exist, please check the input directory" - exit 1 - fi - - if [-f "${SAM_DIR}/${mother_name}_raw.sam" ] || [ -f "${INPUT_DIR}/${mother_name}.bam" ]; then - echo "SAM file or final BAM file exist, skipping BWA-MEM2" - else - # BWA MEM2 Alignment - bwa-mem2 mem -R "@RG\tID:${mother_name}\tSM:${mother_SM}\tPU:${mother_PU}\tPL:${mother_PL}\tLB:${mother_LB}" \ - -t $(nproc) ${REFERENCE_DIR}/${REFSEQ_FASTA} \ - ${FASTQ_DIR}/${mother_name}_1.fq.gz \ - ${FASTQ_DIR}/${mother_name}_2.fq.gz \ - > ${SAM_DIR}/${mother_name}_raw.sam - fi -fi - - - -# Check if father_name is not null and trio_analysis is true -if [ ! -z "$father_name" ] && [ "$trio_analysis" = true ]; then - echo "STEP 2c : Father" - - if [ -f "${FASTQ_DIR}/${father_name}_1.fq.gz" ] && [ -f "${FASTQ_DIR}/${father_name}_2.fq.gz" ]; then - echo "Paired end fq.qz exist, continue to BWA-MEM2" - else - echo "File not exist, please check the input directory" - exit 1 - fi - - if [-f "${SAM_DIR}/${father_name}_raw.sam" ] || [ -f "${INPUT_DIR}/${father_name}.bam" ]; then - echo "SAM file or final BAM file exist, skipping BWA-MEM2" - else - # BWA MEM2 Alignment - bwa-mem2 mem -R "@RG\tID:${father_name}\tSM:${father_SM}\tPU:${father_PU}\tPL:${father_PL}\tLB:${father_LB}" \ - -t $(nproc) ${REFERENCE_DIR}/${REFSEQ_FASTA} \ - ${FASTQ_DIR}/${father_name}_1.fq.gz \ - ${FASTQ_DIR}/${father_name}_2.fq.gz \ - > ${SAM_DIR}/${father_name}_raw.sam - fi -fi - -# ----------------------------------------- -# STEP 3: Mark Duplicates and Sort - sambamba -# ----------------------------------------- -echo "STEP 3: Mark Duplicates and Sort - Sambamba" - -# Conversion of SAM to BAM & Markdup - -# Check if proband_name is not null -# 2nd pass : check if ${INPUT_DIR}/${proband_name}.bam not exist yet --> then run the function, else skipped - -if [ ! 
-z "$proband_name" ]; then - echo "STEP 3a : Proband" - - if [ -f "${INPUT_DIR}/${proband_name}.bam" ]; then - echo "Final BAM file exist, skipping Mark Duplicates and Sort" - else - sambamba view -p -t=$(nproc) -l=9 \ - -S ${SAM_DIR}/${proband_name}_raw.sam \ - -f=bam -o=${SAM_DIR}/${proband_name}_raw.bam - - sambamba markdup -r -p -t=$(nproc) -l=9 \ - ${SAM_DIR}/${proband_name}_raw.bam \ - ${SAM_DIR}/${proband_name}_dedup.bam - - sambamba sort -m=${memory} -p -t=$(nproc) -l=9 \ - ${SAM_DIR}/${proband_name}_dedup.bam \ - -o=${INPUT_DIR}/${proband_name}.bam - fi -fi - -# Check if mother_name is not null and trio_analysis is true -if [ ! -z "$mother_name" ] && [ "$trio_analysis" = true ]; then - echo "STEP 3b : Mother" - - if [ -f "${INPUT_DIR}/${mother_name}.bam" ]; then - echo "Final BAM file exist, skipping Mark Duplicates and Sort" - else - sambamba view -p -t=$(nproc) -l=9 \ - -S ${SAM_DIR}/${mother_name}_raw.sam \ - -f=bam -o=${SAM_DIR}/${mother_name}_raw.bam - - sambamba markdup -r -p -t=$(nproc) -l=9 \ - ${SAM_DIR}/${mother_name}_raw.bam \ - ${SAM_DIR}/${mother_name}_dedup.bam - - sambamba sort -m=${memory} -p -t=$(nproc) -l=9 \ - ${SAM_DIR}/${mother_name}_dedup.bam \ - -o=${INPUT_DIR}/${mother_name}.bam - fi -fi - -# Check if father_name is not null and trio_analysis is true -if [ ! -z "$father_name" ] && [ "$trio_analysis" = true ]; then - echo "STEP 3c : Father" - - if [ -f "${INPUT_DIR}/${father_name}.bam" ]; then - echo "Final BAM file exist, skipping Mark Duplicates and Sort" - else - sambamba view -p -t=$(nproc) -l=9 \ - -S ${SAM_DIR}/${father_name}_raw.sam \ - -f=bam -o=${SAM_DIR}/${father_name}_raw.bam - - sambamba markdup -r -p -t=$(nproc) -l=9 \ - ${SAM_DIR}/${father_name}_raw.bam \ - ${SAM_DIR}/${father_name}_dedup.bam - - sambamba sort -m=${memory} -p -t=$(nproc) -l=9 \ - ${SAM_DIR}/${father_name}_dedup.bam \ - -o=${INPUT_DIR}/${father_name}.bam - fi -fi - -### Remove Intermediate SAM BAM file as it consumes too much spaces - -# Check if the directory exists -if [ -d "$SAM_DIR" ]; then - echo "Remove all data inside SAM directory to save harddisk space" - rm -r "$SAM_DIR" -else - echo "Directory $SAM_DIR does not exist." -fi - -# ---------------------------------------------- -# STEP 4: Variant Calling -# ---------------------------------------------- - -if [ ! 
-z "$proband_name" ] && [ "$solo_analysis" = true ]; then - echo "STEP 4a: Variant Calling Proband DeepVariant" - - # Further check if ${INPUT_DIR}/${proband_name}.bam all exists - if [ -f "${INPUT_DIR}/${proband_name}.bam" ]; then - if [ -f "${OUTPUT_DIR}/${proband_name}_proband.vcf.gz" ]; then - echo "VCF file exist, skipping DeepVariant" - else - echo "BAM file exist, continue to DeepVariant" - docker run --gpus 1 \ - -v "${INPUT_DIR}":"/input" \ - -v "${OUTPUT_DIR}":"/output" \ - -v "${REFERENCE_DIR}":"/reference" \ - google/deepvariant:"${BIN_VERSION}-gpu" \ - /opt/deepvariant/bin/run_deepvariant \ - --model_type ${DEEPTRIO_MODEL} \ - --ref /reference/${REFSEQ_FASTA} \ - --reads /input/${proband_name}.bam \ - --num_shards $(nproc) \ - --intermediate_results_dir /output/intermediate_results_dir_proband \ - --output_gvcf /output/${proband_name}_proband.g.vcf.gz \ - --output_vcf /output/${proband_name}_proband.vcf.gz - - ## Remove Intermediate_results_dir to save spaces - rm -r -f ${OUTPUT_DIR}/intermediate_results_dir_proband - - ## Stop Docker to save spaces and memory - docker stop $(docker ps -aq) - docker rm $(docker ps -aq) - fi - else - echo "BAM file not exist, please check the input directory 3a" - exit 1 - fi -fi - -if [ ! -z "$proband_name" ] && [ "$trio_analysis" = true ]; then - echo "STEP 4b: Variant Calling DeepTrio" - - # Further check if ${INPUT_DIR}/${father_name}.bam and ${INPUT_DIR}/${mother_name}.bam and ${INPUT_DIR}/${proband_name}.bam all exists - if [ -f "${INPUT_DIR}/${father_name}.bam" ] && [ -f "${INPUT_DIR}/${mother_name}.bam" ] && [ -f "${INPUT_DIR}/${proband_name}.bam" ]; then - - if [ -f "${OUTPUT_DIR}/${proband_name}.g.vcf.gz" ] && [ -f "${OUTPUT_DIR}/${father_name}.g.vcf.gz" ] && [ -f "${OUTPUT_DIR}/${mother_name}.g.vcf.gz" ]; then - echo "gVCF file exist, skipping DeepTrio" - else - echo "All BAM files exist, continue to DeepTrio" - docker run --gpus 1 \ - -v "${INPUT_DIR}":"/input" \ - -v "${OUTPUT_DIR}":"/output" \ - -v "${REFERENCE_DIR}":"/reference" \ - google/deepvariant:deeptrio-"${BIN_VERSION}-gpu" \ - /opt/deepvariant/bin/deeptrio/run_deeptrio \ - --model_type ${DEEPTRIO_MODEL} \ - --ref /reference/${REFSEQ_FASTA} \ - --reads_child /input/${proband_name}.bam \ - --reads_parent1 /input/${father_name}.bam \ - --reads_parent2 /input/${mother_name}.bam \ - --output_vcf_child /output/${proband_name}.output.vcf.gz \ - --output_vcf_parent1 /output/${father_name}.output.vcf.gz \ - --output_vcf_parent2 /output/${mother_name}.output.vcf.gz \ - --sample_name_child 'child' \ - --sample_name_parent1 'father' \ - --sample_name_parent2 'mother' \ - --num_shards $(nproc) \ - --intermediate_results_dir /output/intermediate_results_dir_trio \ - --output_gvcf_child /output/${proband_name}.g.vcf.gz \ - --output_gvcf_parent1 /output/${father_name}.g.vcf.gz \ - --output_gvcf_parent2 /output/${mother_name}.g.vcf.gz - - ## Remove Intermediate_results_dir to save spaces - rm -r -f ${OUTPUT_DIR}/intermediate_results_dir_trio - - ## Stop Docker to save spaces and memory - docker stop $(docker ps -aq) - docker rm $(docker ps -aq) - fi - else - echo "One of the BAM files not exist, please check the input proband.bam, father.bam, mother.bam or redo the step 3a, 3b, 3c" - exit 1 - fi -fi - -# ---------------------------------------------- -# STEP 5: Merge gVCF files with GLnexus -# ---------------------------------------------- -# Check if solo_analysis=true then skip, if trio_analysis=true then continue -if [ ! 
-z "$proband_name" ] && [ "$trio_analysis" = true ]; then - echo "STEP 5: Merge gVCF files with GLnexus" - - if [ -f "${OUTPUT_DIR}/${proband_name}_trio_merged.vcf.gz" ]; then - echo "Merged VCF file exist, skipping GLnexus" - else - docker run \ - -v "${OUTPUT_DIR}":"/output" \ - ghcr.io/dnanexus-rnd/glnexus:v${glnexus_version} \ - /usr/local/bin/glnexus_cli \ - --config DeepVariant_unfiltered \ - /output/${proband_name}.g.vcf.gz \ - /output/${father_name}.g.vcf.gz \ - /output/${mother_name}.g.vcf.gz \ - | bcftools view -Oz -o ${OUTPUT_DIR}/${proband_name}_trio_merged.vcf.gz - - ## Stop Docker to save spaces and memory - docker stop $(docker ps -aq) - docker rm $(docker ps -aq) - fi - - # ---------------------------------------------- - # STEP 6: Calculate Mendelian Violation Rate using RTG Tools - # ---------------------------------------------- - echo "STEP 6: Calculate Mendelian Violation Rate using RTG Tools" - #### Reference : https://www.animalgenome.org/bioinfo/resources/manuals/RTGOperationsManual.pdf - - - if [ -f "${OUTPUT_DIR}/${proband_name}_trio_merged.vcf.gz" ]; then - echo "Merged VCF file exist, continue to RTG Tools" - - if [ -d ${REFERENCE_DIR}/${REFSEQ_FASTA}.sdf ]; - then - echo "${REFERENCE_DIR}/${REFSEQ_FASTA}.sdf exists. Didn't create sdf." - else - echo "${REFERENCE_DIR}/${REFSEQ_FASTA}.sdf directory does not exist. Creating it" - docker run \ - -v "${INPUT_DIR}":"/input" \ - -v "${REFERENCE_DIR}":"/reference" \ - realtimegenomics/rtg-tools format \ - -o /reference/${REFSEQ_FASTA}.sdf "/reference/${REFSEQ_FASTA}" - fi - - - docker run \ - -v "${INPUT_DIR}":"/input" \ - -v "${REFERENCE_DIR}":"/reference" \ - -v "${OUTPUT_DIR}":"/output" \ - realtimegenomics/rtg-tools vcfstats \ - "/output/${proband_name}_trio_merged.vcf.gz" \ - | tee output/deepvariant.${proband_name}_trio.vcfstats.txt - - docker run \ - -v "${INPUT_DIR}":"/input" \ - -v "${REFERENCE_DIR}":"/reference" \ - -v "${OUTPUT_DIR}":"/output" \ - realtimegenomics/rtg-tools mendelian \ - -i "/output/${proband_name}_trio_merged.vcf.gz" \ - -o "/output/${proband_name}_trio_annotated.output.vcf.gz" \ - --pedigree=/input/trio.ped \ - -t /reference/${REFSEQ_FASTA}.sdf \ - | tee output/deepvariant.${proband_name}_trio.mendelian.txt - - # ------------------- - # STEP 6b: Docker Stop and Kill All docker Container Process - # ------------------- - docker stop $(docker ps -aq) - docker rm $(docker ps -aq) - else - echo "Merged VCF file not exist, please check the input directory" - exit 1 - fi -fi - -if [ ! -z "$proband_name" ] && [ "$solo_analysis" = true ]; then - echo "SKIPPED STEP 5: Merge gVCF files with GLnexus, due to solo analysis (Trio)" - echo "SKIPPED STEP 6: Calculate Mendelian Violation Rate using RTG Tools (Trio)" -fi - -# ## Jump to direct to annotation session - -# ------------------- -# STEP 7: Annotate Variants - SnpEff SnpSift -# ------------------- - -if [ ! -z "$proband_name" ] && [ "$solo_analysis" = true ]; then - echo "STEP 7a: Annotate Variants Proband - SNPEff with latest database, SnpSift ClinVar, SnpSift dbNSFP" - - if [ ! -f "${OUTPUT_DIR}/${proband_name}_proband.vcf.gz" ]; then - echo "VCF file not exist, please check the input directory" - exit 1 - fi - - # If output file not exist, run annotate - if [ ! -f "${proband_name}-converted-deepVariant.vcf" ] && [ ! 
-f "${proband_name}-converted-deepVariant.vcf.gz" ]; then - # Create GATK > dbnsfp Chromosome - bcftools annotate \ - --rename-chrs "${chr_rename}" \ - --threads "$(nproc)" -Oz \ - -o "${OUTPUT_DIR}/${proband_name}-converted-deepVariant.vcf" \ - "${OUTPUT_DIR}/${proband_name}_proband.vcf.gz" - else - echo "Deepvariant converted VCF file exists, skipping conversion" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf.gz" ] && [ -f "${OUTPUT_DIR}/${proband_name}-converted-deepVariant.vcf" ]; then - # SnpEff with recent GRCh38.p14 database - SnpEff -v "${snpEff_ver}" -dataDir "${snpEff_dir}" \ - -s "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.html" \ - "${OUTPUT_DIR}/${proband_name}-converted-deepVariant.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf" - else - echo "SnpEff VCF file exists, skipping SnpEff" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf.gz" ] && [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf" ]; then - # SnpSift Annotate ClinVar - SnpSift annotate -v "${dbSNP}" \ - "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf" - else - echo "Annotated dbSNP file exists, skipping annotation" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf.gz" ] && [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf" ]; then - SnpSift annotate -v "${ClinVar}" \ - "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf" - else - echo "Annotated ClinVar file exists, skipping annotation" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepVariant.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepVariant.vcf.gz" ] && [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf" ]; then - # Annotate using dbNSFP for SNP Only (Indel Give 0 Annotation Result) - SnpSift dbnsfp -v -db "${dbnsfp}" \ - "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepVariant.vcf" - else - echo "Annotated dbNSFP file exists, skipping annotation" - fi - - # Compress file size using bgzip if the previous .vcf file exist - if [ -f "${OUTPUT_DIR}/${proband_name}-converted-deepVariant.vcf" ] && [ ! -f "${OUTPUT_DIR}/${proband_name}-converted-deepVariant.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${OUTPUT_DIR}/${proband_name}-converted-deepVariant.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepVariant.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf" ] && [ ! 
-f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepVariant.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepVariant.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepVariant.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepVariant.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepVariant.vcf" - fi -fi - -# Similar to above, but for trio_analysis - -if [ ! -z "$proband_name" ] && [ "$trio_analysis" = true ]; then - echo "STEP 7b: Annotate Variants Trio - SNP Sift" - - # If output file not exist yet, and previous file exist - if [ ! -f "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf" ] && [ ! -f "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf.gz" ]; then - # Create GATK > dbnsfp Chromosome - bcftools annotate \ - --rename-chrs "${chr_rename}" \ - --threads "$(nproc)" -Oz \ - -o "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf" \ - "${OUTPUT_DIR}/${proband_name}_trio_annotated.output.vcf.gz" - else - echo "DeepTrio converted VCF file exists, skipping conversion" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf.gz" ] && [ -f "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf" ]; then - # SnpEff with recent GRCh38.p14 database - SnpEff -v "${snpEff_ver}" -dataDir "${snpEff_dir}" \ - -s "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.html" \ - "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf" - else - echo "SnpEff VCF file exists, skipping SnpEff" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf.gz" ] && [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf" ]; then - # SnpSift - SnpSift annotate -v "${dbSNP}" \ - "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf" - else - echo "Annotated dbSNP file exists, skipping annotation" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf.gz" ] && [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf" ]; then - # SnpSift Annotate ClinVar - SnpSift annotate -v "${ClinVar}" \ - "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf" - else - echo "Annotated ClinVar file exists, skipping annotation" - fi - - # If output file not exist yet, and previous file exist - if [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepTrio.vcf" ] && [ ! 
-f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepTrio.vcf.gz" ] && [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf" ]; then - # Annotate using dbNSFP for SNP Only (Indel Give 0 Annotation Result) - SnpSift dbnsfp -v -db "${dbnsfp}" \ - "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf" \ - > "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepTrio.vcf" - else - echo "Annotated dbNSFP file exists, skipping annotation" - fi - - # Compress file size using bgzip if the previous .vcf file exist - if [ -f "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf" ] && [ ! -f "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${OUTPUT_DIR}/${proband_name}-converted-deepTrio.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-deepTrio.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-deepTrio.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-deepTrio.vcf" - fi - - if [ -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepTrio.vcf" ] && [ ! -f "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepTrio.vcf.gz" ]; then - bgzip --threads "$(nproc)" "${ANNOTATED_DIR}/${proband_name}-SnpEff-dbSNP-ClinVar-dbNSFP_annotated-deepTrio.vcf" - fi -fi - -# Additional SV Step using Tiddit @ SV_DIR - -# Check if proband_name is not null and annotated tiddit output is not yet exist in folder -if [ ! -z "$proband_name" ] && [ ! 
-f "${SV_DIR}/output.filtered.dbnsfp.vcf" ]; then - echo "STEP 8: SV calling using Tiddit, SnpEff annotation, SnpSift dbNSFP annotation" - - if [ -f "${INPUT_DIR}/${proband_name}.bam" ]; then - echo "Final BAM file exist, continue to Tiddit" - docker run \ - -v "${INPUT_DIR}":"/input" \ - -v "${SV_DIR}":"/output" \ - -v "${REFERENCE_DIR}":"/reference" \ - quay.io/biocontainers/tiddit:${tiddit_version} tiddit \ - --sv \ - --ref /reference/${REFSEQ_FASTA} \ - --bam /input/${proband_name}.bam \ - --skip_assembly \ - --threads $(nproc) \ - -o /output/output - - # Stop Docker - docker stop $(docker ps -aq) - docker rm $(docker ps -aq) - - grep -E "#|PASS" ${SV_DIR}/output.vcf > ${SV_DIR}/output.filtered.vcf - - SnpEff -v ${snpEff_ver} -dataDir ${snpEff_dir} \ - ${SV_DIR}/output.filtered.vcf > ${SV_DIR}/output.filtered.snpeff.vcf - - SnpSift dbnsfp -v -db ${dbnsfp} \ - ${SV_DIR}/output.filtered.snpeff.vcf > ${SV_DIR}/output.filtered.dbnsfp.vcf - else - echo "BAM file not exist, please check the input directory" - exit 1 - fi -else - echo "SV output folder exist, and there are already final data, skipping Tiddit" -fi - -# Step using Exomiser @ Exomiser root folder - -# Check if solo_analysis is true -if [ "$solo_analysis" = true ]; then - echo "STEP 9: Exomiser SNP Solo Analysis" - exomiser --analysis ${exomiser_solo} -fi - -# Check if trio_analysis is true -if [ "$trio_analysis" = true ]; then - echo "STEP 9: Exomiser SNP Trio Analysis" - exomiser --analysis ${exomiser_trio} -fi - -# Check if SV VCF is true -if [ -f "${SV_DIR}/output.filtered.dbnsfp.vcf" ]; then - echo "STEP 9: Exomiser SV Solo Analysis" - exomiser --analysis ${exomiser_solo_sv} -fi \ No newline at end of file