Skip to content

Commit

Permalink
Adapt processes and their naming
Browse files Browse the repository at this point in the history
  • Loading branch information
AmstlerStephan committed Dec 4, 2024
1 parent b462ce2 commit fb9f862
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 15 deletions.
9 changes: 8 additions & 1 deletion bin/extract_umis.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def parse_args(argv):
required=False,
help="Output directory"
)
parser.add_argument(
"--output_filename",
dest="OUT_FILENAME",
type=str,
required=False,
help="Output filename"
)
parser.add_argument(
"--output_format",
dest="OUT_FORMAT",
Expand Down Expand Up @@ -324,7 +331,7 @@ def extract_umis(
input_file = args.INPUT_FA
umi_fwd = args.FWD_UMI
umi_rev = args.REV_UMI
output_file_name = "detected_umis"
output_file_name = args.OUT_FILENAME
format = args.OUT_FORMAT

output_file = os.path.join(
Expand Down
34 changes: 24 additions & 10 deletions bin/filter_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,20 @@ def parse_args(argv):
)

parser.add_argument(
"-o", "--output", dest="OUT", type=str, required=False, help="Output folder"
"-o",
"--output",
dest="OUT",
type=str,
required=False,
help="Output folder"
)

parser.add_argument(
"--output_filename",
dest="OUT_FILENAME",
type=str,
required=False,
help="Output filename"
)

parser.add_argument(
Expand Down Expand Up @@ -118,9 +131,9 @@ def parse_bed(bed_regions):
return region


def write_read(read, output, region, type, format):
def write_read(read, output, type, format):
output_fastx = os.path.join(
output, "{}_{}.{}".format(region["name"], type, format)
output, "{}.{}".format(type, format)
)

    # TODO: verify via a Nextflow run that appending to these files is safe
    # (otherwise, delete any existing output files before the first append)
Expand Down Expand Up @@ -178,6 +191,7 @@ def filter_reads(args):
output = args.OUT
out_format = args.OUT_FORMAT
tsv = args.TSV
output_filename = args.OUT_FILENAME
stats_out_filename = "umi_filter_reads_stats"

n_non_reads = 0
Expand Down Expand Up @@ -207,38 +221,38 @@ def filter_reads(args):
n_total += 1
if read.is_unmapped:
n_unmapped += 1
write_read(read, output, region, "unmapped", out_format)
write_read(read, output, "unmapped", out_format)
continue

if read.is_secondary:
n_secondary += 1
if not incl_sec:
write_read(read, output, region, "secondary", out_format)
write_read(read, output, "secondary", out_format)
continue

if read.is_supplementary:
n_supplementary += 1
write_read(read, output, region, "supplementary", out_format)
write_read(read, output, "supplementary", out_format)
continue

n_ontarget += 1
if read.query_alignment_length < (read.query_length - 2 * adapter_length):
n_concatamer += 1
write_read(read, output, region, "concatamer", out_format)
write_read(read, output, "concatamer", out_format)
continue

if read.query_alignment_length < (region_length * min_overlap):
n_short += 1
write_read(read, output, region, "short", out_format)
write_read(read, output, "short", out_format)
continue

if read.query_length > (region_length * ( 2 - min_overlap) + 2 * adapter_length):
n_long += 1
write_read(read, output, region, "long", out_format)
write_read(read, output, "long", out_format)
continue

n_reads_region += 1
write_read(read, output, region, "filtered", out_format)
write_read(read, output, output_filename, out_format)

if tsv:
stats_out_filename = os.path.join(
Expand Down
1 change: 1 addition & 0 deletions lib/processes/detect_umi_fastq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ process DETECT_UMI_FASTQ {
--max-error ${params.umi_errors} \
--adapter_length ${params.adapter_length} \
--output_format ${params.output_format} \
--output_filename ${fastq.baseName}_umis \
$write_report \
-o . ${fastq}
"""
Expand Down
6 changes: 3 additions & 3 deletions lib/processes/merge_input.nf
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
merged_fastq="merged.fastq"
process MERGE_FASTQ {

input:
tuple val ( sample ), path ( fastq_path )

output:
tuple val( "${sample}" ), val( "target" ), path( "${merged_fastq}"), emit: merged_fastq
tuple val( "${sample}" ), val( "target" ), path( "*fastq" ), emit: merged_fastq

script:
def fastq_name = params.live ? "filtered_$fastq_path.Name" : "merged.fastq"
"""
catfishq \
--min-length ${params.min_read_length} \
--min-qscore ${params.min_qscore} \
${fastq_path} > ${merged_fastq}
${fastq_path} > ${fastq_name}
"""
}
3 changes: 2 additions & 1 deletion lib/processes/split_reads.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ process SPLIT_READS {

output:
path "*.tsv"
tuple val ( "${sample}" ), val( "target" ), path ( "*filtered.${params.output_format}" ), emit: split_reads_fastx
tuple val ( "${sample}" ), val( "target" ), path ( "${bam.baseName}.${params.output_format}" ), emit: split_reads_fastx
path "*${params.output_format}"

script:
Expand All @@ -21,6 +21,7 @@ process SPLIT_READS {
--min_overlap ${params.min_overlap} \
--output_format ${params.output_format} \
--adapter_length ${params.adapter_length} \
--output_filename ${bam.baseName} \
$include_secondary_reads \
$write_report \
-o . ${bed} \
Expand Down

0 comments on commit fb9f862

Please sign in to comment.