-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit caab215
Showing
12 changed files
with
875 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/usr/bin/env python | ||
#-*- coding: utf-8 -*- | ||
# @Time : 12/12/2022 1:25 PM | ||
# @Author : Runsheng | ||
# @File : fq2vcf.py.py | ||
|
||
""" | ||
mapping fastq to reference with bwa mem, and call the vcf | ||
""" | ||
import argparse | ||
import os | ||
import sys | ||
|
||
from primervcf.map2vcf import flow_fastq2vcf | ||
|
||
parser=argparse.ArgumentParser() | ||
parser.add_argument("-d", "--wkdir", default=None, | ||
help="The dir path contain the file, if not given, use the current dir") | ||
# input and out | ||
parser.add_argument("-f", "--file", | ||
help="the fastq file used as input, name seperated by , " | ||
"example for pair-end like SRR1793006_1.fastq,SRR1793006_2.fastq") | ||
parser.add_argument("-g", "--genome", | ||
help="the genome file used for mapping") | ||
parser.add_argument("-p", "--prefix",default="raw", | ||
help="the outvcf filename prefix, default is using the file prefix of fastq") | ||
|
||
# performace | ||
parser.add_argument("-c", "--core", | ||
help="the core used for bwa mem mapping and samtools sort") | ||
|
||
|
||
args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) | ||
wkdir=os.getcwd() if args.wkdir is None else args.wkdir | ||
|
||
os.chdir(wkdir) | ||
name_str=args.file | ||
read_list=name_str.strip().split(",") | ||
|
||
flow_fastq2vcf(ref=args.genome, | ||
read=read_list, | ||
prefix=args.prefix, | ||
core=args.core, | ||
wkdir=args.wkdir | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#!/usr/bin/env python | ||
#-*- coding: utf-8 -*- | ||
# @Time : 12/12/2022 12:23 PM | ||
# @Author : Runsheng | ||
# @File : primerdesign_vcf.py.py | ||
|
||
""" | ||
The main script used to run cmd orders | ||
""" | ||
|
||
import argparse | ||
import os | ||
import sys | ||
|
||
#currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) | ||
#parentdir = os.path.dirname(os.path.dirname(currentdir)) | ||
#sys.path.insert(0,parentdir) | ||
|
||
from primervcf.primer_indel import primer3_general_settings | ||
from primerdiffer.utils import fasta2dic,dic2dic | ||
|
||
# self import | ||
from primervcf.primer_indel import flow_walk_deletion, file2bedl | ||
|
||
parser=argparse.ArgumentParser() | ||
parser.add_argument("-d", "--wkdir", default=None, | ||
help="The dir path contain the file, if not given, use the current dir") | ||
# input | ||
parser.add_argument("-g", "--genome", | ||
help="the fasta file used to design primer and check specificity") | ||
parser.add_argument("-b", "--bedfile",default=None, | ||
help="the bed file containing the deletion region interval") | ||
|
||
# primer cutoff | ||
parser.add_argument("--alignlen", type=int, default=16, | ||
help="the cutoff of primer min align length as a right hit, default is 16") | ||
parser.add_argument("--free3len", type=int, default=2, | ||
help="the cutoff of primer 3' align length as a right hit, default is 2") | ||
|
||
# run parameter, how dense the primer should be | ||
parser.add_argument("-n","--primernumber", type=int, default=5, | ||
help="the primer designed for each region, default is 5, do not have much impact for primer design") | ||
|
||
parser.add_argument("--debug", type=str, default="no", | ||
help='open debug mode or not, default is no') | ||
|
||
# output parameter | ||
parser.add_argument("--prefix", default="primers", | ||
help="prefix of output file, default is primers") | ||
|
||
# add user settings | ||
parser.add_argument("--primer3config", default=None, | ||
help="the config file for the primer3 ") | ||
|
||
# default handler | ||
args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) | ||
wkdir=os.getcwd() if args.wkdir is None else args.wkdir | ||
debug=False if args.debug=="no" else True | ||
|
||
os.chdir(wkdir) | ||
|
||
# deal with config | ||
primer3_setting = primer3_general_settings | ||
if args.primer3config is None: | ||
pass | ||
else: # update the primer3 setting from the given parameters | ||
with open(args.primer3config, "r") as f: | ||
primer3_user_setting = eval(f.read()) # read the config as a dict | ||
primer3_setting.update(primer3_user_setting) | ||
|
||
ref_dict=dic2dic(fasta2dic(args.genome)) | ||
bed_l=file2bedl(args.bedfile) | ||
#print(bed_l) | ||
|
||
flow_walk_deletion( | ||
ref_dict=ref_dict, | ||
db=args.genome, | ||
deletion_bedlist=bed_l, | ||
prefix=args.prefix, | ||
primer_number=args.primernumber, | ||
debugmod=debug, | ||
cutoff_alignlength=args.alignlen, | ||
cutoff_free3=args.free3len, | ||
flank=1000, | ||
primer3_general_settings=primer3_setting) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/usr/bin/env python | ||
#-*- coding: utf-8 -*- | ||
# @Time : 12/12/2022 12:38 PM | ||
# @Author : Runsheng | ||
# @File : vcf2del.py | ||
""" | ||
parser the vcf file, get a bed like list for deletion | ||
""" | ||
import argparse | ||
import os | ||
import sys | ||
from primervcf.primer_indel import get_deletion_region, bedl2file | ||
|
||
parser=argparse.ArgumentParser() | ||
parser.add_argument("-d", "--wkdir", default=None, | ||
help="The dir path contain the file, if not given, use the current dir") | ||
# input and out | ||
parser.add_argument("-f", "--file", | ||
help="the vcf file used as input") | ||
parser.add_argument("-o", "--out",default="del.out", | ||
help="bed file output") | ||
# len cutoff | ||
parser.add_argument("-l", "--length",default=10, | ||
help="the min length for a deletion to be used, default is 10") | ||
|
||
args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) | ||
wkdir=os.getcwd() if args.wkdir is None else args.wkdir | ||
|
||
bed_l=get_deletion_region(args.file, key="del", len_cutoff=args.length) | ||
bedl2file(bed_l, filename=args.out) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# @Time : 5/12/2022 12:40 PM | ||
# @Author : Runsheng | ||
# @File : __init__.py | ||
|
||
__version__="0.1.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# @Time : 5/12/2022 8:29 AM | ||
# @Author : Runsheng | ||
# @File : map2vcf.py | ||
|
||
|
||
""" | ||
The pipeline wrapper to get a vcf file from fastq mapping, need: | ||
1. bwa mem | ||
2. samtools | ||
3. vcftools | ||
""" | ||
import os | ||
|
||
from primervcf.utils import myexe | ||
|
||
def wrapper_bwamem(index, read_list, prefix="default", core=32 , wkdir=None): | ||
""" | ||
general wrapper for bwa mem to be used in python cmd line | ||
para: index, the bwa index of reference fasta file | ||
para: read_list, the fastq file names in a python list | ||
para: prefix, the prefix for the output bam file | ||
para: core, the cores used for mapping and sorting | ||
return: None | ||
""" | ||
prefix = read_list[0].split("_")[0] if prefix == "default" else prefix | ||
read_str = " ".join(read_list) | ||
|
||
if wkdir is None: | ||
wkdir=os.getcwd() | ||
os.chdir(wkdir) | ||
|
||
myexe("bwa mem -t {core} {index} {read_str} > {prefix}.sam".format(core=core, index=index, read_str=read_str, | ||
prefix=prefix)) | ||
myexe("samtools view -bS {prefix}.sam >{prefix}.bam".format(prefix=prefix)) | ||
myexe("samtools sort -@ {core} {prefix}.bam {prefix}_s".format(core=core, prefix=prefix)) | ||
myexe("samtools index {prefix}_s.bam".format(prefix=prefix)) | ||
myexe("rm {prefix}.sam {prefix}.bam".format(prefix=prefix)) | ||
|
||
print ("++++++++done++++++++++") | ||
return prefix+"_s.bam" | ||
|
||
|
||
def wrapper_bam2vcf(ref, bamfile, prefix="default", wkdir=None): | ||
""" | ||
para: ref, the reference fasta file | ||
para: bamfile: a single sorted bam file | ||
para: prefix, the prefix for the output vcf file | ||
return: None | ||
""" | ||
prefix = bamfile.split(".")[0].split("_")[0] if prefix == "default" else prefix | ||
|
||
if wkdir is None: | ||
wkdir=os.getcwd() | ||
os.chdir(wkdir) | ||
cmd_mpileup = ("bcftools mpileup -Ob -o {prefix}.bcf QR25.bcf -f {ref} {bamfile}" | ||
.format(ref=ref, bamfile=bamfile, prefix=prefix)) | ||
cmd_tovcf = ("bcftools call -vmO z -o {prefix}.vcf {prefix}.bcf". | ||
format(prefix=prefix)) | ||
|
||
print(cmd_mpileup) | ||
myexe(cmd_mpileup) | ||
print(cmd_tovcf) | ||
myexe(cmd_tovcf) | ||
|
||
|
||
def flow_fastq2vcf(ref,read, prefix="raw", core=16 ,wkdir=None): | ||
""" | ||
read=["/home/zhaolab1/data/dnaseq/trim/AF16_F_P.fq.gz", "/home/zhaolab1/data/dnaseq/trim/AF16_R_P.fq.gz"] | ||
prefix="AF16" | ||
ref="/home/zhaolab1/data/dnaseq/refindex/cb4.fa" | ||
wrapper_bwamem(ref, read, prefix) | ||
bamfile="AF16_s.bam" | ||
ref="/home/zhaolab1/data/dnaseq/refindex/cb4.fa" | ||
wrapper_bam2vcf(ref, bamfile) | ||
# use SRR1793006 as example, QR25 fastq | ||
:return: | ||
""" | ||
if wkdir is None: | ||
wkdir=os.getcwd() | ||
os.chdir(wkdir) | ||
prefix = read[0].split("_")[0] if prefix == "raw" else prefix | ||
bamfile=wrapper_bwamem(ref, read, prefix, core=core) | ||
wrapper_bam2vcf(ref, bamfile) |
Oops, something went wrong.