Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Runsheng committed Dec 12, 2022
0 parents commit caab215
Show file tree
Hide file tree
Showing 12 changed files with 875 additions and 0 deletions.
45 changes: 45 additions & 0 deletions bin/fq2vcf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# @Time : 12/12/2022 1:25 PM
# @Author : Runsheng
# @File : fq2vcf.py.py

"""
mapping fastq to reference with bwa mem, and call the vcf
"""
import argparse
import os
import sys

from primervcf.map2vcf import flow_fastq2vcf

parser=argparse.ArgumentParser()
parser.add_argument("-d", "--wkdir", default=None,
help="The dir path contain the file, if not given, use the current dir")
# input and out
parser.add_argument("-f", "--file",
help="the fastq file used as input, name seperated by , "
"example for pair-end like SRR1793006_1.fastq,SRR1793006_2.fastq")
parser.add_argument("-g", "--genome",
help="the genome file used for mapping")
parser.add_argument("-p", "--prefix",default="raw",
help="the outvcf filename prefix, default is using the file prefix of fastq")

# performace
parser.add_argument("-c", "--core",
help="the core used for bwa mem mapping and samtools sort")


args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
wkdir=os.getcwd() if args.wkdir is None else args.wkdir

os.chdir(wkdir)
name_str=args.file
read_list=name_str.strip().split(",")

flow_fastq2vcf(ref=args.genome,
read=read_list,
prefix=args.prefix,
core=args.core,
wkdir=args.wkdir
)
85 changes: 85 additions & 0 deletions bin/primerdesign_vcf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# @Time : 12/12/2022 12:23 PM
# @Author : Runsheng
# @File : primerdesign_vcf.py.py

"""
The main script used to run cmd orders
"""

import argparse
import os
import sys

#currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
#parentdir = os.path.dirname(os.path.dirname(currentdir))
#sys.path.insert(0,parentdir)

from primervcf.primer_indel import primer3_general_settings
from primerdiffer.utils import fasta2dic,dic2dic

# self import
from primervcf.primer_indel import flow_walk_deletion, file2bedl

parser=argparse.ArgumentParser()
parser.add_argument("-d", "--wkdir", default=None,
help="The dir path contain the file, if not given, use the current dir")
# input
parser.add_argument("-g", "--genome",
help="the fasta file used to design primer and check specificity")
parser.add_argument("-b", "--bedfile",default=None,
help="the bed file containing the deletion region interval")

# primer cutoff
parser.add_argument("--alignlen", type=int, default=16,
help="the cutoff of primer min align length as a right hit, default is 16")
parser.add_argument("--free3len", type=int, default=2,
help="the cutoff of primer 3' align length as a right hit, default is 2")

# run parameter, how dense the primer should be
parser.add_argument("-n","--primernumber", type=int, default=5,
help="the primer designed for each region, default is 5, do not have much impact for primer design")

parser.add_argument("--debug", type=str, default="no",
help='open debug mode or not, default is no')

# output parameter
parser.add_argument("--prefix", default="primers",
help="prefix of output file, default is primers")

# add user settings
parser.add_argument("--primer3config", default=None,
help="the config file for the primer3 ")

# default handler
args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
wkdir=os.getcwd() if args.wkdir is None else args.wkdir
debug=False if args.debug=="no" else True

os.chdir(wkdir)

# deal with config
primer3_setting = primer3_general_settings
if args.primer3config is None:
pass
else: # update the primer3 setting from the given parameters
with open(args.primer3config, "r") as f:
primer3_user_setting = eval(f.read()) # read the config as a dict
primer3_setting.update(primer3_user_setting)

ref_dict=dic2dic(fasta2dic(args.genome))
bed_l=file2bedl(args.bedfile)
#print(bed_l)

flow_walk_deletion(
ref_dict=ref_dict,
db=args.genome,
deletion_bedlist=bed_l,
prefix=args.prefix,
primer_number=args.primernumber,
debugmod=debug,
cutoff_alignlength=args.alignlen,
cutoff_free3=args.free3len,
flank=1000,
primer3_general_settings=primer3_setting)
31 changes: 31 additions & 0 deletions bin/vcf2del.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-
# @Time : 12/12/2022 12:38 PM
# @Author : Runsheng
# @File : vcf2del.py
"""
parser the vcf file, get a bed like list for deletion
"""
import argparse
import os
import sys
from primervcf.primer_indel import get_deletion_region, bedl2file

parser=argparse.ArgumentParser()
parser.add_argument("-d", "--wkdir", default=None,
help="The dir path contain the file, if not given, use the current dir")
# input and out
parser.add_argument("-f", "--file",
help="the vcf file used as input")
parser.add_argument("-o", "--out",default="del.out",
help="bed file output")
# len cutoff
parser.add_argument("-l", "--length",default=10,
help="the min length for a deletion to be used, default is 10")

args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
wkdir=os.getcwd() if args.wkdir is None else args.wkdir

bed_l=get_deletion_region(args.file, key="del", len_cutoff=args.length)
bedl2file(bed_l, filename=args.out)

7 changes: 7 additions & 0 deletions primervcf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 5/12/2022 12:40 PM
# @Author : Runsheng
# @File : __init__.py

__version__="0.1.0"
89 changes: 89 additions & 0 deletions primervcf/map2vcf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 5/12/2022 8:29 AM
# @Author : Runsheng
# @File : map2vcf.py


"""
The pipeline wrapper to get a vcf file from fastq mapping, need:
1. bwa mem
2. samtools
3. vcftools
"""
import os

from primervcf.utils import myexe

def wrapper_bwamem(index, read_list, prefix="default", core=32 , wkdir=None):
"""
general wrapper for bwa mem to be used in python cmd line
para: index, the bwa index of reference fasta file
para: read_list, the fastq file names in a python list
para: prefix, the prefix for the output bam file
para: core, the cores used for mapping and sorting
return: None
"""
prefix = read_list[0].split("_")[0] if prefix == "default" else prefix
read_str = " ".join(read_list)

if wkdir is None:
wkdir=os.getcwd()
os.chdir(wkdir)

myexe("bwa mem -t {core} {index} {read_str} > {prefix}.sam".format(core=core, index=index, read_str=read_str,
prefix=prefix))
myexe("samtools view -bS {prefix}.sam >{prefix}.bam".format(prefix=prefix))
myexe("samtools sort -@ {core} {prefix}.bam {prefix}_s".format(core=core, prefix=prefix))
myexe("samtools index {prefix}_s.bam".format(prefix=prefix))
myexe("rm {prefix}.sam {prefix}.bam".format(prefix=prefix))

print ("++++++++done++++++++++")
return prefix+"_s.bam"


def wrapper_bam2vcf(ref, bamfile, prefix="default", wkdir=None):
"""
para: ref, the reference fasta file
para: bamfile: a single sorted bam file
para: prefix, the prefix for the output vcf file
return: None
"""
prefix = bamfile.split(".")[0].split("_")[0] if prefix == "default" else prefix

if wkdir is None:
wkdir=os.getcwd()
os.chdir(wkdir)
cmd_mpileup = ("bcftools mpileup -Ob -o {prefix}.bcf QR25.bcf -f {ref} {bamfile}"
.format(ref=ref, bamfile=bamfile, prefix=prefix))
cmd_tovcf = ("bcftools call -vmO z -o {prefix}.vcf {prefix}.bcf".
format(prefix=prefix))

print(cmd_mpileup)
myexe(cmd_mpileup)
print(cmd_tovcf)
myexe(cmd_tovcf)


def flow_fastq2vcf(ref,read, prefix="raw", core=16 ,wkdir=None):
"""
read=["/home/zhaolab1/data/dnaseq/trim/AF16_F_P.fq.gz", "/home/zhaolab1/data/dnaseq/trim/AF16_R_P.fq.gz"]
prefix="AF16"
ref="/home/zhaolab1/data/dnaseq/refindex/cb4.fa"
wrapper_bwamem(ref, read, prefix)
bamfile="AF16_s.bam"
ref="/home/zhaolab1/data/dnaseq/refindex/cb4.fa"
wrapper_bam2vcf(ref, bamfile)
# use SRR1793006 as example, QR25 fastq
:return:
"""
if wkdir is None:
wkdir=os.getcwd()
os.chdir(wkdir)
prefix = read[0].split("_")[0] if prefix == "raw" else prefix
bamfile=wrapper_bwamem(ref, read, prefix, core=core)
wrapper_bam2vcf(ref, bamfile)
Loading

0 comments on commit caab215

Please sign in to comment.