-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile
84 lines (70 loc) · 2.6 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Workflow metadata: author, copyright holder, contact e-mail and license.
__author__ = "Taavi Päll"
__copyright__ = "Copyright 2018, Avilab"
__email__ = "taavi.pall@ut.ee"
__license__ = "MIT"
# Load libraries
import os
import json
import glob
import pandas as pd
from snakemake.remote.FTP import RemoteProvider as FTPRemoteProvider
from snakemake.utils import validate
# Use bash as the executable for shell commands
shell.executable("bash")
# Load configuration file with sample and path info
configfile: "config.yaml"
# Check the loaded configuration against the config schema before it is used
validate(config, "schemas/config.schema.yaml")
# Load runs and groups
# The sample table is whitespace-delimited; the separator is a regex, so it is
# written as a raw string to avoid Python's invalid-escape-sequence warning.
# The "run" column becomes the index and is also kept as a regular column.
RUNS = pd.read_csv(config["samples"], sep=r"\s+").set_index("run", drop=False)
validate(RUNS, "schemas/samples.schema.yaml")
RUN_IDS = RUNS.index.tolist()

# Integers 1..N_FILES (inclusive), where N_FILES comes from
# config["split_fasta"]["n_files"]
N_FILES = config["split_fasta"]["n_files"]
N = list(range(1, N_FILES + 1))
# Global wildcard constraints: `run` is alphanumeric, `n` is one or more digits.
# The digit pattern is a raw string so that "\d" is not treated as an (invalid)
# Python string escape.
wildcard_constraints:
    run = "[a-zA-Z0-9]+",
    n = r"\d+"
# Main output files
RESULTS = ["viruses.csv", "non-viral.csv", "unassigned.fa"]

# Start from the entries that are always present; the blastx entries are
# appended only when config["run_blastx"] is truthy.
BLASTV = ["blastn-virus"]
BLASTNR = ["megablast-nt", "blastn-nt"]
if config["run_blastx"]:
    BLASTV.append("blastx-virus")
    BLASTNR.append("blastx-nr")
BLAST = BLASTV + BLASTNR

# One multiqc.html per run, plus every result file for every run
STATS = expand(["output/{run}/multiqc.html"], run=RUN_IDS)
OUTPUTS = expand(
    "output/{run}/{result}",
    run=RUN_IDS,
    result=RESULTS,
) + STATS
# Remote outputs: only set up when a Zenodo deposition id is configured
if config["zenodo"]["deposition_id"]:
    # The zenodo remote provider module is imported only on this branch
    from snakemake.remote.zenodo import RemoteProvider as ZENRemoteProvider

    # The access token is read from the ZENODO_PAT environment variable;
    # a KeyError is raised here if it is not set.
    ZEN = ZENRemoteProvider(
        deposition=config["zenodo"]["deposition_id"],
        access_token=os.environ["ZENODO_PAT"],
    )

    # Add one counts.tgz per run, wrapped by the Zenodo provider, to the targets
    ZENOUTPUTS = ZEN.remote(expand("output/{run}/counts.tgz", run=RUN_IDS))
    OUTPUTS = OUTPUTS + ZENOUTPUTS
# Report
# reStructuredText file registered as the workflow-level report description
report: "report/workflow.rst"
# Target rule: requests every path collected in OUTPUTS. It is the first rule
# declared in this file, which Snakemake uses as the default target.
rule all:
    input: OUTPUTS
# Check file exists
def file_exists(file):
    """Report on stdout whether *file* can be opened for reading.

    Prints "<file> is set up correctly" when the file opens, and
    "Could not find <file>" when it does not exist. Nothing is returned.

    Args:
        file: Path of the file to check.
    """
    try:
        # Opening (and immediately closing) the file is the existence check.
        with open(file, 'r'):
            print("{} is set up correctly".format(file))
    except FileNotFoundError:
        # Previously this message was formatted but never emitted, so a
        # missing file went unnoticed.
        print("Could not find {}".format(file))
# Path to reference genomes
# Every value below is read from an environment variable and is None when that
# variable is not set. The file_exists() checks for these paths are currently
# disabled.
HOST_GENOME = os.environ.get("REF_GENOME_HUMAN_MASKED")
REF_BACTERIA = os.environ.get("REF_BACTERIA")
TAXON_DB = os.environ.get("TAXON_DB")
RRNA_DB = os.environ.get("SILVA")
CPNDB = os.environ.get("CPNDB")
# Wrappers
# Each wrapper location is the shared prefix followed by a path that starts
# with "master/".
WRAPPER_PREFIX = "https://raw.githubusercontent.com/avilab/virome-wrappers/"
BWA_UNMAPPED = "".join((WRAPPER_PREFIX, "master/unmapped"))
BLAST_QUERY = "".join((WRAPPER_PREFIX, "master/blast/query"))
PARSE_BLAST = "".join((WRAPPER_PREFIX, "master/blast/parse"))
BLAST_TAXONOMY = "".join((WRAPPER_PREFIX, "master/blast/taxonomy"))
SUBSET_FASTA = "".join((WRAPPER_PREFIX, "master/subset_fasta"))
# Rules
# Pull the additional Snakemake files from rules/ into this workflow. They are
# included after the constants above have been defined.
include: "rules/preprocess.smk"
include: "rules/blast.smk"