Skip to content

Commit 725f091

Browse files
author
Ryan Routsong
committed
fix: align mg assembly rules to expected outputs
1 parent 0efd53f commit 725f091

File tree

6 files changed

+57
-16
lines changed

6 files changed

+57
-16
lines changed

config/skyline.json

+20
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
{
2+
"sif": "/data/openomics/SIFs/",
3+
"mounts": {
4+
"kaiju": {
5+
"to": "/opt/kaiju",
6+
"from": "/data/openomics/references/weave/kaiju/kaiju_db_nr_euk_2023-05-10",
7+
"mode": "ro"
8+
},
9+
"kraken2" : {
10+
"to": "/opt/kraken2",
11+
"from": "/data/openomics/references/weave/kraken2/k2_pluspfp_20230605",
12+
"mode": "ro"
13+
},
14+
"fastq_screen" : {
15+
"to": "/fdb/fastq_screen/FastQ_Screen_Genomes",
16+
"from": "/data/openomics/references/weave/FastQ_Screen_Genomes",
17+
"mode": "ro"
18+
}
19+
}
20+
}

requirements.txt

+1
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,4 @@ terminaltables
44
pyyaml
55
tabulate
66
progressbar
7+
python-dateutil

scripts/config.py

+9-4
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,11 @@ def get_current_server():
2020
re_biowulf_head = (r"biowulf\.nih\.gov", "biowulf")
2121
re_biowulf_compute = (r"cn\d{4}", "biowulf")
2222

23-
# locus hostnames
24-
re_locus_head = (r"ai\-submit\d{1}", "locus")
25-
re_locus_compute = (r"ai\-hpcn\d{3}", "locus")
23+
# skyline hostnames
24+
re_skyline_head = (r"ai-hpc(submit|n)(\d+)?", "skyline")
25+
re_skyline_compute = (r"ai-hpc(submit|n)(\d+)?", "skyline")
2626

27-
host_profiles = [re_bigsky, re_biowulf_compute, re_biowulf_head, re_locus_compute, re_locus_head]
27+
host_profiles = [re_bigsky, re_biowulf_compute, re_biowulf_head, re_skyline_head, re_skyline_compute]
2828

2929
host = None
3030
for pat, this_host in host_profiles:
@@ -157,6 +157,11 @@ def get_bigsky_seq_dirs():
157157
"seqroot": "/data/RTB_GRS/SequencerRuns/",
158158
"seq": get_biowulf_seq_dirs(),
159159
"profile": Path(Path(__file__).parent.parent, "utils", "profiles", "biowulf").resolve(),
160+
},
161+
"skyline": {
162+
"seqroot": "/data/rtb_grs/SequencerRuns/",
163+
"seq": get_bigsky_seq_dirs(),
164+
"profile": Path(Path(__file__).parent.parent, "utils", "profiles", "skyline").resolve(),
160165
}
161166
}
162167

scripts/utils.py

+10-6
Original file line number | Diff line number | Diff line change
@@ -113,9 +113,10 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
113113
else:
114114
popen_kwargs['cwd'] = str(Path.cwd())
115115

116+
parent_jobid = None
116117
if local or dry_run:
117-
proc = Popen(popen_cmd, stdout=PIPE, stderr=STDOUT, **popen_kwargs)
118-
parent_jobid = None
118+
popen_kwargs['env'].update(os.environ)
119+
proc = Popen(map(str, popen_cmd), stdout=PIPE, stderr=STDOUT, **popen_kwargs)
119120
for line in proc.stdout:
120121
lutf8 = line.decode('utf-8')
121122
jid_search = re.search(r"external jobid \'(\d+)\'", lutf8, re.MULTILINE)
@@ -139,15 +140,16 @@ def exec_snakemake(popen_cmd, local=False, dry_run=False, env=None, cwd=None):
139140
def mk_sbatch_script(wd, cmd):
140141
if not Path(wd, 'logs', 'masterjob').exists():
141142
Path(wd, 'logs', 'masterjob').mkdir(mode=0o755, parents=True)
143+
shebang = "#!/bin/bash --login" if host == 'skyline' else '#!/bin/bash'
142144
master_job_script = \
143145
f"""
144-
#!/bin/bash
146+
{shebang}
145147
#SBATCH --job-name=weave_masterjob
146148
#SBATCH --output={wd}/logs/masterjob/%x_%j.out
147149
#SBATCH --error={wd}/logs/masterjob/%x_%j.err
148150
#SBATCH --ntasks=1
149151
#SBATCH --cpus-per-task=2
150-
#SBATCH --time=02-00:00:00
152+
#SBATCH --time=05-00:00:00
151153
#SBATCH --export=ALL
152154
#SBATCH --mem=16g
153155
""".lstrip()
@@ -169,7 +171,9 @@ def get_mods(init=False):
169171
mod_cmd.append('source /gs1/apps/user/rmlspack/share/spack/setup-env.sh')
170172
mod_cmd.append('spack load miniconda3@4.11.0')
171173
mod_cmd.append('source activate snakemake7-19-1')
172-
else:
174+
elif host == 'skyline':
175+
mod_cmd.append('source /data/openomics/bin/dependencies.sh')
176+
elif host == 'biowulf':
173177
if init:
174178
mod_cmd.append('source /etc/profile.d/modules.sh')
175179
else:
@@ -259,7 +263,7 @@ def exec_pipeline(configs, dry_run=False, local=False):
259263
top_env['SINGULARITY_CACHEDIR'] = str(Path(this_config['out_to'], '.singularity').absolute())
260264
this_cmd = [
261265
"snakemake",
262-
"-pr",
266+
"-pr", "--cores", "all",
263267
"--use-singularity",
264268
"--rerun-incomplete",
265269
"--keep-incomplete",

weave

+13-2
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,9 @@ def run(args):
2626
pairs = ['1', '2'] if sample_sheet.is_paired_end else ['1']
2727

2828
# ~~~ demultiplexing configuration ~~~
29-
bcls = [x for x in Path(rundir).rglob('*.bcl.*') if not 'tmp' in str(x)]
29+
bcls = [x.absolute() for x in Path(rundir).rglob('*.bcl.*') if not 'tmp' in str(x)]
3030
if not bcls:
31-
bcls = [x for x in Path(rundir).rglob('*.cbcl') if not 'tmp' in str(x)]
31+
bcls = [x.absolute() for x in Path(rundir).rglob('*.cbcl') if not 'tmp' in str(x)]
3232
exec_config['sample_sheet'].append(str(sample_sheet.path))
3333
exec_config['bcl_files'].append(bcls)
3434
exec_config['demux_data'].append(files.check_if_demuxed(rundir))
@@ -124,4 +124,15 @@ if __name__ == '__main__':
124124
parser_cache.set_defaults(func = get_cache)
125125
parser_unlock.set_defaults(func = unlock_dir)
126126
args = main_parser.parse_args()
127+
128+
if not hasattr(args, 'func'):
129+
print(main_parser.print_help())
130+
print('---')
131+
print(parser_run.print_help())
132+
print('---')
133+
print(parser_cache.print_help())
134+
print('---')
135+
print(parser_unlock.print_help())
136+
exit(0)
137+
127138
args.func(args)

workflow/qc.smk

+4-4
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@ rule fastqc_untrimmed:
2323
fqreport = config['out_to'] + "/" + config["project"] + "/{sids}/fastqc_untrimmed/{sids}_R{rnums}_" + trim_input_suffix + "_fastqc.zip",
2424
params:
2525
output_dir = lambda w: config['out_to'] + "/" + config["project"] + "/" + w.sids + "/fastqc_untrimmed/"
26-
log: config['out_to'] + "/logs/" + "/" + config["project"] + "/fastqc_untrimmed/{sids}_R{rnums}.log"
26+
log: config['out_to'] + "/logs/" + config["project"] + "/fastqc_untrimmed/{sids}_R{rnums}.log"
2727
threads: 4
28-
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif"
28+
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif"
2929
resources: mem_mb = 8096
3030
shell:
3131
"""
@@ -42,7 +42,7 @@ rule fastqc_trimmed:
4242
fqreport = config['out_to'] + "/" + config["project"] + "/{sids}/fastqc_trimmed/{sids}_trimmed_R{rnums}_fastqc.zip",
4343
params:
4444
output_dir = lambda w: config['out_to'] + "/" + config["project"] + "/" + w.sids + "/fastqc_trimmed/"
45-
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif"
45+
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif"
4646
threads: 4
4747
resources: mem_mb = 8096
4848
log: config['out_to'] + "/logs/" + config["project"] + "/fastqc_trimmed/{sids}_R{rnums}.log"
@@ -124,7 +124,7 @@ rule multiqc_report:
124124
input_dir = config['out_to'],
125125
output_dir = config['out_to'] + "/" + config["project"] + "/multiqc/",
126126
report_title = "Run: " + config["run_ids"] + ", Project: " + config["project"],
127-
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.1.sif"
127+
containerized: config["resources"]["sif"] + "weave_ngsqc_0.0.2.sif"
128128
threads: 4
129129
resources: mem_mb = 8096
130130
log:

0 commit comments

Comments
 (0)