From d6d248a0debb1a48aa4b8bc93181738ec66c8df2 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Fri, 29 Nov 2019 14:37:07 +1030 Subject: [PATCH 01/10] introducing version parsing from mappers --- conf/mappers.config | 2 ++ groovy/Validators.groovy | 4 ++-- main.nf | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/conf/mappers.config b/conf/mappers.config index a71d20a..70b6be0 100644 --- a/conf/mappers.config +++ b/conf/mappers.config @@ -108,6 +108,7 @@ params { [ tool: 'kallisto', version: '0.46.0', + versionCall: 'kallisto version | cut -f3 -d" "', container: 'rsuchecki/kallisto:0.46.0_fe13871790208317a506f0a4f7c3a134cb48f0b4', index: true, rna2rna: true, @@ -115,6 +116,7 @@ params { [ tool: 'minimap2', version: '2.17', + versionCall: 'minimap2 --version', container: 'rsuchecki/minimap2:2.17_1d3f326820696496f025a95632979cd4ea4140cb', index: true, dna2dna: true, diff --git a/groovy/Validators.groovy b/groovy/Validators.groovy index 079b0de..d3831ad 100644 --- a/groovy/Validators.groovy +++ b/groovy/Validators.groovy @@ -19,12 +19,12 @@ def addToListInMap (map, key, value, context) { } } -def validateMappersDefinitions (mappers, allRequired, allModes) { +def validateMappersDefinitions (mappers, allRequired, allOptional, allModes) { def allVersions = [:] //Keep track of tool versions declared in config mappers.each { rec -> addToListInMap(allVersions, rec.tool, rec.version, rec) rec.each {k, v -> - if(!(k in (allModes.split('\\|')+allRequired))) { + if(!(k in (allModes.split('\\|')+allRequired+allOptional))) { System.err.println """Validation error: unexpected field in mapper definition: Offending field: ${k} Offending record: ${rec}""" diff --git a/main.nf b/main.nf index 66ede21..07e990c 100644 --- a/main.nf +++ b/main.nf @@ -28,8 +28,9 @@ def validators = new GroovyShell().parse(new File("${baseDir}/groovy/Validators. //Read, parse, validate and sanitize alignment/mapping tools config def allRequired = ['tool','version','container','index'] //Fields required for each tool in config +def allOptional = ['versionCall'] def allModes = 'dna2dna|rna2rna|rna2dna' //At leas one mode has to be defined as supported by each tool -def allVersions = validators.validateMappersDefinitions(params.mappersDefinitions, allRequired, allModes) +def allVersions = validators.validateMappersDefinitions(params.mappersDefinitions, allRequired, allOptional, allModes) //Check if specified template files exist validators.validateTemplatesAndScripts(params.mappersDefinitions, (['index']+(allModes.split('\\|') as List)), "${baseDir}/templates") @@ -52,9 +53,30 @@ Channel.from(params.mappersDefinitions) .filter{ params.mappers == 'all' || it.tool.matches(params.mappers) } //TODO Could allow :version // .tap { mappersMapChannel } // .map { it.subMap(allRequired)} //Exclude mapping specific fields from indexing process to avoid re-indexing e.g. 
on changes made to a mapping template - // .set { mappersIdxChannel } - .into { mappersIdxChannel; mappersMapChannel } + .set { mappersChannel } + // .into { mappersIdxChannel; mappersMapChannel; mappersVersionChannel } +// mappersVersionChannel.view{ it -> JsonOutput.prettyPrint(jsonGenerator.toJson(it))} + + +process parseMapperVersion { + container { "${mapmeta.container}" } + tag { mapmeta.subMap(['tool','version']) } + + input: val(mapmeta) from mappersChannel + + output: tuple val(mapmeta), stdout into mappersCapturedVersionChannel + + script: "${mapmeta.versionCall}" +} + +mappersCapturedVersionChannel +.map { meta, ver -> + meta.versionCall = ver.trim() + meta +} +.view{ it -> JsonOutput.prettyPrint(jsonGenerator.toJson(it))} +.into { mappersIdxChannel; mappersMapChannel } //...and their params definitions mappersParamsChannel = Channel.from(params.mapperParamsDefinitions) @@ -303,6 +325,10 @@ process faidxTranscriptomeFASTA { """ } + + + + /* Resolve variables emebeded in single-quoted strings */ @@ -552,6 +578,7 @@ process mapSimulatedReads { label 'align' container { "${meta.mapper.container}" } tag {"${meta.target.seqtype}@${meta.target.species}@${meta.target.version} << ${meta.query.nreads}@${meta.query.seqtype}; ${meta.mapper.tool}@${meta.mapper.version}@${meta.params.label}"} + // beforeScript meta.mapper.containsKey('versionCall') ? "${meta.mapper.versionCall} > .mapper.version" : '' input: set val(meta), file(reads), file(ref), file(fai), file('*'), val(run), val(ALIGN_PARAMS) from combinedToMap From 3649859bc5d14477ab8fd5aff15169faad31793b Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Fri, 29 Nov 2019 16:57:32 +1030 Subject: [PATCH 02/10] version calls added for biokanga and hisat2 conf --- conf/mappers.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/mappers.config b/conf/mappers.config index 70b6be0..c8e6614 100644 --- a/conf/mappers.config +++ b/conf/mappers.config @@ -41,6 +41,7 @@ params { [ tool: 'biokanga', version: '4.3.11', + versionCall: 'biokanga align --version | cut -f4 -d" "', container: 'rsuchecki/biokanga:4.3.11', index: 'biokanga index --threads ${task.cpus} -i ${ref} -o ${ref}.sfx --ref ${ref}', dna2dna: true, @@ -49,6 +50,7 @@ params { [ tool: 'biokanga', version: '4.4.2', + versionCall: 'biokanga align --version | cut -f4 -d" "', container: 'csirocropinformatics/biokanga:4.4.2_57f94e3e831cc2290cd86059b4d23dea22228262', index: true, //or index: 'biokanga index --threads ${task.cpus} -i ${ref} -o ${ref}.sfx --ref ${ref}', dna2dna: true, @@ -101,6 +103,7 @@ params { [ tool: 'hisat2', version: '2.1.0', + versionCall: 'hisat2 --version | head -1 | cut -f3 -d" "', container: 'rsuchecki/hisat2:2.1.0_4cb1d4007322767b562e98f69179e8ebf6d31fb1', index: true, rna2dna: true, From 4470fe2194f61bfc04317aa6b92c9b9d34c15162 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Fri, 29 Nov 2019 17:01:14 +1030 Subject: [PATCH 03/10] overwrite declared versions with parsed, warn or abort? 
--- main.nf | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 07e990c..9227f5e 100644 --- a/main.nf +++ b/main.nf @@ -68,14 +68,36 @@ process parseMapperVersion { output: tuple val(mapmeta), stdout into mappersCapturedVersionChannel script: "${mapmeta.versionCall}" + // script: "set -o pipefail; ${mapmeta.versionCall}" } mappersCapturedVersionChannel .map { meta, ver -> - meta.versionCall = ver.trim() + if(meta.version != ver.trim()) { + log.warn """ + Decalred version ${meta.version} for ${meta.tool} + does not match version ${ver.trim()} + obtained from versionCall: ${meta.versionCall} + Updating version in metadata to ${ver.trim()} + """ + //Please correct your mapper configuration file(s). + // throw new RuntimeException('msg') or // + // session.abort(new Exception()) + meta.version = ver.trim() + if(!meta.container.contains(meta.version)) { + log.error """ + Updated tool version string ${meta.version} + not found in container image spec ${meta.container}. + Please correct your mapper configuration file(s). + + Aborting... + """ + session.abort(new Exception()) // throw new RuntimeException('msg') + } + } meta } -.view{ it -> JsonOutput.prettyPrint(jsonGenerator.toJson(it))} +// .view{ it -> JsonOutput.prettyPrint(jsonGenerator.toJson(it))} .into { mappersIdxChannel; mappersMapChannel } //...and their params definitions From a3e0dbb1196557f942b9e7e84acf3377bf9c8f14 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Tue, 3 Dec 2019 16:17:06 +1030 Subject: [PATCH 04/10] added version calls for several mappers --- conf/mappers.config | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/conf/mappers.config b/conf/mappers.config index c8e6614..eb343ee 100644 --- a/conf/mappers.config +++ b/conf/mappers.config @@ -33,6 +33,7 @@ params { [ tool: 'bbmap', version: '38.49', + versionCall: 'bbmap.sh version 2>&1 | awk \'/BBMap version/ {print $3}\'', container: 'rsuchecki/bbmap:38.49_9e975d9bc6a657bc4306f4475be393b9fbe8e3fb', index: 'bbmap.sh ref=${ref} Xmx=${task.memory.toMega()}M', dna2dna: true, @@ -73,6 +74,7 @@ params { [ tool: 'bowtie2', version: '2.3.5', + versionCall: 'bowtie2 --version | awk \'/bowtie2-align-s version.*$/{print $3}\'', container: 'rsuchecki/bowtie2:2.3.5_cf1abfa200bdeb2d1c3095be1e8ad99142121b45', index: true, dna2dna: true, @@ -80,7 +82,8 @@ params { ], [ tool: 'bwa', - version: '0.7.17', + version: '0.7.17-r1188', + versionCall: 'bwa 2>&1 | awk \'/Version.*$/{print $2}\'', container: 'rsuchecki/bwa:0.7.17_8b61e2a77c105f3ec28d260b556af5cf12c49111', index: true, dna2dna: true, @@ -88,13 +91,15 @@ params { ], [ tool: 'dart', - version: '1.3.5', + versionCall: 'dart -v | awk \'{print $2}\'', + version: 'v1.3.5', container: 'rsuchecki/dart:1.3.5_a2af064d7ed5df1f16e31ea46645793b356a7758', index: true, rna2dna: true, ], [ tool: 'gsnap', + versionCall: 'gsnap --version 2>&1 | awk \'/GSNAP version.*/{print $3}\'', version: '2019-03-15', container: 'rsuchecki/gmap-gsnap:2019-03-15_af7b4bf0f03952c583ba30657f2aca90f8a8783a', index: true, @@ -102,7 +107,7 @@ params { ], [ tool: 'hisat2', - version: '2.1.0', + version: '2.1.0', versionCall: 'hisat2 --version | head -1 | cut -f3 -d" "', container: 'rsuchecki/hisat2:2.1.0_4cb1d4007322767b562e98f69179e8ebf6d31fb1', index: true, @@ -118,7 +123,7 @@ params { ], [ tool: 'minimap2', - version: '2.17', + version: '2.17-r941', versionCall: 'minimap2 --version', container: 'rsuchecki/minimap2:2.17_1d3f326820696496f025a95632979cd4ea4140cb', index: 
true, @@ -135,6 +140,7 @@ params { [ tool: 'star', version: '2.7.0f', + versionCall: 'STAR --version', container: 'rsuchecki/star-aligner:2.7.0f_c5dc08f157c803fec027679c00759359c9c8a814', index: true, rna2dna: true, From bb252f67fe3d78b4f1a538e98d483c299f92adf0 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Tue, 3 Dec 2019 16:18:23 +1030 Subject: [PATCH 05/10] added filter exluding mappers lacking versionCall definition --- main.nf | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 9227f5e..fdc301c 100644 --- a/main.nf +++ b/main.nf @@ -58,12 +58,23 @@ Channel.from(params.mappersDefinitions) // mappersVersionChannel.view{ it -> JsonOutput.prettyPrint(jsonGenerator.toJson(it))} +mappersChannel.filter { + if(it.containsKey('versionCall')) { + true + } else { + log.warn """ + versionCall not specified for ${it.tool} ${it.version} + it will not be included in this run + """ + } +} +.set { mappersVithVersionCallChannel } process parseMapperVersion { container { "${mapmeta.container}" } tag { mapmeta.subMap(['tool','version']) } - input: val(mapmeta) from mappersChannel + input: val(mapmeta) from mappersVithVersionCallChannel output: tuple val(mapmeta), stdout into mappersCapturedVersionChannel From ad72a3d094c9b9abc4eab0d955b214364ec0f4d5 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Thu, 23 Jan 2020 11:35:22 +1030 Subject: [PATCH 06/10] Implemented GitHub actions workflow run tests * Docker * Singularity * Actions caching of * Test data * Singularity images * NF work/ and ./.nextflow/cache * Per mapper/mapmode test runs (docker) * Multiple mapper/mapmode test run (singularity) * Leaner container images * Updated exec profiles Details: commit 2c5a8062b632f015c40a7eae976bf90bfa0e87d9 Author: Rad Suchecki Date: Thu Jan 23 11:06:08 2020 +1030 simg size commit 66cb0ddc23f18c21ef852fee34e5040b85cfaf1f Author: Rad Suchecki Date: Thu Jan 23 11:03:01 2020 +1030 keep NF/work caches separate per mapmode 4 independent parallel exec commit 01fc7891c08cc8974073d56e8d42b4746e401160 Author: Rad Suchecki Date: Thu Jan 23 11:00:27 2020 +1030 cleanup commit 283afe223b8224ca2663fd92c80759b15a6aaaef Author: Rad Suchecki Date: Thu Jan 23 10:52:14 2020 +1030 missing quotes around mappers regex commit 389da5524e0c38bdb499f1b0f01b6de2cc2f57ba Author: Rad Suchecki Date: Thu Jan 23 10:40:12 2020 +1030 singularity test all-in one commit 2b58954a7fe7d8f45f66379ce244b290ef2c85dc Author: Rad Suchecki Date: Thu Jan 23 10:30:34 2020 +1030 -redundancy commit 4d3cf8f096b4d9b49efe9f5ed87904952c9dad10 Author: Rad Suchecki Date: Thu Jan 23 10:21:06 2020 +1030 re-worked singularity caching commit a13b306fbdb19107086c702455f4a0949148aaf4 Author: Rad Suchecki Date: Thu Jan 23 10:09:24 2020 +1030 seaprated data cache from work cache which callows to keep docker/singularity work caches separate commit c65c389c55a289187b993d28ab17984c25e3dbe5 Author: Rad Suchecki Date: Wed Jan 22 17:07:57 2020 +1030 cache-in commit b012dcfd50f2e28a1bdfdecb260b31a2cb6f1003 Author: Rad Suchecki Date: Wed Jan 22 16:51:42 2020 +1030 cache-er commit a4bb2ca8701cefc085408dc48d90b22f3f951fe4 Author: Rad Suchecki Date: Wed Jan 22 16:45:39 2020 +1030 cache-ish commit df39af7af24e59271678afcaa78867a05619e0bb Author: Rad Suchecki Date: Wed Jan 22 16:44:07 2020 +1030 simplified restore commit a2346e552821d6e12bf60169ebb55e07eac661e7 Author: Rad Suchecki Date: Wed Jan 22 16:35:00 2020 +1030 cache-keys commit 393ab7e0906e951db3594e9bbe97b67e10cc4f20 Author: Rad Suchecki Date: Wed Jan 22 16:15:52 2020 +1030 
cache key changes commit a6c328269d76692b767df9b06910361cd2135c1e Author: Rad Suchecki Date: Wed Jan 22 16:03:08 2020 +1030 cache conditional2 commit 07694f5273106419abcb94336fada9ebc60726b3 Author: Rad Suchecki Date: Wed Jan 22 16:02:22 2020 +1030 cache conditional2 commit b9cfb45e05cc32d918635ff8efe14aeb549b9b54 Author: Rad Suchecki Date: Wed Jan 22 16:00:18 2020 +1030 cache conditional2 commit 811533ab81c89f89c6525224bf4daf4b28eeab6c Author: Rad Suchecki Date: Wed Jan 22 15:47:37 2020 +1030 cache conditional1 commit d643974d833c13f7bbccec423def89f82e0e35bd Author: Rad Suchecki Date: Wed Jan 22 15:40:29 2020 +1030 cache conditional commit e9f8b50e7886be46f38249996508558081254562 Author: Rad Suchecki Date: Wed Jan 22 15:26:55 2020 +1030 cache conditional commit ffe15200e03f59fa0538e38fe7e49fe8b7826fbf Author: Rad Suchecki Date: Wed Jan 22 15:14:38 2020 +1030 cache conditional commit cc77479f96b7e8dc34ddba578147b040263f0794 Author: Rad Suchecki Date: Wed Jan 22 15:09:57 2020 +1030 cache conditional commit 258fbcf15210ddb4c881e75a0ebfc0d78e6c009f Author: Rad Suchecki Date: Wed Jan 22 15:08:43 2020 +1030 cache conditional commit aa93c37a881ca2ccfbc4550ad397ec2fbd85dea3 Author: Rad Suchecki Date: Wed Jan 22 15:07:45 2020 +1030 cache conditional commit f17f3113cf16a8fdbfac0354c7313aa1e30ec9a2 Author: Rad Suchecki Date: Wed Jan 22 15:07:27 2020 +1030 cache conditional commit 26542e6099705c1661ac419fa220cb1de257f6f2 Author: Rad Suchecki Date: Wed Jan 22 14:53:59 2020 +1030 cache conditional commit be93a9d63872316e82e46ab5cb2fb7ed6de5b77c Author: Rad Suchecki Date: Wed Jan 22 14:50:01 2020 +1030 cache conditional commit feceebae147cd129aac790b68e4954e1206171e5 Author: Rad Suchecki Date: Wed Jan 22 14:40:42 2020 +1030 cache conditional commit ad77c77ef3f6475d58b612e6e4d8e79de5281309 Author: Rad Suchecki Date: Wed Jan 22 14:31:02 2020 +1030 data commit 0ad06bb0903147fb314fbb1c7afc280cd38a6b08 Author: Rad Suchecki Date: Wed Jan 22 14:29:02 2020 +1030 data commit fbf2c867f45abbbb43c06eaeea3de824bf91b7e4 Author: Rad Suchecki Date: Wed Jan 22 14:28:19 2020 +1030 data commit f714d7338df2ac3adba48bde0b80645a2321bfb7 Author: Rad Suchecki Date: Wed Jan 22 14:27:43 2020 +1030 data commit e8e31e7f6bf650f1aa2fe535a1ed225e3e7554ec Author: Rad Suchecki Date: Wed Jan 22 14:18:31 2020 +1030 cache commit d7643bab8cc0aa9d38e9f2a3397d5c5f49bbf561 Author: Rad Suchecki Date: Wed Jan 22 14:16:44 2020 +1030 cache commit ad65601d76aad2a31a8fe1f49b48e1d23948957f Author: Rad Suchecki Date: Wed Jan 22 14:13:30 2020 +1030 cache commit 9f5f114d4d68520dee7e653df38410ba85d75014 Author: Rad Suchecki Date: Wed Jan 22 14:11:44 2020 +1030 cache commit 390cf715d2bb59f9eb7136d26ebfcefa9ba57381 Author: Rad Suchecki Date: Wed Jan 22 14:09:52 2020 +1030 cache commit 080a09bb123bf84879ebf6de57cf3f9a76637661 Author: Rad Suchecki Date: Wed Jan 22 14:07:42 2020 +1030 cache commit 396433667e1b611c93d718f3b5839c8d66ea651a Author: Rad Suchecki Date: Wed Jan 22 14:05:58 2020 +1030 cache commit 6484441b576daf506c7345374365af99fa1f0df0 Author: Rad Suchecki Date: Wed Jan 22 14:04:00 2020 +1030 cache commit 66ed6dadba7c15dff3c0e82de0ca67ecc25e2e3b Author: Rad Suchecki Date: Wed Jan 22 13:51:06 2020 +1030 indent? commit 1813403649fa3decd584af44a6de5b123f5c8287 Author: Rad Suchecki Date: Wed Jan 22 13:49:04 2020 +1030 Using NF install action commit 533a18c95907f44802150c84ff7eced67169127e Author: Rad Suchecki Date: Wed Jan 22 12:55:05 2020 +1030 typos? 
commit 3f153b903ea80fa6e865b9410fbc3fd121ec676c Merge: 02c9b3c ee8a048 Author: Rad Suchecki Date: Wed Jan 22 12:53:46 2020 +1030 Merge branch 'feature/ci' of github.com:csiro-crop-informatics/repset into feature/ci commit 02c9b3cb7c5506af499772cb84f5eaece5315e7c Author: Rad Suchecki Date: Tue Jan 21 16:36:36 2020 +1030 new matrix commit ee8a048882678f5f1f74f2e7d6e54fb04f3752bf Author: Rad Suchecki Date: Tue Jan 21 16:36:36 2020 +1030 artefact sharing4 commit 06ae782bdd52546590874e89583cf9f34fb06200 Author: Rad Suchecki Date: Tue Jan 21 16:34:47 2020 +1030 artefact sharing3 commit 82b038be96bdebb8abe40a5a8f1a4b5b8bdf3939 Author: Rad Suchecki Date: Tue Jan 21 16:31:15 2020 +1030 artefact sharing2 commit 9903211760f7d621d7c71ceae78c7bbb236c4cde Author: Rad Suchecki Date: Tue Jan 21 16:29:53 2020 +1030 artefact sharing commit a715b97a8ae3a90dd7bb4f1ee56ec61be1da927d Author: Rad Suchecki Date: Tue Jan 21 16:07:07 2020 +1030 CI refinements cont commit 3cce35829af5bff41b60ec1315811abbb74a6fa3 Author: Rad Suchecki Date: Tue Jan 21 15:47:18 2020 +1030 CI refinements commit 1ac1f031e2740f0f2ff859d44e23e20296428ae0 Author: Rad Suchecki Date: Mon Jan 20 13:48:12 2020 +1030 try report rendering @ CI commit 580f8286ca3c3a736901e5b092b0faeaed3e3b6c Author: Rad Suchecki Date: Mon Jan 20 13:29:40 2020 +1030 Updated report rendering isntructions outside the pipeline commit d5f3c9277aa75690d7f94a51140f80e25dfe4a25 Author: Rad Suchecki Date: Fri Jan 17 16:50:05 2020 +1030 added graph-easy rendering commit 8368e6272105b3ac2c295c455dddf9f33b887e0c Author: Rad Suchecki Date: Fri Jan 17 16:39:49 2020 +1030 typo commit aeefa69006505303056e7f73e37747aeb3771188 Author: Rad Suchecki Date: Fri Jan 17 16:34:20 2020 +1030 typo commit d637ead75edea97a32ada59db060cf8fd7ebdf01 Author: Rad Suchecki Date: Fri Jan 17 16:32:31 2020 +1030 singularity containered pulls taylored to CI commit f67dfab4509b969c023d8c64d363f7aadf41112c Author: Rad Suchecki Date: Fri Jan 17 16:07:07 2020 +1030 not pulling s images, more sysinfo commit 69c5241ed24ed195564da54dcf915d859d1f45f1 Author: Rad Suchecki Date: Fri Jan 17 15:43:27 2020 +1030 typo commit 4fa616f8d77a6764d58a460810826d993f14e2aa Author: Rad Suchecki Date: Fri Jan 17 15:32:32 2020 +1030 temp: skipping report rendering in CI commit 5a1809276ea3769a5492e53f6e87813e41bd60eb Author: Rad Suchecki Date: Fri Jan 17 15:31:55 2020 +1030 added sysinfo commit adc622351108fb0bbea7a5280882907249b3de8a Author: Rad Suchecki Date: Fri Jan 17 15:08:16 2020 +1030 updated picocli version commit 0d472c66fb675ad234efbf3b8bd0c1ac752410af Author: Rad Suchecki Date: Fri Jan 17 15:07:49 2020 +1030 added minimal eukaryotic dataset commit 2904e6a38a89d0bb789f9ab69c2a726cda169933 Author: Rad Suchecki Date: Fri Jan 17 15:04:50 2020 +1030 catch all "groovy" containing labels commit 40586042c2cd7b068d561338bf3ac3fc288f582a Merge: 90caaca d933b7c Author: Rad Suchecki Date: Fri Jan 17 15:03:39 2020 +1030 Merge branch 'docker/samtools/1.10' into docker/groovy-samtools/3.0jre-1.9-alpine commit 90caaca1e768eb70a3a24dfaaf379a16f1a85d7c Author: Rad Suchecki Date: Fri Jan 17 15:03:20 2020 +1030 fixed label commit d933b7ca8232f96e58fc0edcd95e687062755db3 Author: Rad Suchecki Date: Fri Jan 17 15:02:20 2020 +1030 container spec commit 9870ff5feb92e24fca1b6bf69afed30c1d7684f2 Author: Rad Suchecki Date: Fri Jan 17 14:45:08 2020 +1030 biocontaiers mirror commit 879f6ced4ecb8faac3e6e42e32342b6fbe5ac9bd Author: Rad Suchecki Date: Fri Jan 17 13:21:10 2020 +1030 dropped image sha - could that be triggering internal server error? 
commit 58691d17a2eec62ea04b37469e34b175f797316f Author: Rad Suchecki Date: Fri Jan 17 12:58:05 2020 +1030 Samtools and bash added to alpine groovy 3.0 jre commit ce8211d50e5223ec1cbe03faaccf5060273f34f9 Author: Rad Suchecki Date: Thu Jan 16 15:57:50 2020 +1030 trigger build from quay commit d8fc8778e91ccd5fd14f5701282664e2d401dfe4 Author: Rad Suchecki Date: Thu Jan 16 15:54:00 2020 +1030 fix compatibility issues due to busybox sed on biocontainers rnftools image commit 92a59bc7c512a7217c1e58a39139d789a9bf98c5 Author: Rad Suchecki Date: Thu Jan 16 15:53:08 2020 +1030 downgrade for leaner container image commit 6785c050b9cce95353bf56f31c657330c31923ac Author: Rad Suchecki Date: Thu Jan 16 15:52:42 2020 +1030 tested quay rnftools containers commit 9277418659497c55f68e8c6ceca27fcc82fc2008 Author: Rad Suchecki Date: Thu Jan 16 11:27:36 2020 +1030 singularity and docker conf for GH actions CI --- .github/workflows/main.yml | 172 +++++++++++++++++++++++++ README.md | 53 +++++++- bin/eval_rnf.groovy | 3 +- bin/render.R | 3 + bin/tct_rnf.groovy | 3 +- conf/containers.config | 32 ++--- conf/simulations.config | 7 + dockerfiles/groovy-samtools.Dockerfile | 17 +++ dockerfiles/rnftools.Dockerfile | 20 +-- dockerfiles/samtools.Dockerfile | 11 +- main.nf | 22 +++- nextflow.config | 13 +- pull_containers.nf | 20 ++- 13 files changed, 310 insertions(+), 66 deletions(-) create mode 100644 .github/workflows/main.yml create mode 100644 dockerfiles/groovy-samtools.Dockerfile diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..fe84754 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,172 @@ +name: CI + +on: [push] + +jobs: + docker: + runs-on: ubuntu-18.04 + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + params: + - { mapper: minimap2, mode: dna2dna } + - { mapper: hisat2, mode: rna2dna } + - { mapper: kallisto, mode: rna2rna } + steps: + - name: Install Nextflow + env: + NXF_VERSION: 19.10.0 + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Check out code + uses: actions/checkout@v2 + + - name: Setup test-data cache + id: cache-data + uses: actions/cache@v1 + with: + path: downloaded + key: data-${{ github.sha }} + restore-keys: | + data-${{ github.sha }}- + data- + + - name: Setup work-dir cache + id: cache-work + uses: actions/cache@v1 + with: + path: work + key: work-docker-${{ matrix.params.mode }}-${{ github.sha }} + restore-keys: | + work-docker-${{ matrix.params.mode }}-${{ github.sha }}- + work-docker-${{ matrix.params.mode }}- + + - name: Setup NF cache + id: cache-NF + uses: actions/cache@v1 + with: + path: .nextflow + key: nf-docker-${{ matrix.params.mode }}-${{ github.sha }} + restore-keys: | + nf-docker-${{ matrix.params.mode }}-${{ github.sha }}- + nf-docker-${{ matrix.params.mode }}- + + - name: Test workflow with docker - ${{ matrix.params.mapper }} + run: | + nextflow run ${GITHUB_WORKSPACE} -profile CI,docker --subset 1 --mappers ${{ matrix.params.mapper }} --mapmode ${{ matrix.params.mode }} --max_cpus 2 --max_memory 6.GB -ansi-log false -with-dag dag.dot -resume + - name: sysinfo + run: | + df -h + lscpu | egrep 'Model name|Socket|Thread|NUMA|CPU\(s\)' + awk '/MemTotal/ {print "RAM : "$2/1E6" GB"}' /proc/meminfo + docker image ls + # - name: garaph-easy dag + # run: | + # cat dag.dot | docker run -i panguolin/grapheasy:latest graph-easy + + singularity: + runs-on: ubuntu-18.04 + timeout-minutes: 60 + strategy: + fail-fast: false + max-parallel: 1 + matrix: + params: + - { 
mapper: 'minimap2|hisat2|kallisto', mode: 'dna2dna|rna2dnarna2rna' } + steps: + - name: Set up Go + uses: actions/setup-go@v1 + with: + go-version: 1.13 + id: go + - name: Install Dependencies for Singularity + run: | + sudo apt-get update && sudo apt-get install -y \ + build-essential \ + libssl-dev \ + uuid-dev \ + libgpgme11-dev \ + squashfs-tools \ + libseccomp-dev \ + pkg-config + - name: Install Singularity + env: + SINGULARITY_VERSION: 3.5.2 + run: | + export GOPATH=/tmp/go + mkdir -p $GOPATH + sudo mkdir -p /usr/local/var/singularity/mnt && \ + mkdir -p $GOPATH/src/github.com/sylabs && \ + cd $GOPATH/src/github.com/sylabs && \ + wget -qO- https://github.com/sylabs/singularity/releases/download/v${SINGULARITY_VERSION}/singularity-${SINGULARITY_VERSION}.tar.gz | \ + tar xzv && \ + cd singularity && \ + ./mconfig -p /usr/local && \ + make -C builddir && \ + sudo make -C builddir install + - name: Install Nextflow + env: + NXF_VERSION: 19.10.0 + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Check out code + uses: actions/checkout@v2 + + - name: Setup test-data cache + id: cache-data + uses: actions/cache@v1 + with: + path: downloaded + key: data-${{ github.sha }} + restore-keys: | + data-${{ github.sha }}- + data- + + - name: Setup work cache + id: cache-work + uses: actions/cache@v1 + with: + path: work + key: work-sing-${{ github.sha }} + restore-keys: | + work-sing-${{ github.sha }}- + work-sing- + + - name: Setup NF cache + id: cache-NF + uses: actions/cache@v1 + with: + path: .nextflow + key: nf-sing-${{ github.sha }} + restore-keys: | + nf-sing-${{ github.sha }}- + nf-sing- + + - name: Setup singularity images cache + id: cache-singularity-images + uses: actions/cache@v1 + with: + path: singularity-images + key: simg-${{ github.sha }} + restore-keys: | + simg-${{ github.sha }}- + simg- + + - name: Pull containers + run: | + nextflow run ${GITHUB_WORKSPACE}/pull_containers.nf --mappers '${{ matrix.params.mapper }}' -ansi-log false + - name: Test workflow with singularity - '${{ matrix.params.mapper }}' - '${{ matrix.params.mode }}' + run: | + nextflow run ${GITHUB_WORKSPACE} -profile CI,singularity --subset 1 --mappers '${{ matrix.params.mapper }}' --mapmode '${{ matrix.params.mode }}' --max_cpus 2 --max_memory 6.GB -ansi-log false -with-dag dag.dot -resume + - name: sysinfo + run: | + df -h + lscpu | egrep 'Model name|Socket|Thread|NUMA|CPU\(s\)' + awk '/MemTotal/ {print "RAM : "$2/1E6" GB"}' /proc/meminfo + ls -lhS singularity-images/ + du -h singularity-images diff --git a/README.md b/README.md index 7094958..7c93997 100644 --- a/README.md +++ b/README.md @@ -433,18 +433,52 @@ There are several ways for rendering of the report outside the pipeline, with do ### Using docker +1. Put all requited files in one place + ```sh -docker run --rm --user $(id -u):$(id -g) \ - --volume $(pwd)/report:/report \ - --workdir /report rsuchecki/renderer:0.2 ./render.R +mkdir -p localrender +cp report/report.Rmd localrender/ +cp results/* localrender/ +cp flowinfo/*.{json,tsv} localrender/ ``` +2. Docker run rendering + +```sh +docker run \ + --rm \ + --user $(id -u):$(id -g) \ + --volume $(pwd)/localrender:/render \ + --volume $(pwd)/bin:/binr \ + --workdir /render \ + rsuchecki/renderer:0.4.1_81ab6b5d71509d48e3a37b5eafb4bca5b117b5fc /binr/render.R +``` + +3. Rendered report should be available under `./localrender` + + ### Using singularity +1. 
Put all requited files in one place + +```sh +mkdir -p localrender \ + && cp report/report.Rmd localrender/ \ + && cp results/* localrender/ \ + && cp flowinfo/*.{json,tsv} localrender/ +``` + +2. Docker run rendering + ```sh -singularity exec --pwd $(pwd)/report docker://rsuchecki/renderer:0.1 ./render.R +singularity exec \ + --bind $(pwd)/bin:/binr \ + --pwd $(pwd)/localrender \ + docker://rsuchecki/renderer:0.4.1_81ab6b5d71509d48e3a37b5eafb4bca5b117b5fc /binr/render.R ``` +3. Rendered report should be available under `./localrender` + ### Natively If you'd like to render the report without docker/singularity, you will need the following: @@ -456,13 +490,20 @@ If you'd like to render the report without docker/singularity, you will need the * `rmarkdown` * `rticles` * `bookdown` + * `tidyverse` + * `jsonlite` + * `kableExtra` Then: ``` -cd report && ./render.R -``` +mkdir -p localrender \ + && cp report/report.Rmd localrender/ \ + && cp results/* localrender/ \ + && cp flowinfo/*.{json,tsv} localrender/ +cd localrender && ../bin/render.R +``` # Manuscript diff --git a/bin/eval_rnf.groovy b/bin/eval_rnf.groovy index 40ce2cd..0ca2cac 100755 --- a/bin/eval_rnf.groovy +++ b/bin/eval_rnf.groovy @@ -5,7 +5,8 @@ import java.util.zip.GZIPInputStream import java.util.zip.GZIPOutputStream -@Grab('info.picocli:picocli:4.0.0-alpha-3') //command line interface +@Grab('info.picocli:picocli-groovy:4.1.2') //command line interface + @Command(header = [ //Font Name: Calvin S (Caps) $/@|bold,blue ╔═╗╦ ╦╔═╗╦ ╦═╗╔╗╔╔═╗ |@/$, diff --git a/bin/render.R b/bin/render.R index 8525ac7..14f4109 100755 --- a/bin/render.R +++ b/bin/render.R @@ -3,5 +3,8 @@ library(rmarkdown) library(rticles) library(bookdown) +library(tidyverse) +library(jsonlite) +library(kableExtra) rmarkdown::render(Sys.glob("*.Rmd")) \ No newline at end of file diff --git a/bin/tct_rnf.groovy b/bin/tct_rnf.groovy index 90c8345..92f5d17 100755 --- a/bin/tct_rnf.groovy +++ b/bin/tct_rnf.groovy @@ -4,7 +4,8 @@ import java.util.zip.GZIPInputStream import java.util.zip.GZIPOutputStream -@Grab('info.picocli:picocli:4.0.0-alpha-3') //command line interface +@Grab('info.picocli:picocli-groovy:4.1.2') //command line interface + @Command(header = [ $/@|bold,blue ╔╦╗╔═╗╔╦╗ ╦═╗╔╗╔╔═╗ |@/$, diff --git a/conf/containers.config b/conf/containers.config index 2a7c0af..8afb879 100644 --- a/conf/containers.config +++ b/conf/containers.config @@ -1,38 +1,32 @@ //Containers for processess other then indexing and alignment/mapping process { - //default container, standard-ish linux toool set: wget, gawk... //replaced by individual containers where applicable + //default container, standard-ish linux tool set: wget, gawk... //replaced by individual containers where applicable container = 'rsuchecki/tools:0.2' // container = {docker.enabled ? 'rsuchecki/tools:0.2@sha256:....' 
: 'rsuchecki/tools:0.2'} - withLabel: benchmark { - container = 'rsuchecki/biokanga_benchmark:0.7' - } withLabel: gffread { container = 'rsuchecki/gffread:0.11.4_32ec4f0a9d4c9acbbe9b93c0e6c2da3e9f60e097' } withLabel: groovy { - container = 'rsuchecki/groovy:3.0_868da92992a46b74552abbbf72b76c8aba3fbc9c' + container = 'rsuchecki/groovy-samtools:3.0jre-1.9-alpine_879f6ced4ecb8faac3e6e42e32342b6fbe5ac9bd' } withLabel: groovy_samtools { //now redundant, simply switch to 'samtools' label with our samtools container - // container = 'rsuchecki/groovy-conda-samtools:0.1_67d8f0d93333fa0511ae1d23064beba2b544ea0c' - container = 'rsuchecki/samtools:1.9_358fa2a91e7feaf5f30e46818d5c9d81ad3975f9' + container = 'rsuchecki/groovy-samtools:3.0jre-1.9-alpine_879f6ced4ecb8faac3e6e42e32342b6fbe5ac9bd' } withLabel: rnftools { - // container = 'rsuchecki/rnftools:0.3.1.3_3123fca68e14580a453deea77a0549929ed44715' //WORKS, more recent builds fail - container = 'rsuchecki/rnftools:0.3.1.3_3d09a45044213bfbc96a3e0e70924f68812c718b' - // container = 'rsuchecki/rnftools:0.3.1.3_a7e1f831b716c12385636652dc999e988b3c8af4' - //FAILS container = 'quay.io/biocontainers/rnftools@sha256:8b588055977bbf83116f394d755c088c885b37b2ccce0b81d50b2d87ba0d2f29' - //FAILS container = 'quay.io/biocontainers/rnftools@sha256:f3b8bedc40416bd40de0f1b5ef5096b9ab47a079ba1735a130825989de20f4d9' + //container = 'rsuchecki/rnftools:0.3.1.3_3d09a45044213bfbc96a3e0e70924f68812c718b' //OK + // container = 'quay.io/biocontainers/rnftools:0.3.1.3--py36_0' //OK + container = 'rsuchecki/rnftools:0.3.1.2_ce8211d50e5223ec1cbe03faaccf5060273f34f9' } withLabel: rrender { container = 'rsuchecki/renderer:0.4.1_81ab6b5d71509d48e3a37b5eafb4bca5b117b5fc' } - withLabel: rscript { - container = 'rsuchecki/rscripts:0.7_c66407a6a160a64a5f973868bd095e28bade0094' - } + // withLabel: rscript { + // container = 'rsuchecki/rscripts:0.7_c66407a6a160a64a5f973868bd095e28bade0094' + // } withLabel: samtools { - container = 'rsuchecki/samtools:1.9_358fa2a91e7feaf5f30e46818d5c9d81ad3975f9' - } - withLabel: sra { - container = 'ncbi/sra-toolkit:2.9.2' //ncbi/sra-toolkit@sha256:0e6ff2d6560ad7e59821dad53488bdcf09a37c6ccdeab0caced9bbf76837278d + container = 'rsuchecki/samtools:1.10_9870ff5feb92e24fca1b6bf69afed30c1d7684f2' } + // withLabel: sra { + // container = 'ncbi/sra-toolkit:2.9.2' //ncbi/sra-toolkit@sha256:0e6ff2d6560ad7e59821dad53488bdcf09a37c6ccdeab0caced9bbf76837278d + // } } diff --git a/conf/simulations.config b/conf/simulations.config index 356c1ac..0594e90 100644 --- a/conf/simulations.config +++ b/conf/simulations.config @@ -11,6 +11,13 @@ params { //NOTE: LOCAL FILES SHOULD NOT BE ACCESSED VIA SYMLINKS AND PATHS NEED TO BE CONSISTENT ACROSS HEAD AND COMPUTE NODES references = [ + [ + species : "Encephalitozoon_cuniculi_ecuniii_l", + version : "gca_001078035", + fasta : "ftp://ftp.ensemblgenomes.org/pub/fungi/release-45/fasta/fungi_microsporidia1_collection/encephalitozoon_cuniculi_ecuniii_l_gca_001078035/dna_index/Encephalitozoon_cuniculi_ecuniii_l_gca_001078035.ECIIIL.dna.toplevel.fa.gz", + gff : "ftp://ftp.ensemblgenomes.org/pub/fungi/release-45/gff3/fungi_microsporidia1_collection/encephalitozoon_cuniculi_ecuniii_l_gca_001078035/Encephalitozoon_cuniculi_ecuniii_l_gca_001078035.ECIIIL.45.gff3.gz", + seqtype : "DNA" + ], [ species : "Escherichia_coli", version : "str_k_12_substr_mg1655", diff --git a/dockerfiles/groovy-samtools.Dockerfile b/dockerfiles/groovy-samtools.Dockerfile new file mode 100644 index 0000000..cac4a36 --- /dev/null +++ 
b/dockerfiles/groovy-samtools.Dockerfile @@ -0,0 +1,17 @@ +FROM groovy:3.0-jre-alpine + +USER root + +CMD ["/bin/sh"] + +ENV SAMTOOLS_VERSION=1.9 + +RUN apk add --no-cache build-base zlib-dev bzip2-dev xz-dev ncurses-dev ca-certificates wget bash \ + && wget -q https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && tar xjvf samtools-${SAMTOOLS_VERSION}.tar.bz2 \ + && cd samtools-${SAMTOOLS_VERSION}/ && make && cd .. \ + && mv samtools-${SAMTOOLS_VERSION}/samtools /bin/ \ + && rm -rf samtools* \ + && apk del build-base zlib-dev ca-certificates wget + +USER groovy \ No newline at end of file diff --git a/dockerfiles/rnftools.Dockerfile b/dockerfiles/rnftools.Dockerfile index 89b7601..7f5ed05 100644 --- a/dockerfiles/rnftools.Dockerfile +++ b/dockerfiles/rnftools.Dockerfile @@ -1,19 +1 @@ -FROM debian:stable-20190204 - -ARG ANACONDA_VERSION=4.5.12 - -RUN apt-get -qq update && apt-get -qq -y install --no-install-recommends libkeyutils-dev curl bzip2 ca-certificates procps \ - && curl -sSL https://repo.continuum.io/miniconda/Miniconda3-${ANACONDA_VERSION}-Linux-x86_64.sh -o /tmp/miniconda.sh \ - && bash /tmp/miniconda.sh -bfp /usr/local \ - && rm -rf /tmp/miniconda.sh \ - && conda install -y python=3 \ - && conda update conda \ - && apt-get -qq -y remove curl bzip2 \ - && apt-get -qq -y autoremove \ - && apt-get autoclean \ - && rm -rf /var/lib/apt/lists/* /var/log/dpkg.log \ - && conda clean --all --yes - -ENV PATH /opt/conda/bin:$PATH - -RUN conda install --override-channels -c conda-forge -c bioconda -c default rnftools \ No newline at end of file +FROM quay.io/biocontainers/rnftools:0.3.1.2--py35_2 \ No newline at end of file diff --git a/dockerfiles/samtools.Dockerfile b/dockerfiles/samtools.Dockerfile index b8ebf87..4aedef6 100644 --- a/dockerfiles/samtools.Dockerfile +++ b/dockerfiles/samtools.Dockerfile @@ -1,10 +1 @@ -FROM rsuchecki/miniconda3:4.6.14_050661b0ef92865fde5aea442f3440d1a7532659@sha256:c19f9684db9de4dd6852f942fa7a6a6e873d169c3bbe36ac3a365cbc458dee7e - -LABEL maintainer="Rad Suchecki " -SHELL ["/bin/bash", "-c"] - -RUN apt-get update && apt-get install -y groovy - -ARG SAMTOOLS_VERSION=1.9 - -RUN conda install --override-channels -c conda-forge -c bioconda -c default samtools=${SAMTOOLS_VERSION} +FROM quay.io/biocontainers/samtools:1.10--h9402c20_2 \ No newline at end of file diff --git a/main.nf b/main.nf index fdc301c..b73c4e9 100644 --- a/main.nf +++ b/main.nf @@ -1,5 +1,7 @@ #!/usr/bin/env nextflow +log.info workflow.profile + //For pretty-printing nested maps etc import groovy.json.JsonGenerator import groovy.json.JsonSlurper @@ -200,7 +202,7 @@ process stageRemoteInputFile { storeDir can be problematic on s3 - leads to "Missing output file(s)" error workDir should be more robust as it is mounted in singularity unlike outdir? */ - storeDir { executor == 'awsbatch' ? null : "${workDir}/downloaded" } + storeDir { executor == 'awsbatch' ? 
null : "downloaded" } input: @@ -470,6 +472,7 @@ process rnfSimReads { distDev="" } """ + set -eo pipefail echo "import rnftools rnftools.mishmash.sample(\\"${basename}_reads\\",reads_in_tuple=${tuple}) rnftools.mishmash.${simulator}( @@ -484,12 +487,18 @@ process rnfSimReads { rule: input: rnftools.input() " > Snakefile snakemake -p \ - && paste --delimiters '=' <(echo -n nreads) <(sed -n '1~4p' *.fq | wc -l) > simStats \ - && time sed -i '2~4 s/[^ACGTUacgtu]/N/g' *.fq \ - && time gzip --fast *.fq \ + && awk 'END{print "nreads="NR/4}' *.fq > simStats \ + && for f in *.fq; do + paste - - - - < \$f \ + | awk -vFS="\\t" -vOFS="\\n" '{gsub(/[^ACGTUacgtu]/,"N",\$2);print}' \ + | gzip -c > \${f}.gz + done && rm *.fq \ && find . -type d -mindepth 2 | xargs rm -r """ } + // && paste --delimiters '=' <(echo -n nreads) <(sed -n '1~4p' *.fq | wc -l) > simStats \ + // && time sed -i '2~4 s/[^ACGTUacgtu]/N/g' *.fq \ +// && time gzip --fast *.fq \ //extract simulation stats from file (currently number of reads only), reshape and split to different channels // readsForCoordinateConversion = Channel.create() @@ -630,7 +639,7 @@ process mapSimulatedReads { } process evaluateAlignmentsRNF { - label 'samtools' + label 'groovy_samtools' // label 'ES' // tag{alignmeta.tool.subMap(['name'])+alignmeta.target.subMap(['species','version'])+alignmeta.query.subMap(['seqtype','nreads'])+alignmeta.params.subMap(['paramslabel'])} // tag{alignmeta.params.subMap(['paramslabel'])} @@ -704,6 +713,9 @@ process renderReport { output: file '*' + + when: + !(workflow.profile.contains('CI')) //until leaner container script: """ diff --git a/nextflow.config b/nextflow.config index 4796ab7..7336a09 100644 --- a/nextflow.config +++ b/nextflow.config @@ -84,9 +84,18 @@ profiles { autoMounts = true cacheDir = "${params.singularitydir}" //when distibuting the pipeline probably should point under $workDir } + process { + withLabel: '.*groovy.*' { + containerOptions = '-B "$PWD":/home/groovy/.groovy' //otherwise grabbing grapes may fail with read-only filesystem error + } + } } - singularitymodule { // Should not be needed if compute nodes inherit env from head node - process.module = 'singularity/3.2.1' //Cluster specific - update if and as required + CI { + docker.runOptions = '-u root:root' //apparently required for GH actions but only causes problems with process using bin/paf2pretzel.groovy due to Ivy limitations + process { + errorStrategy = 'terminate' + maxRetries = 0 + } } } diff --git a/pull_containers.nf b/pull_containers.nf index 9221408..4b099c9 100644 --- a/pull_containers.nf +++ b/pull_containers.nf @@ -4,12 +4,26 @@ import nextflow.util.Escape import nextflow.container.SingularityCache -Channel.from(params.mappersDefinitions).map { +//Auxiliary container images +def containers = [] +session.getConfig().process.each {k, v -> + if((k.startsWith('withLabel:') || k.startsWith('withName:')) && v.containsKey('container') && !(k.endsWith('rrender'))) { //skipping rrender for CI + // println "$k -> $v.container" + containers << v.container + } +} + +//Mappers container images +Channel.from(params.mappersDefinitions) +.filter{ params.mappers == 'all' || it.tool.matches(params.mappers) } +.map { it.container -}.set { containers } +} +.set { mapperContainers } SingularityCache scache = new SingularityCache() //to get NF-consitent image file names + process pull_container { tag { remote } maxForks 1 @@ -17,7 +31,7 @@ process pull_container { echo true input: - val(remote) from containers + val(remote) from 
mapperContainers.mix(Channel.from(containers)).unique() output: file(img) From b5a658f96095a67c07b92a33e6e47cc72adf2eda Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Thu, 23 Jan 2020 12:15:07 +1030 Subject: [PATCH 07/10] bug fix renderReport caching, typos, formatting --- conf/mappers.config | 2 +- groovy/Validators.groovy | 2 +- main.nf | 45 +++++++++++++++++++--------------------- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/conf/mappers.config b/conf/mappers.config index eb343ee..2319f05 100644 --- a/conf/mappers.config +++ b/conf/mappers.config @@ -107,7 +107,7 @@ params { ], [ tool: 'hisat2', - version: '2.1.0', + version: '2.1.0', versionCall: 'hisat2 --version | head -1 | cut -f3 -d" "', container: 'rsuchecki/hisat2:2.1.0_4cb1d4007322767b562e98f69179e8ebf6d31fb1', index: true, diff --git a/groovy/Validators.groovy b/groovy/Validators.groovy index d3831ad..7941cb7 100644 --- a/groovy/Validators.groovy +++ b/groovy/Validators.groovy @@ -44,7 +44,7 @@ def validateMappersDefinitions (mappers, allRequired, allOptional, allModes) { System.exit 1 } if(!rec.container.contains(rec.version)) - System.err.println "Warning: decalred tool version string ${rec.version} not found in container image spec ${rec.container}." + System.err.println "Warning: declared tool version string ${rec.version} not found in container image spec ${rec.container}." } allVersions.each {k, v -> if(v.size()==1) diff --git a/main.nf b/main.nf index b73c4e9..690c463 100644 --- a/main.nf +++ b/main.nf @@ -1,9 +1,7 @@ #!/usr/bin/env nextflow -log.info workflow.profile - //For pretty-printing nested maps etc -import groovy.json.JsonGenerator +import groovy.json.JsonGenerator import groovy.json.JsonSlurper import groovy.json.JsonOutput //as JsonGenerator @@ -19,8 +17,8 @@ import groovy.json.JsonOutput JsonGenerator jsonGenerator = new JsonGenerator.Options() .addConverter(java.nio.file.Path) { java.nio.file.Path p, String key -> p.toUriString() } .addConverter(Duration) { Duration d, String key -> d.durationInMillis } - .addConverter(java.time.OffsetDateTime) { java.time.OffsetDateTime dt, String key -> dt.toString() } - .addConverter(nextflow.NextflowMeta) { nextflow.NextflowMeta m, String key -> m.toJsonMap() } //incompatible with Nextflow <= 19.04.0 + .addConverter(java.time.OffsetDateTime) { java.time.OffsetDateTime dt, String key -> dt.toString() } + .addConverter(nextflow.NextflowMeta) { nextflow.NextflowMeta m, String key -> m.toJsonMap() } //incompatible with Nextflow <= 19.04.0 .excludeFieldsByType(java.lang.Class) // .excludeFieldsByName('class') // .excludeNulls() .build() @@ -67,9 +65,9 @@ mappersChannel.filter { log.warn """ versionCall not specified for ${it.tool} ${it.version} it will not be included in this run - """ + """ } -} +} .set { mappersVithVersionCallChannel } process parseMapperVersion { @@ -88,14 +86,14 @@ mappersCapturedVersionChannel .map { meta, ver -> if(meta.version != ver.trim()) { log.warn """ - Decalred version ${meta.version} for ${meta.tool} - does not match version ${ver.trim()} - obtained from versionCall: ${meta.versionCall} + Decalred version ${meta.version} for ${meta.tool} + does not match version ${ver.trim()} + obtained from versionCall: ${meta.versionCall} Updating version in metadata to ${ver.trim()} - """ - //Please correct your mapper configuration file(s). - // throw new RuntimeException('msg') or // - // session.abort(new Exception()) + """ + //Please correct your mapper configuration file(s). 
+ // throw new RuntimeException('msg') or // + // session.abort(new Exception()) meta.version = ver.trim() if(!meta.container.contains(meta.version)) { log.error """ @@ -103,9 +101,9 @@ mappersCapturedVersionChannel not found in container image spec ${meta.container}. Please correct your mapper configuration file(s). - Aborting... + Aborting... """ - session.abort(new Exception()) // throw new RuntimeException('msg') + session.abort(new Exception()) // throw new RuntimeException('msg') } } meta @@ -185,7 +183,7 @@ if (params.help){ but local files might not be on paths automatically mounted in the container. */ Channel.from(params.references) -.take( params.subset ) //only process n data sets (-1 means all) +.take( params.subset ) //only process n data sets (-1 means all) .combine(Channel.from('fasta','gff')) //duplicate each reference record .filter { meta, fileType -> meta.containsKey(fileType)} //exclude gff record if no gff declared .tap { refsToStage } //download if URL @@ -202,7 +200,7 @@ process stageRemoteInputFile { storeDir can be problematic on s3 - leads to "Missing output file(s)" error workDir should be more robust as it is mounted in singularity unlike outdir? */ - storeDir { executor == 'awsbatch' ? null : "downloaded" } + storeDir { executor == 'awsbatch' ? null : "downloaded" } input: @@ -705,15 +703,14 @@ process renderReport { label 'rrender' label 'report' stageInMode 'copy' - //scratch = true //hack, otherwise -profile singularity (with automounts) fails with FATAL: container creation failed: unabled to {task.workDir} to mount list: destination ${task.workDir} is already in the mount point list input: file(Rmd) from Channel.fromPath("$baseDir/report/report.Rmd") - file(json) from jsonChannel + file(json) from jsonChannel.collect() output: file '*' - + when: !(workflow.profile.contains('CI')) //until leaner container @@ -727,7 +724,7 @@ process renderReport { library(tidyverse) library(jsonlite) library(kableExtra) - + rmarkdown::render("${Rmd}") """ } @@ -776,7 +773,7 @@ workflow.onComplete { GroovyShell shell = new GroovyShell() def apiCalls = shell.parse(new File("$baseDir/groovy/ApiCalls.groovy")) - // def instant = Instant.now() + // def instant = Instant.now() // println instant // def utc = LocalDateTime.ofInstant(instant, ZoneOffset.UTC) // def local = LocalDateTime.ofInstant(instant, ZoneId.systemDefault()) @@ -796,7 +793,7 @@ workflow.onComplete { ], RELEASE_TAG: "${workflow.revision}_${workflow.complete.format(formatter)}_${workflow.runName}_${workflow.sessionId}", RELEASE_NAME: "${workflow.revision} - results and metadata for run '${workflow.runName}'", - RELEASE_BODY: + RELEASE_BODY: """ - revision `${workflow.revision}` - commit ID ${workflow.commitId} From ba4cbef56cd99be05719fb529b303079ae09cd61 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Thu, 23 Jan 2020 12:17:28 +1030 Subject: [PATCH 08/10] -redundant --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fe84754..b68f46f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -168,5 +168,4 @@ jobs: df -h lscpu | egrep 'Model name|Socket|Thread|NUMA|CPU\(s\)' awk '/MemTotal/ {print "RAM : "$2/1E6" GB"}' /proc/meminfo - ls -lhS singularity-images/ - du -h singularity-images + ls -lhS singularity-images/ \ No newline at end of file From b3ff9c57c91ec3c68f7e0160ad993319d45ee2b5 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Thu, 23 Jan 2020 16:55:54 +1030 Subject: [PATCH 
09/10] streamlined results and meta JSON handling --- main.nf | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/main.nf b/main.nf index 690c463..b02aa4d 100644 --- a/main.nf +++ b/main.nf @@ -677,36 +677,35 @@ process evaluateAlignmentsRNF { **/ def slurper = new JsonSlurper() evaluatedAlignmentsRNF.map { META, JSON -> - [META+[evaluation: slurper.parseText(JSON.text)]] + [ META + [evaluation: slurper.parseText(JSON.text)] ] } .collect() - .map { - file("${params.outdir}").mkdirs() - outfile = file("${params.outdir}/allstats.json") - // outfile.text = groovy.json.JsonOutput.prettyPrint(jsonGenerator.toJson(it)) - outfile.text = JsonOutput.prettyPrint(jsonGenerator.toJson(it.sort( {k1,k2 -> k1.mapper.tool <=> k2.mapper.tool} ) )) - def runmetapart = [:] - runmetapart['workflow'] = workflow.getProperties() - runmetapart['params'] = params - runmetaJSONpartial = file("${params.infodir}/runmetapart.json") - runmetaJSONpartial.text = JsonOutput.prettyPrint(jsonGenerator.toJson(runmetapart)) - - [outfile, runmetaJSONpartial] - }.set { jsonChannel } - + .map { //Generate: [ allstats.json, runmetapart.json] + [ + JsonOutput.prettyPrint(jsonGenerator.toJson( + it.sort( { k1,k2 -> k1.mapper.tool <=> k2.mapper.tool } + ))), + JsonOutput.prettyPrint(jsonGenerator.toJson([ + workflow : workflow.getProperties(), + params : params + ])) + ] + } + .set { resultsJsonChannel } -//WRAP-UP --TODO Manuscript rendering to be separated -// writing = Channel.fromPath("$baseDir/report/*.Rmd").mix(Channel.fromPath("$baseDir/manuscript/*")) //manuscript dir exists only on manuscript branch +// //WRAP-UP --TODO Manuscript rendering to be separated +// // writing = Channel.fromPath("$baseDir/report/*.Rmd").mix(Channel.fromPath("$baseDir/manuscript/*")) //manuscript dir exists only on manuscript branch process renderReport { tag {"Render ${Rmd}"} label 'rrender' label 'report' stageInMode 'copy' + cache false //Input includes run metadata so cache would not work anyway input: file(Rmd) from Channel.fromPath("$baseDir/report/report.Rmd") - file(json) from jsonChannel.collect() + tuple file('allstats.json'), file('runmetapart.json') from resultsJsonChannel output: file '*' From c54ccfe18cbb8983ae3c53f7b4d7e3d7c074dd35 Mon Sep 17 00:00:00 2001 From: Rad Suchecki Date: Thu, 23 Jan 2020 16:56:18 +1030 Subject: [PATCH 10/10] supress DAG warning --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b68f46f..61a011e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -159,7 +159,7 @@ jobs: - name: Pull containers run: | - nextflow run ${GITHUB_WORKSPACE}/pull_containers.nf --mappers '${{ matrix.params.mapper }}' -ansi-log false + nextflow run ${GITHUB_WORKSPACE}/pull_containers.nf --mappers '${{ matrix.params.mapper }}' -ansi-log false -with-dag false - name: Test workflow with singularity - '${{ matrix.params.mapper }}' - '${{ matrix.params.mode }}' run: | nextflow run ${GITHUB_WORKSPACE} -profile CI,singularity --subset 1 --mappers '${{ matrix.params.mapper }}' --mapmode '${{ matrix.params.mode }}' --max_cpus 2 --max_memory 6.GB -ansi-log false -with-dag dag.dot -resume
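
The patches above settle on a simple contract for the optional `versionCall` field in `conf/mappers.config`: the command runs inside the mapper's own container and must print only the bare version string to stdout, which `parseMapperVersion` captures and then cross-checks against the declared `version` and the `container` tag. For reference, a minimal sketch of a new entry following that convention; the tool name, version command, and container tag below are purely illustrative and are not part of these patches:

```groovy
// Illustrative mappers.config entry only -- 'examplemapper' and its container
// tag are placeholders, not tools configured by these patches.
[
  tool: 'examplemapper',
  version: '1.2.3',
  versionCall: 'examplemapper --version | cut -f2 -d" "', // must print only the version string, e.g. "1.2.3"
  container: 'example/examplemapper:1.2.3',               // tag should contain the version string
  index: true,      // or an explicit indexing command template
  dna2dna: true,    // at least one of dna2dna/rna2rna/rna2dna must be declared
],
```

Entries without `versionCall` are skipped with a warning (PATCH 05/10), and a parsed version that disagrees with the declared one is either warned about or, if it is also missing from the container tag, aborts the run (PATCH 03/10).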