diff --git a/stpipeline/core/annotation.py b/stpipeline/core/annotation.py index 1db8562..f799350 100755 --- a/stpipeline/core/annotation.py +++ b/stpipeline/core/annotation.py @@ -13,6 +13,8 @@ from stpipeline.common.utils import file_ok +logger = logging.getLogger("STPipeline") + class UnknownChrom(Exception): """ @@ -211,7 +213,6 @@ def annotateReads( Raises: RuntimeError: If input files are missing or errors occur during annotation. """ - logger = logging.getLogger("STPipeline") if not os.path.isfile(mappedReads): raise RuntimeError(f"Input file not found: {mappedReads}") @@ -234,12 +235,12 @@ def annotateReads( outputDiscarded, ) except Exception as e: - error = "Error during annotation. HTSEQ execution failed" + error = "Error during annotation: HTSEQ execution failed" logger.error(error) raise e if not file_ok(outputFile) or annotated == 0: - error = f"Error during annotation. Output file not present or empty {outputFile}" + error = f"Error during annotation: Output file not present or empty {outputFile}" logger.error(error) raise RuntimeError(error) diff --git a/stpipeline/core/mapping.py b/stpipeline/core/mapping.py index ec9050a..3ecb544 100755 --- a/stpipeline/core/mapping.py +++ b/stpipeline/core/mapping.py @@ -139,6 +139,7 @@ def alignReads( args += ["--outSAMunmapped", "None"] try: + logger.debug(f"STAR mapping, running command: {' '.join(args)}") proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True, shell=False) _, errmsg = proc.communicate() @@ -146,11 +147,11 @@ def alignReads( logger.error(f"Error mapping with STAR: {errmsg.decode()}") raise RuntimeError(f"STAR mapping failed with error: {errmsg.decode()}") except OSError as e: - logger.error("Error mapping with STAR\n Executable not found.") + logger.error("Error mapping with STAR: Executable not found.") raise e if not file_ok(tmpOutputFile): - error = f"Error mapping with STAR. Output file not present {tmpOutputFile}\n" + error = f"Error mapping with STAR: Output file not present {tmpOutputFile}" logger.error(error) raise RuntimeError(error) @@ -167,13 +168,13 @@ def alignReads( with open(log_final, "r") as star_log: for line in star_log: if "Uniquely mapped reads number" in line: - uniquely_mapped = int(str(line).rstrip().split()[-1]) + uniquely_mapped = int(line.strip().split()[-1]) logger.info(line.strip()) elif "Number of reads mapped to multiple loci" in line: - multiple_mapped += int(str(line).rstrip().split()[-1]) - logger.info(str(line).rstrip()) + multiple_mapped += int(line.strip().split()[-1]) + logger.info(line.strip()) elif "% of reads mapped to multiple loci" in line or "% of reads unmapped: too short" in line: - logger.info(str(line).rstrip()) + logger.info(line.strip()) logger.info("Total mapped reads: {}".format(uniquely_mapped + multiple_mapped)) return uniquely_mapped + multiple_mapped @@ -267,6 +268,7 @@ def barcodeDemultiplexing( args += [idFile, reads, outputFilePrefix] try: + logger.debug(f"Taggd demultiplexing, running command: {' '.join(args)}") proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True, shell=False) stdout, errmsg = proc.communicate() @@ -274,29 +276,33 @@ def barcodeDemultiplexing( logger.error(f"Error demultiplexing with Taggd: {errmsg.decode()}") raise RuntimeError(f"Taggd demultiplexing failed with error: {errmsg.decode()}") except OSError as e: - logger.error("Error demultiplexing with Taggd\n Executable not found.") + logger.error("Error demultiplexing with Taggd: Executable not found.") raise e outputFile = f"{outputFilePrefix}_matched{os.path.splitext(reads)[1].lower()}" if not file_ok(outputFile): - error = f"Error demultiplexing with Taggd. Output file not present {outputFile}\n" + error = f"Error demultiplexing with Taggd: Output file not present {outputFile}" logger.error(error) raise RuntimeError(error) + # Write log file and collect stats + outputLog = f"{outputFilePrefix}_log.txt" reads_after_demultiplexing = 0 - for line in stdout.decode().splitlines(): - tokens = [ - "Total reads:", - "Perfect Matches:", - "Imperfect Matches", - "Ambiguous matches:", - "Non-unique ambiguous matches:", - "Unmatched:", - ] - if any(x in line for x in tokens): - logger.info(line) - if "Total reads written:" in line: - logger.info(line) - reads_after_demultiplexing = int(line.split()[-1]) + with open(outputLog, "w") as fwrite: + for line in stdout.decode().splitlines(): + fwrite.write(line + "\n") + tokens = [ + "Total reads:", + "Perfect Matches:", + "Imperfect Matches", + "Ambiguous matches:", + "Non-unique ambiguous matches:", + "Unmatched:", + ] + if any(x in line for x in tokens): + logger.info(line.strip()) + if "Total reads written:" in line: + logger.info(line.strip()) + reads_after_demultiplexing = int(line.strip().split()[-1]) return reads_after_demultiplexing diff --git a/stpipeline/core/pipeline.py b/stpipeline/core/pipeline.py index 7c8c5ff..a5ac05a 100755 --- a/stpipeline/core/pipeline.py +++ b/stpipeline/core/pipeline.py @@ -364,7 +364,7 @@ def __call__(self, parser, namespace, values, option_string=None) -> None: # ty ) parser.add_argument( "--threads", - default=4, + default=8, metavar="[INT]", type=int, choices=range(1, 81), diff --git a/tests/mapping_test.py b/tests/mapping_test.py index c5781d3..aab729d 100644 --- a/tests/mapping_test.py +++ b/tests/mapping_test.py @@ -121,9 +121,10 @@ def test_alignReads(): assert total_reads == 95000 -def test_barcodeDemultiplexing(): +def test_barcodeDemultiplexing(tmpdir): with ( patch("subprocess.Popen") as mock_popen, + patch("stpipeline.core.mapping.open", mock_open()) as mock_open_file, patch("os.path.isfile", return_value=True), patch("stpipeline.core.mapping.file_ok", return_value=True), ): @@ -142,7 +143,7 @@ def test_barcodeDemultiplexing(): taggd_multiple_hits_keep_one=True, taggd_trim_sequences=[1, 2, 3], cores=4, - outputFilePrefix="output/test", + outputFilePrefix=str(tmpdir), keep_discarded_files=False, taggd_chunk_size=100, ) @@ -175,10 +176,11 @@ def test_barcodeDemultiplexing(): "--no-results-output", "barcodes.tsv", "reads.bam", - "output/test", + str(tmpdir), ] mock_popen.assert_called_once_with( expected_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True, shell=False ) + assert str(tmpdir) + "_log.txt" in mock_open_file.call_args[0][0] assert total_reads == 80