Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
jfnavarro committed Jan 22, 2025
1 parent 4dd18b8 commit fb5b14a
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 29 deletions.
7 changes: 4 additions & 3 deletions stpipeline/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from stpipeline.common.utils import file_ok

logger = logging.getLogger("STPipeline")


class UnknownChrom(Exception):
"""
Expand Down Expand Up @@ -211,7 +213,6 @@ def annotateReads(
Raises:
RuntimeError: If input files are missing or errors occur during annotation.
"""
logger = logging.getLogger("STPipeline")

if not os.path.isfile(mappedReads):
raise RuntimeError(f"Input file not found: {mappedReads}")
Expand All @@ -234,12 +235,12 @@ def annotateReads(
outputDiscarded,
)
except Exception as e:
error = "Error during annotation. HTSEQ execution failed"
error = "Error during annotation: HTSEQ execution failed"
logger.error(error)
raise e

if not file_ok(outputFile) or annotated == 0:
error = f"Error during annotation. Output file not present or empty {outputFile}"
error = f"Error during annotation: Output file not present or empty {outputFile}"
logger.error(error)
raise RuntimeError(error)

Expand Down
50 changes: 28 additions & 22 deletions stpipeline/core/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,18 +139,19 @@ def alignReads(
args += ["--outSAMunmapped", "None"]

try:
logger.debug(f"STAR mapping, running command: {' '.join(args)}")
proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True, shell=False)
_, errmsg = proc.communicate()

if proc.returncode != 0:
logger.error(f"Error mapping with STAR: {errmsg.decode()}")
raise RuntimeError(f"STAR mapping failed with error: {errmsg.decode()}")
except OSError as e:
logger.error("Error mapping with STAR\n Executable not found.")
logger.error("Error mapping with STAR: Executable not found.")
raise e

if not file_ok(tmpOutputFile):
error = f"Error mapping with STAR. Output file not present {tmpOutputFile}\n"
error = f"Error mapping with STAR: Output file not present {tmpOutputFile}"
logger.error(error)
raise RuntimeError(error)

Expand All @@ -167,13 +168,13 @@ def alignReads(
with open(log_final, "r") as star_log:
for line in star_log:
if "Uniquely mapped reads number" in line:
uniquely_mapped = int(str(line).rstrip().split()[-1])
uniquely_mapped = int(line.strip().split()[-1])
logger.info(line.strip())
elif "Number of reads mapped to multiple loci" in line:
multiple_mapped += int(str(line).rstrip().split()[-1])
logger.info(str(line).rstrip())
multiple_mapped += int(line.strip().split()[-1])
logger.info(line.strip())
elif "% of reads mapped to multiple loci" in line or "% of reads unmapped: too short" in line:
logger.info(str(line).rstrip())
logger.info(line.strip())
logger.info("Total mapped reads: {}".format(uniquely_mapped + multiple_mapped))

return uniquely_mapped + multiple_mapped
Expand Down Expand Up @@ -267,36 +268,41 @@ def barcodeDemultiplexing(
args += [idFile, reads, outputFilePrefix]

try:
logger.debug(f"Taggd demultiplexing, running command: {' '.join(args)}")
proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True, shell=False)
stdout, errmsg = proc.communicate()

if proc.returncode != 0:
logger.error(f"Error demultiplexing with Taggd: {errmsg.decode()}")
raise RuntimeError(f"Taggd demultiplexing failed with error: {errmsg.decode()}")
except OSError as e:
logger.error("Error demultiplexing with Taggd\n Executable not found.")
logger.error("Error demultiplexing with Taggd: Executable not found.")
raise e

outputFile = f"{outputFilePrefix}_matched{os.path.splitext(reads)[1].lower()}"
if not file_ok(outputFile):
error = f"Error demultiplexing with Taggd. Output file not present {outputFile}\n"
error = f"Error demultiplexing with Taggd: Output file not present {outputFile}"
logger.error(error)
raise RuntimeError(error)

# Write log file and collect stats
outputLog = f"{outputFilePrefix}_log.txt"
reads_after_demultiplexing = 0
for line in stdout.decode().splitlines():
tokens = [
"Total reads:",
"Perfect Matches:",
"Imperfect Matches",
"Ambiguous matches:",
"Non-unique ambiguous matches:",
"Unmatched:",
]
if any(x in line for x in tokens):
logger.info(line)
if "Total reads written:" in line:
logger.info(line)
reads_after_demultiplexing = int(line.split()[-1])
with open(outputLog, "w") as fwrite:
for line in stdout.decode().splitlines():
fwrite.write(line + "\n")
tokens = [
"Total reads:",
"Perfect Matches:",
"Imperfect Matches",
"Ambiguous matches:",
"Non-unique ambiguous matches:",
"Unmatched:",
]
if any(x in line for x in tokens):
logger.info(line.strip())
if "Total reads written:" in line:
logger.info(line.strip())
reads_after_demultiplexing = int(line.strip().split()[-1])

return reads_after_demultiplexing
2 changes: 1 addition & 1 deletion stpipeline/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def __call__(self, parser, namespace, values, option_string=None) -> None: # ty
)
parser.add_argument(
"--threads",
default=4,
default=8,
metavar="[INT]",
type=int,
choices=range(1, 81),
Expand Down
8 changes: 5 additions & 3 deletions tests/mapping_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,10 @@ def test_alignReads():
assert total_reads == 95000


def test_barcodeDemultiplexing():
def test_barcodeDemultiplexing(tmpdir):
with (
patch("subprocess.Popen") as mock_popen,
patch("stpipeline.core.mapping.open", mock_open()) as mock_open_file,
patch("os.path.isfile", return_value=True),
patch("stpipeline.core.mapping.file_ok", return_value=True),
):
Expand All @@ -142,7 +143,7 @@ def test_barcodeDemultiplexing():
taggd_multiple_hits_keep_one=True,
taggd_trim_sequences=[1, 2, 3],
cores=4,
outputFilePrefix="output/test",
outputFilePrefix=str(tmpdir),
keep_discarded_files=False,
taggd_chunk_size=100,
)
Expand Down Expand Up @@ -175,10 +176,11 @@ def test_barcodeDemultiplexing():
"--no-results-output",
"barcodes.tsv",
"reads.bam",
"output/test",
str(tmpdir),
]

mock_popen.assert_called_once_with(
expected_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True, shell=False
)
assert str(tmpdir) + "_log.txt" in mock_open_file.call_args[0][0]
assert total_reads == 80

0 comments on commit fb5b14a

Please sign in to comment.