Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
jfnavarro committed Jan 12, 2025
1 parent a3a7d92 commit 72aa065
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 52 deletions.
12 changes: 4 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,12 @@ include = [
{ path = "README.md" },
{ path = "README_SHORT.md" },
{ path = "LICENSE" },
{ path = "doc/**" },
{ path = "scripts/*" },
{ path = "stpipeline/common/cdistance.pyx" },
{ path = "stpipeline/common/*.pxd" },
{ path = "stpipeline/common/*.so" },
{ path = "doc/**" }
]

[tool.poetry.dependencies]
python = ">=3.10,<=3.12"
argparse = "^1.4.0"
cython = "^3.0.11"
numpy = "^2.2.1"
pandas = "^2.2.3"
scipy = "^1.15.0"
Expand All @@ -48,6 +43,7 @@ seaborn = "^0.13.2"
types-regex = "^2024.11.6.20241221"
pandas-stubs = "^2.2.3.241126"
dnaio = "^1.2.3"
distance = "^0.1.3"

[tool.poetry.scripts]
st_qa = "scripts.st_qa:main"
Expand Down Expand Up @@ -85,8 +81,8 @@ doc = [
]

[build-system]
requires = ["poetry-core>=1.0.0", "setuptools", "cython", "numpy"]
build-backend = "setuptools.build_meta"
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.ruff]
# Exclude a variety of commonly ignored directories.
Expand Down
20 changes: 0 additions & 20 deletions setup.py

This file was deleted.

12 changes: 0 additions & 12 deletions stpipeline/common/cdistance.pyx

This file was deleted.

17 changes: 5 additions & 12 deletions stpipeline/common/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster # type: ignore
from collections import defaultdict
from stpipeline.common.cdistance import hamming_distance # type: ignore
from stpipeline.common.distance import hamming_distance
import random
from collections import Counter
from typing import List, Dict, Set, Any
Expand Down Expand Up @@ -42,12 +42,7 @@ def _get_adj_list_adjacency(umis: List[str], allowed_mismatches: int) -> Dict[st
Constructs an adjacency list where each UMI points to all other UMIs within
the allowed mismatches.
"""
return {
umi: [
umi2 for umi2 in umis if hamming_distance(umi.encode("UTF-8"), umi2.encode("UTF-8")) <= allowed_mismatches
]
for umi in umis
}
return {umi: [umi2 for umi2 in umis if hamming_distance(umi, umi2) <= allowed_mismatches] for umi in umis}


def _get_connected_components_adjacency(adj_list: Dict[str, List[str]], counts: Counter[str]) -> List[List[str]]:
Expand Down Expand Up @@ -100,8 +95,7 @@ def _get_adj_list_directional_adjacency(
umi: [
umi2
for umi2 in umis
if hamming_distance(umi.encode("UTF-8"), umi2.encode("UTF-8")) <= allowed_mismatches
and counts[umi] >= (counts[umi2] * 2) - 1
if hamming_distance(umi, umi2) <= allowed_mismatches and counts[umi] >= (counts[umi2] * 2) - 1
]
for umi in umis
}
Expand Down Expand Up @@ -139,14 +133,13 @@ def dedup_hierarchical(molecular_barcodes: List[str], allowed_mismatches: int, m
if len(molecular_barcodes) == 2:
return (
molecular_barcodes
if hamming_distance(molecular_barcodes[0].encode("UTF-8"), molecular_barcodes[1].encode("UTF-8"))
<= allowed_mismatches
if hamming_distance(molecular_barcodes[0], molecular_barcodes[1]) <= allowed_mismatches
else [random.choice(molecular_barcodes)]
)

def d(coord: Any) -> int:
i, j = coord
return hamming_distance(molecular_barcodes[i].encode("UTF-8"), molecular_barcodes[j].encode("UTF-8")) # type: ignore
return hamming_distance(molecular_barcodes[i], molecular_barcodes[j])

indices = np.triu_indices(len(molecular_barcodes), 1)
distance_matrix = np.apply_along_axis(d, 0, indices)
Expand Down
23 changes: 23 additions & 0 deletions stpipeline/common/distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Module for distance metrics.
"""
import distance # type: ignore


def hamming_distance(a: str, b: str) -> int:
    """
    Calculates the Hamming distance between two strings, i.e. the number of
    positions at which the corresponding characters differ.

    Args:
        a: First string.
        b: Second string.

    Returns:
        The Hamming distance between the two strings (0 for two empty strings).

    Raises:
        ValueError: If the strings are of unequal length.
    """
    if len(a) != len(b):
        raise ValueError("Strings must be of equal length to calculate Hamming distance")
    # The lengths are equal, so zip() pairs every position without truncating;
    # each mismatch is a True, which sum() counts as 1.
    return sum(char_a != char_b for char_a, char_b in zip(a, b))

0 comments on commit 72aa065

Please sign in to comment.