Skip to content

Commit

Permalink
fix CI
Browse files (browse the repository at this point in the history)
  • Loading branch information
Bribak committed Jan 31, 2025
1 parent f3cd8f0 commit f05b0d4
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 29 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
#### processing
##### Added ✨
- Added "antennary_Fuc" as another inferred feature to `infer_features_from_composition` (a64f694)
- Added "IdoA", "GalA", "Araf", "D-Fuc", "AllNAc", and "Par" to recognized WURCS2 tokens (52fc16e)
- Added "IdoA", "GalA", "Araf", "D-Fuc", "AllNAc", and "Par" to recognized WURCS2 tokens (52fc16e, f3cd8f0)
- Added the new "order_by" keyword argument to `choose_correct_isoform` to enforce strictly sorting branches by branch endings / linkages, if desired (918d18f)

##### Changed 🔄
Expand All @@ -107,7 +107,7 @@
- `get_class` will now return "lipid/free" if glycans of type Neu5Ac(a2-3)Gal(b1-4)Glc are supplied (i.e., lacking 1Cer and -ol but still lactose-core based) (b99699c)
- `expand_lib` now no longer modifies the input dictionary (65bd12c)
- `get_possible_linkages` now returns a set instead of a list (a98461f)
- `wurcs_to_iupac` now can also properly deal with ultra-narrow linkage wildcards (e.g., a2-3/6)
- `wurcs_to_iupac` can now also properly handle ultra-narrow linkage wildcards (e.g., a2-3/6) (f3cd8f0)

##### Fixed 🐛
- Fixed component inference in `parse_glycoform` in case of unexpected composition formats (0c94995)
Expand Down
2 changes: 1 addition & 1 deletion glycowork/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.4.0"
__version__ = "1.5.0"
from .motif.draw import GlycoDraw
#from .glycowork import *

Expand Down
37 changes: 12 additions & 25 deletions glycowork/motif/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,7 @@ def linearcode_to_iupac(linearcode: str # Glycan in LinearCode format
replace_dic = {'G': 'Glc', 'ME': 'me', 'M': 'Man', 'A': 'Gal', 'NN': 'Neu5Ac', 'GlcN': 'GlcNAc', 'GN': 'GlcNAc',
'GalN': 'GalNAc', 'AN': 'GalNAc', 'F': 'Fuc', 'K': 'Kdn', 'W': 'Kdo', 'L': 'GalA', 'I': 'IdoA', 'PYR': 'Pyr', 'R': 'Araf', 'H': 'Rha',
'X': 'Xyl', 'B': 'Rib', 'U': 'GlcA', 'O': 'All', 'E': 'Fruf', '[': '', ']': '', 'me': 'Me', 'PC': 'PCho', 'T': 'Ac'}
glycan = multireplace(linearcode.split(';')[0], replace_dic)
return glycan
return multireplace(linearcode.split(';')[0], replace_dic)


def iupac_extended_to_condensed(iupac_extended: str # Glycan in IUPAC-extended format
Expand Down Expand Up @@ -555,11 +554,9 @@ def glycoct_build_iupac(iupac_parts: Dict[int, List[Tuple[str, int]]], # IUPAC f
inverted_residue_dic = {}
inverted_residue_dic.setdefault(residue_dic[start], []).append(start)
for parent, children in iupac_parts.items():
child_strings = []
child_strings, i, last_child = [], 0, 0
children_degree = [degrees[c[1]] for c in children]
children = [x for _, x in sorted(zip(children_degree, children), reverse = True)]
i = 0
last_child = 0
for child in children:
prefix = '[' if degrees[child[1]] == 1 else ''
suffix = ']' if children.index(child) > 0 else ''
Expand All @@ -585,16 +582,15 @@ def glycoct_build_iupac(iupac_parts: Dict[int, List[Tuple[str, int]]], # IUPAC f
def glycoct_to_iupac(glycoct: str # Glycan in GlycoCT format
) -> str: # Basic IUPAC-condensed format
"Convert glycan from GlycoCT to barebones IUPAC-condensed format"
floating_bits = []
floating_part = ''
floating_part, floating_bits = '', []
mono_replace = {'dglc': 'Glc', 'dgal': 'Gal', 'dman': 'Man', 'lgal': 'Fuc', 'dgro': 'Neu',
'dxyl': 'Xyl', 'dara': 'D-Ara', 'lara': 'Ara', 'HEX': 'Hex', 'lman': 'Rha'}
sub_replace = {'n-acetyl': 'NAc', 'sulfate': 'OS', 'phosphate': 'OP', 'n-glycolyl': '5Gc',
'acetyl': 'OAc', 'methyl': 'OMe'}
if len(glycoct.split("UND")) > 1:
floating_bits = glycoct.split("UND")[2:]
floating_bits = ["RES" + f.split('RES')[1] for f in floating_bits]
glycoct = glycoct.split("UND")[0]
floating_bits = glycoct.split("UND")[2:]
floating_bits = ["RES" + f.split('RES')[1] for f in floating_bits]
glycoct = glycoct.split("UND")[0]
# Split the input by lines and iterate over them
residue_dic, iupac_parts, degrees = glycoct_to_iupac_int(glycoct, mono_replace, sub_replace)
if floating_bits:
Expand All @@ -615,8 +611,7 @@ def glycoct_to_iupac(glycoct: str # Glycan in GlycoCT format
iupac = re.sub(r'([1-9\?O](S|P|Ac|Me))NAc', r'NAc\1', iupac)
if ']' in iupac and iupac.index(']') < iupac.index('['):
iupac = iupac.replace(']', '', 1)
iupac = iupac.replace('[[', '[').replace(']]', ']').replace('Neu(', 'Kdn(')
return iupac
return iupac.replace('[[', '[').replace(']]', ']').replace('Neu(', 'Kdn(')


def get_mono(token: str # WURCS monosaccharide token
Expand Down Expand Up @@ -646,25 +641,21 @@ def wurcs_to_iupac(wurcs: str # Glycan in WURCS format
additional_pattern = r'\b([a-z])\?(?:\|\w\?)+\}?'
def replacement(match):
text = match.group(0)
if '?' in text:
return f'{match.group(1)}?' if match.group(1) else f'?{match.group(2)}'
if '|' in text and text[-1].isdigit(): # Case like r3|r6
letter = text[0]
nums = [c for c in text if c.isdigit()]
return f'{letter}{nums[0]}*{nums[1]}'
return f'{match.group(1)}?' if match.group(1) else f'?{match.group(2)}'
wurcs = re.sub(pattern, replacement, wurcs)
wurcs = re.sub(additional_pattern, '?', wurcs)
floating_part = ''
floating_parts = []
floating_part, floating_parts = '', []
parts = wurcs.split('/')
topology = parts[-1].split('_')
monosaccharides = '/'.join(parts[1:-2]).strip('[]').split('][')
connectivity = parts[-2].split('-')
connectivity = {chr(97 + i): int(num) for i, num in enumerate(connectivity)}
degrees = {c: ''.join(topology).count(c) for c in connectivity}
inverted_connectivity = {}
iupac_parts = []
inverted_connectivity, iupac_parts = {}, []
for link in topology:
if '-' not in link:
return get_mono(monosaccharides[0])
Expand Down Expand Up @@ -711,9 +702,7 @@ def replacement(match):
inverted_connectivity.setdefault(connectivity[tgt], []).insert(-insertion_idx, tgt)
else:
inverted_connectivity.setdefault(connectivity[tgt], []).append(tgt)
iupac = iupac[:-1]
iupac = iupac.strip('[]')
iupac = iupac.replace('}[', '}').replace('{[', '{')
iupac = iupac[:-1].strip('[]').replace('}[', '}').replace('{[', '{')
pattern = re.compile(r'([ab\?])\(')
iupac = pattern.sub(lambda match: f"({match.group(1)}", iupac)
# Define the pattern to find two ][ separated by a string with exactly one (
Expand All @@ -730,8 +719,7 @@ def remove_first_unmatched_opening_bracket(s):
if balance < 0:
return s[:i] + s[i + 1:]
return s
iupac = remove_first_unmatched_opening_bracket(iupac)
return iupac
return remove_first_unmatched_opening_bracket(iupac)


def oxford_to_iupac(oxford: str # Glycan in Oxford format
Expand Down Expand Up @@ -844,8 +832,7 @@ def oxford_to_iupac(oxford: str # Glycan in Oxford format
while sulf > 0:
iupac = iupac.replace("Gal(", "GalOS(", 1)
sulf -= 1
iupac = floaty + iupac.strip('[]')
return iupac
return floaty + iupac.strip('[]')


def check_nomenclature(glycan: str # Glycan string to check
Expand Down
2 changes: 1 addition & 1 deletion glycowork/network/biosynthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def process_ptm(glycans: List[str], # List of glycans
edges = [find_ptm(k, glycans, graph_dic, stem_lib, allowed_ptms = allowed_ptms,
ggraphs = ggraphs, suffix = suffix) for k in ptm_glycans]
valid_edges = [k for k in edges if k != 0]
return zip(*valid_edges) if valid_edges else ([], [])
return list(zip(*valid_edges)) if valid_edges else ([], [])


def update_network(network_in: nx.Graph, # Input network
Expand Down

0 comments on commit f05b0d4

Please sign in to comment.