From 09be06b23dfeb123a978884c8117c160632788e7 Mon Sep 17 00:00:00 2001 From: xiangyan93 Date: Mon, 7 Oct 2024 14:05:39 -0400 Subject: [PATCH] Support atomInvariantsGenerator for Morgan Fingerprints. Bugfix for Morgan_count and rdkit_topol fingerprints. --- mgktools/features_mol/features_generators.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mgktools/features_mol/features_generators.py b/mgktools/features_mol/features_generators.py index 7c26745..38fe066 100644 --- a/mgktools/features_mol/features_generators.py +++ b/mgktools/features_mol/features_generators.py @@ -12,12 +12,17 @@ class FeaturesGenerator: def __init__(self, features_generator_name: Union[str, Callable], radius: int = 2, - num_bits: int = 2048): + num_bits: int = 2048, + atomInvariantsGenerator: bool = False): self.features_generator_name = features_generator_name self.radius = radius self.num_bits = num_bits if features_generator_name in ['morgan', 'morgan_count']: - self.generator = AllChem.GetMorganGenerator(radius=radius, fpSize=num_bits) + if atomInvariantsGenerator: + invgen = AllChem.GetMorganFeatureAtomInvGen() + self.generator = AllChem.GetMorganGenerator(radius=radius, fpSize=num_bits, atomInvariantsGenerator=invgen) + else: + self.generator = AllChem.GetMorganGenerator(radius=radius, fpSize=num_bits) elif features_generator_name == 'circular': import deepchem self.generator = deepchem.feat.CircularFingerprint(radius=radius, size=num_bits, @@ -48,6 +53,8 @@ def __call__(self, mol: Union[str, Chem.Mol]) -> np.ndarray: return self.rdkit_2d_features_generator(mol) elif self.features_generator_name == 'rdkit_2d_normalized': return self.rdkit_2d_normalized_features_generator(mol) + elif self.features_generator_name == 'rdkit_topol': + return self.rdkit_topological_features_generator(mol) elif self.features_generator_name == 'layered': return self.layered_features_generator(mol) elif self.features_generator_name == 'torsion': @@ -87,7 +94,7 @@ def morgan_counts_features_generator(self, mol: Union[str, Chem.Mol]) -> np.ndar :return: A 1D numpy array containing the counts-based Morgan fingerprint. """ mol = Chem.MolFromSmiles(mol) if isinstance(mol, str) else mol - return np.array(self.generator.GetFingerprint(mol).ToList()) + return np.array(self.generator.GetCountFingerprint(mol).ToList()) def circular_features_generator(self, mol: Union[str, Chem.Mol]) -> np.ndarray: features = self.generator.featurize([mol]).ravel()