Skip to content

Commit

Permalink
Update to 0.2.0:
Browse files Browse the repository at this point in the history
add circular fingerprints.
  • Loading branch information
Xiangyan93 committed Dec 8, 2023
1 parent 227ea64 commit 3a23ad6
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 8 deletions.
2 changes: 1 addition & 1 deletion mgktools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# -*- coding: utf-8 -*-


__version__ = '0.1.1'
__version__ = '0.2.0'
12 changes: 11 additions & 1 deletion mgktools/features_mol/features_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem, Descriptors
import deepchem
from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors


Expand All @@ -15,7 +16,7 @@ def __init__(self, features_generator_name: Union[str, Callable],
self.features_generator_name = features_generator_name
self.radius = radius
self.num_bits = num_bits
if features_generator_name in ['morgan', 'morgan_count']:
if features_generator_name in ['morgan', 'morgan_count', 'circular']:
assert self.radius is not None
assert self.num_bits is not None

Expand All @@ -26,6 +27,8 @@ def __call__(self, mol: Union[str, Chem.Mol]) -> np.ndarray:
return self.morgan_binary_features_generator(mol)
elif self.features_generator_name == 'morgan_count':
return self.morgan_counts_features_generator(mol)
elif self.features_generator_name == 'circular':
return self.circular_features_generator(mol)
elif self.features_generator_name == 'rdkit_208':
return self.rdkit_208_features_generator(mol)
elif self.features_generator_name == 'rdkit_2d':
Expand Down Expand Up @@ -67,6 +70,13 @@ def morgan_counts_features_generator(self, mol: Union[str, Chem.Mol]) -> np.ndar

return features

@staticmethod
def circular_features_generator(mol: Union[str, Chem.Mol], *, radius: int = 8,
                                num_bits: int = 2048) -> np.ndarray:
    """
    Generates a dense circular fingerprint for a molecule using DeepChem.

    :param mol: A molecule (a SMILES string or an RDKit molecule); it is handed
                to the DeepChem featurizer unchanged.
    :param radius: Fingerprint radius forwarded to the featurizer. Defaults to 8,
                   the value that used to be hard-coded here.
    :param num_bits: Fingerprint length forwarded to the featurizer as ``size``.
                     Defaults to 2048, the value that used to be hard-coded here.
    :return: The featurizer output for the single molecule, flattened to 1D.
    """
    # NOTE(review): the visible dispatcher calls this method with ``mol`` only, so
    # the instance's ``radius`` / ``num_bits`` are not forwarded and the defaults
    # above apply -- confirm whether the caller should pass them explicitly.
    # NOTE(review): ``smiles=True`` is kept as in the original call; DeepChem
    # documents it as relevant to sparse fingerprints only -- confirm it is a
    # no-op together with ``sparse=False``.
    featurizer = deepchem.feat.CircularFingerprint(size=num_bits, radius=radius,
                                                   sparse=False, smiles=True)
    return featurizer.featurize([mol]).ravel()

@staticmethod
def rdkit_2d_features_generator(mol: Union[str, Chem.Mol]) -> np.ndarray:
"""
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def read(*filenames, **kwargs):
'rxntools>=0.0.2',
'pycuda>=2022.1',
'rdkit>=2022.9.2',
'deepchem==2.7.2.dev20231207083329'
],
author='Yan Xiang',
author_email='1993.xiangyan@gmail.com',
Expand Down
13 changes: 7 additions & 6 deletions test/cross_validation/test_cv_pure.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,19 @@
from mgktools.evaluators.cross_validation import Evaluator


pure = ['CCCC', 'CCCCCO', 'c1ccccc1', 'CCNCCO', 'CCCCN', 'NCCCCCO', 'c1ccccc1N', 'NCCNCCO']
targets_regression = [3.1, 14.5, 25.6, 56.7, 9.1, 17.5, 22.6, 36.7]
pure = ['CCCC', 'CCCCCO', 'c1ccccc1', 'CCNCCO', 'CCCCN', 'NCCCCCO', 'c1ccccc1N', 'NCCNCCO',
'CNC(CC)CC', 'c1ccccc1', 'c1ccccc1CCCCc1ccccc1', 'CC(=O)OCCO']
targets_regression = [3.1, 14.5, 25.6, 56.7, 9.1, 17.5, 22.6, 36.7, 23.1, 32.1, 1.4, 7.6]
df_regression = pd.DataFrame({'pure': pure, 'targets': targets_regression})
targets_classification = [1, 1, 0, 1, 1, 0, 0, 1]
targets_classification = [1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1]
df_classification = pd.DataFrame({'pure': pure, 'targets': targets_classification})


@pytest.mark.parametrize('mgk_file', [additive_norm, additive_pnorm, additive_msnorm,
product_norm, product_pnorm, product_msnorm])
@pytest.mark.parametrize('model', ['gpc', 'svc'])
@pytest.mark.parametrize('split_type', ['random', 'scaffold_order', 'scaffold_random'])
def test_only_graph(mgk_file, model, split_type):
def test_only_graph_classification(mgk_file, model, split_type):
dataset = Dataset.from_df(df=df_classification,
pure_columns=['pure'],
target_columns=['targets'])
Expand All @@ -49,12 +50,12 @@ def test_only_graph(mgk_file, model, split_type):
product_norm, product_pnorm, product_msnorm])
@pytest.mark.parametrize('modelsets', [('gpr', None, None, None),
('gpr-sod', 2, 3, 'smallest_uncertainty'),
('gpr-sod', 2, 3, 'weight_uncertainty'),
# ('gpr-sod', 2, 3, 'weight_uncertainty'),
('gpr-sod', 2, 3, 'mean'),
('gpr-nystrom', None, 3, None),
('gpr-nle', None, 3, None)])
@pytest.mark.parametrize('split_type', ['random', 'scaffold_order', 'scaffold_random'])
def test_only_graph(mgk_file, modelsets, split_type):
def test_only_graph_scalable_gps(mgk_file, modelsets, split_type):
model_type, n_estimators, n_samples, consensus_rule = modelsets
dataset = Dataset.from_df(df=df_regression,
pure_columns=['pure'],
Expand Down
2 changes: 2 additions & 0 deletions test/data/test_data_pure.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def test_only_graph(testset):

@pytest.mark.parametrize('testset', [
('morgan', 2048),
('circular', 2048),
('rdkit_2d', 200),
('rdkit_2d_normalized', 200),
])
Expand All @@ -42,6 +43,7 @@ def test_only_fingerprints(testset):

@pytest.mark.parametrize('testset', [
('morgan', 2048),
('circular', 2048),
('rdkit_2d', 200),
('rdkit_2d_normalized', 200),
])
Expand Down

0 comments on commit 3a23ad6

Please sign in to comment.