Skip to content

Commit

Permalink
#1440 Add support for query features in MOL, SDF and RXN formats (Marvin extension) (#1464)
Browse files Browse the repository at this point in the history
  • Loading branch information
AliaksandrDziarkach authored Jan 3, 2024
1 parent faa03d1 commit 9c181a5
Show file tree
Hide file tree
Showing 16 changed files with 279 additions and 82 deletions.
3 changes: 3 additions & 0 deletions api/c/indigo/src/indigo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ void Indigo::init()
json_saving_pretty = false;

molfile_saving_add_implicit_h = true;
molfile_saving_add_mrv_sma = true;

smiles_saving_write_name = false;
smiles_saving_smarts_mode = false;
Expand Down Expand Up @@ -189,6 +190,7 @@ void Indigo::initMolfileSaver(MolfileSaver& saver)
saver.no_chiral = molfile_saving_no_chiral;
saver.add_stereo_desc = molfile_saving_add_stereo_desc;
saver.add_implicit_h = molfile_saving_add_implicit_h;
saver.add_mrv_sma = molfile_saving_add_mrv_sma;
saver.chiral_flag = molfile_saving_chiral_flag;
}

Expand All @@ -210,6 +212,7 @@ void Indigo::initRxnfileSaver(RxnfileSaver& saver)
saver.skip_date = molfile_saving_skip_date;
saver.add_stereo_desc = molfile_saving_add_stereo_desc;
saver.add_implicit_h = molfile_saving_add_implicit_h;
saver.add_mrv_sma = molfile_saving_add_mrv_sma;
}

Indigo::~Indigo()
Expand Down
1 change: 1 addition & 0 deletions api/c/indigo/src/indigo_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ class DLLEXPORT Indigo
bool molfile_saving_skip_date;
bool molfile_saving_add_stereo_desc;
bool molfile_saving_add_implicit_h;
bool molfile_saving_add_mrv_sma;
bool json_saving_add_stereo_desc;
bool json_saving_pretty;
bool smiles_saving_write_name;
Expand Down
1 change: 1 addition & 0 deletions api/c/indigo/src/indigo_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ void IndigoOptionHandlerSetter::setBasicOptionHandlers(const qword id)
mgr->setOptionHandlerBool("json-saving-add-stereo-desc", SETTER_GETTER_BOOL_OPTION(indigo.json_saving_add_stereo_desc));
mgr->setOptionHandlerBool("json-saving-pretty", SETTER_GETTER_BOOL_OPTION(indigo.json_saving_pretty));
mgr->setOptionHandlerBool("molfile-saving-add-implicit-h", SETTER_GETTER_BOOL_OPTION(indigo.molfile_saving_add_implicit_h));
mgr->setOptionHandlerBool("molfile-saving-add-mrv-sma", SETTER_GETTER_BOOL_OPTION(indigo.molfile_saving_add_mrv_sma));
mgr->setOptionHandlerBool("smiles-saving-write-name", SETTER_GETTER_BOOL_OPTION(indigo.smiles_saving_write_name));
mgr->setOptionHandlerString("filename-encoding", indigoSetFilenameEncoding, indigoGetFilenameEncoding);
mgr->setOptionHandlerInt("fp-ord-qwords", SETTER_GETTER_INT_OPTION(indigo.fp_params.ord_qwords));
Expand Down
25 changes: 25 additions & 0 deletions api/tests/integration/ref/basic/check_query.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ M UNS 1 4 1
M RBC 1 33 -2
A 59
CYC
M MRV SMA 16 [#6;H2]
M STY 2 1 DAT 2 DAT
M SPL 1 2 1
M SLB 2 1 1 2 2
Expand Down Expand Up @@ -636,3 +637,27 @@ Check query for bond 18 returns 0
Check query for bond 19 returns 0
Check query for bond 20 returns 0
Check query for bond 21 returns 0
[#6]1[#6][#6,#7,F;a;r6;R1;h1;H][!Cl;!Br;!I;a;R1;h1;H][#6][c;r6;R1;X3;v3;h1;H]1

-INDIGO-01000000002D

6 6 0 0 0 0 0 0 0 0999 V2000
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0000 0.0000 0.0000 C 0 0 0 2 0 3 0 0 0 0 0 0
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
0.0000 0.0000 0.0000 L 0 0 0 2 0 0 0 0 0 0 0 0
0.0000 0.0000 0.0000 L 0 0 0 2 0 0 0 0 0 0 0 0
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1 2 6 0 0 0 0
2 3 6 0 0 0 0
3 4 6 0 0 0 0
4 5 6 0 0 0 0
5 6 6 0 0 0 0
6 1 6 0 0 0 0
M ALS 4 3 T Cl Br I
M ALS 5 3 F C N F
M MRV SMA 2 [#6;Hh1X3R1r6a]
M MRV SMA 4 [!#17;!#35;!#53;Hh1R1a]
M MRV SMA 5 [#6,#7,#9;Hh1R1r6a]
M END

3 changes: 3 additions & 0 deletions api/tests/integration/ref/deco/deco_recursive_smarts.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ full scaffold:
3 4 2 0 0 0 0
5 2 1 0 0 0 0
6 4 1 0 0 0 0
M MRV SMA 2 [#7;A]
M MRV SMA 3 [#6;A]
M MRV SMA 4 [#8;A]
M RGP 2 5 1 6 2
M END

Expand Down
12 changes: 9 additions & 3 deletions api/tests/integration/ref/formats/mol_features.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -7928,7 +7928,7 @@ M V30 11 C 5.9062 -4.5267 0.0 0
M V30 12 C 6.6195 -4.1121 0.0 0
M V30 13 O 4.4773 -4.531 0.0 0 CHG=-1
M V30 14 Fe 5.1995 -9.0694 0.0 0 VAL=-1
M V30 15 A 3.7701 -8.2434 0.0 0
M V30 15 [Gln,Lys] 3.7701 -8.2434 0.0 0
M V30 16 C 3.7715 -6.5918 0.0 0
M V30 17 C 6.6313 -8.2424 0.0 0
M V30 END ATOM
Expand Down Expand Up @@ -7976,7 +7976,7 @@ M END
6.6195 -4.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
4.4773 -4.5310 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
5.1995 -9.0694 0.0000 Fe 0 0 0 0 0 15 0 0 0 0 0 0
3.7701 -8.2434 0.0000 A 0 0 0 0 0 0 0 0 0 0 0 0
3.7701 -8.2434 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0
3.7715 -6.5918 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
6.6313 -8.2424 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
7 8 1 0 0 0 0
Expand All @@ -7998,8 +7998,10 @@ M END
4 17 1 0 0 0 0
M CHG 1 13 -1
M ALS 10 2 F N P
M ALS 15 2 F Gln Lys
A 16
Fred
M MRV SMA 5 [#6;H0]
M END

molfile-saving-mode: 3000
Expand All @@ -8024,7 +8026,7 @@ M V30 11 C 5.9062 -4.5267 0.0 0
M V30 12 C 6.6195 -4.1121 0.0 0
M V30 13 O 4.4773 -4.531 0.0 0 CHG=-1
M V30 14 Fe 5.1995 -9.0694 0.0 0 VAL=-1
M V30 15 A 3.7701 -8.2434 0.0 0
M V30 15 [Gln,Lys] 3.7701 -8.2434 0.0 0
M V30 16 C 3.7715 -6.5918 0.0 0
M V30 17 C 6.6313 -8.2424 0.0 0
M V30 END ATOM
Expand Down Expand Up @@ -8845,6 +8847,8 @@ ind-928-asdrawn.mol:
M UNS 2 3 1 11 1
M SUB 2 4 -2 5 4
M RBC 2 4 3 7 -2
M MRV SMA 10 [#6;H2]
M MRV SMA 13 [#6;H]
M END

molfile-saving-mode: 2000
Expand Down Expand Up @@ -8884,6 +8888,8 @@ M END
M UNS 2 3 1 11 1
M SUB 2 4 -2 5 4
M RBC 2 4 3 7 -2
M MRV SMA 10 [#6;H2]
M MRV SMA 13 [#6;H]
M END

molfile-saving-mode: 3000
Expand Down
2 changes: 2 additions & 0 deletions api/tests/integration/ref/formats/mol_to_cml.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,7 @@ M SUB 1 9 -2
M RBC 1 5 -2
M ALS 14 3 F B Si As
M ALS 15 3 T N P As
M MRV SMA 7 [#6;H3]
M RGP 1 1 1
M LOG 1 1 0 0
M END
Expand Down Expand Up @@ -886,6 +887,7 @@ M SUB 1 9 -2
M RBC 1 5 -2
M ALS 14 3 F B Si As
M ALS 15 3 T N P As
M MRV SMA 7 [#6;H3]
M RGP 1 1 1
M LOG 1 1 0 0
M END
Expand Down
7 changes: 7 additions & 0 deletions api/tests/integration/tests/basic/check_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,10 @@
"Check query for bond %d returns %d"
% (bond.index(), bond.checkQuery())
)

# Load a SMARTS query for a six-atom ring in which three atoms carry extra
# query constraints (an element list or negated list plus H/h/v/X/R/r/a
# properties), then print it back both as SMARTS and as a molfile.
# In the reference output the molfile shows atom lists as "M  ALS" lines and
# the per-atom query expressions as "M  MRV SMA" lines.
mol = indigo.loadSmarts(
"[#6]1[#6;a;H1h1v3X3R1r6][#6][!#17!#35!#53;a;H1h1R1][#6,#7,#9;a;H1h1R1r6][#6]1"
)

print(mol.smarts())
print(mol.molfile())
1 change: 1 addition & 0 deletions core/indigo-core/molecule/molfile_saver.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ namespace indigo
bool add_implicit_h; // If true then MRV_IMPLICIT_H Data S-groups will be added for saving
// the number of implicit H for aromatic atoms
// (if it is required for correct de-aromatization) (default value is true)
bool add_mrv_sma; // If true then "MRV SMA" extension will be added for query molecules (default value is true)
static int parseFormatMode(const char* mode);
static void saveFormatMode(int mode, Array<char>& output);

Expand Down
11 changes: 7 additions & 4 deletions core/indigo-core/molecule/query_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ namespace indigo
static std::string getSmartsBondStr(QueryMolecule::Bond* bond);
static void writeSmartsBond(Output& output, QueryMolecule::Bond* bond, bool has_or_parent);
static std::string getSmartsAtomStr(QueryMolecule::Atom* atom, int original_format);
static std::string getMolMrvSmaExtension(QueryMolecule& qm, int aid);
static void writeSmartsAtom(Output& output, Atom* atom, int aam, int chirality, int depth, bool has_or_parent, bool has_not_parent,
int original_format);

Expand Down Expand Up @@ -397,6 +398,8 @@ namespace indigo
Atom* releaseAtom(int idx);
void resetAtom(int idx, Atom* atom);

static bool isAtomProperty(OpType type);

Bond& getBond(int idx);
Bond* releaseBond(int idx);
void resetBond(int idx, Bond* bond);
Expand Down Expand Up @@ -428,7 +431,7 @@ namespace indigo

bool standardize(const StandardizeOptions& options);

static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<int>& list, std::map<int, std::unique_ptr<Atom>>& properties);
static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector<std::unique_ptr<Atom>>& list, std::map<int, std::unique_ptr<Atom>>& properties);

protected:
void _getAtomDescription(Atom* atom, Output& out, int depth);
Expand All @@ -444,10 +447,10 @@ namespace indigo
void _removeBonds(const Array<int>& indices) override;

using AtomList = std::pair<bool, std::set<int>>;
static bool _isAtomListOr(Atom* pqa, std::vector<int>& list);
static bool _isAtomOrListAndProps(Atom* pqa, std::vector<int>& list, bool& neg, std::map<int, std::unique_ptr<Atom>>& properties);
static bool _isAtomListOr(Atom* pqa, std::vector<std::unique_ptr<Atom>>& list);
static bool _isAtomOrListAndProps(Atom* pqa, std::vector<std::unique_ptr<Atom>>& list, bool& neg, std::map<int, std::unique_ptr<Atom>>& properties);
static bool _isAtomList(Atom* qa, AtomList list);
static bool _tryToConvertToList(Atom* p_query_atom, std::vector<int>& atoms, std::map<int, std::unique_ptr<Atom>>& properties);
static bool _tryToConvertToList(Atom* p_query_atom, std::vector<std::unique_ptr<Atom>>& atoms, std::map<int, std::unique_ptr<Atom>>& properties);

Array<int> _min_h;

Expand Down
13 changes: 9 additions & 4 deletions core/indigo-core/molecule/src/molecule_json_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ void MoleculeJsonSaver::saveAtoms(BaseMolecule& mol, JsonWriter& writer)
else
{
bool is_qatom_list = false;
std::vector<int> atoms;
std::vector<std::unique_ptr<QueryMolecule::Atom>> atoms;
if (_pqmol)
query_atom_type = QueryMolecule::parseQueryAtomSmarts(*_pqmol, i, atoms, query_atom_properties);
if (mol.isPseudoAtom(i))
Expand Down Expand Up @@ -751,13 +751,18 @@ void MoleculeJsonSaver::saveAtoms(BaseMolecule& mol, JsonWriter& writer)
}
writer.Key("elements");
writer.StartArray();
for (auto atom : atoms)
writer.String(Element::toString(atom));
for (auto& atom : atoms)
if (atom->type == QueryMolecule::ATOM_NUMBER)
writer.String(Element::toString(atom->value_max));
else if (atom->type == QueryMolecule::ATOM_PSEUDO)
writer.String(atom->alias.ptr());
else
throw Error("Wrong atom type %d", atom->type);
writer.EndArray();
}
else if (query_atom_type == QueryMolecule::QUERY_ATOM_SINGLE)
{
anum = *atoms.begin();
anum = (*atoms.begin()).get()->value_max;
buf.readString(Element::toString(anum), true);
if (anum == ELEM_H && query_atom_properties.count(QueryMolecule::ATOM_ISOTOPE) > 0)
{
Expand Down
Loading

0 comments on commit 9c181a5

Please sign in to comment.