diff --git a/api/c/indigo/src/indigo.cpp b/api/c/indigo/src/indigo.cpp index 66a0c08010..32d5fd1385 100644 --- a/api/c/indigo/src/indigo.cpp +++ b/api/c/indigo/src/indigo.cpp @@ -128,6 +128,7 @@ void Indigo::init() json_saving_pretty = false; molfile_saving_add_implicit_h = true; + molfile_saving_add_mrv_sma = true; smiles_saving_write_name = false; smiles_saving_smarts_mode = false; @@ -189,6 +190,7 @@ void Indigo::initMolfileSaver(MolfileSaver& saver) saver.no_chiral = molfile_saving_no_chiral; saver.add_stereo_desc = molfile_saving_add_stereo_desc; saver.add_implicit_h = molfile_saving_add_implicit_h; + saver.add_mrv_sma = molfile_saving_add_mrv_sma; saver.chiral_flag = molfile_saving_chiral_flag; } @@ -210,6 +212,7 @@ void Indigo::initRxnfileSaver(RxnfileSaver& saver) saver.skip_date = molfile_saving_skip_date; saver.add_stereo_desc = molfile_saving_add_stereo_desc; saver.add_implicit_h = molfile_saving_add_implicit_h; + saver.add_mrv_sma = molfile_saving_add_mrv_sma; } Indigo::~Indigo() diff --git a/api/c/indigo/src/indigo_internal.h b/api/c/indigo/src/indigo_internal.h index f3cfb66e86..59e4afb0e5 100644 --- a/api/c/indigo/src/indigo_internal.h +++ b/api/c/indigo/src/indigo_internal.h @@ -319,6 +319,7 @@ class DLLEXPORT Indigo bool molfile_saving_skip_date; bool molfile_saving_add_stereo_desc; bool molfile_saving_add_implicit_h; + bool molfile_saving_add_mrv_sma; bool json_saving_add_stereo_desc; bool json_saving_pretty; bool smiles_saving_write_name; diff --git a/api/c/indigo/src/indigo_options.cpp b/api/c/indigo/src/indigo_options.cpp index 3484b187e1..40c9b8af3b 100644 --- a/api/c/indigo/src/indigo_options.cpp +++ b/api/c/indigo/src/indigo_options.cpp @@ -290,6 +290,7 @@ void IndigoOptionHandlerSetter::setBasicOptionHandlers(const qword id) mgr->setOptionHandlerBool("json-saving-add-stereo-desc", SETTER_GETTER_BOOL_OPTION(indigo.json_saving_add_stereo_desc)); mgr->setOptionHandlerBool("json-saving-pretty", 
SETTER_GETTER_BOOL_OPTION(indigo.json_saving_pretty)); mgr->setOptionHandlerBool("molfile-saving-add-implicit-h", SETTER_GETTER_BOOL_OPTION(indigo.molfile_saving_add_implicit_h)); + mgr->setOptionHandlerBool("molfile-saving-add-mrv-sma", SETTER_GETTER_BOOL_OPTION(indigo.molfile_saving_add_mrv_sma)); mgr->setOptionHandlerBool("smiles-saving-write-name", SETTER_GETTER_BOOL_OPTION(indigo.smiles_saving_write_name)); mgr->setOptionHandlerString("filename-encoding", indigoSetFilenameEncoding, indigoGetFilenameEncoding); mgr->setOptionHandlerInt("fp-ord-qwords", SETTER_GETTER_INT_OPTION(indigo.fp_params.ord_qwords)); diff --git a/api/tests/integration/ref/basic/check_query.py.out b/api/tests/integration/ref/basic/check_query.py.out index 3b4473e7f3..d21cd55099 100644 --- a/api/tests/integration/ref/basic/check_query.py.out +++ b/api/tests/integration/ref/basic/check_query.py.out @@ -402,6 +402,7 @@ M UNS 1 4 1 M RBC 1 33 -2 A 59 CYC +M MRV SMA 16 [#6;H2] M STY 2 1 DAT 2 DAT M SPL 1 2 1 M SLB 2 1 1 2 2 @@ -636,3 +637,27 @@ Check query for bond 18 returns 0 Check query for bond 19 returns 0 Check query for bond 20 returns 0 Check query for bond 21 returns 0 +[#6]1[#6][#6,#7,F;a;r6;R1;h1;H][!Cl;!Br;!I;a;R1;h1;H][#6][c;r6;R1;X3;v3;h1;H]1 + + -INDIGO-01000000002D + + 6 6 0 0 0 0 0 0 0 0999 V2000 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 2 0 3 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 L 0 0 0 2 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 L 0 0 0 2 0 0 0 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 6 0 0 0 0 + 2 3 6 0 0 0 0 + 3 4 6 0 0 0 0 + 4 5 6 0 0 0 0 + 5 6 6 0 0 0 0 + 6 1 6 0 0 0 0 +M ALS 4 3 T Cl Br I +M ALS 5 3 F C N F +M MRV SMA 2 [#6;Hh1X3R1r6a] +M MRV SMA 4 [!#17;!#35;!#53;Hh1R1a] +M MRV SMA 5 [#6,#7,#9;Hh1R1r6a] +M END + diff --git a/api/tests/integration/ref/deco/deco_recursive_smarts.py.out b/api/tests/integration/ref/deco/deco_recursive_smarts.py.out index 
848b0dba2d..07aed2b43a 100644 --- a/api/tests/integration/ref/deco/deco_recursive_smarts.py.out +++ b/api/tests/integration/ref/deco/deco_recursive_smarts.py.out @@ -13,6 +13,9 @@ full scaffold: 3 4 2 0 0 0 0 5 2 1 0 0 0 0 6 4 1 0 0 0 0 +M MRV SMA 2 [#7;A] +M MRV SMA 3 [#6;A] +M MRV SMA 4 [#8;A] M RGP 2 5 1 6 2 M END diff --git a/api/tests/integration/ref/formats/mol_features.py.out b/api/tests/integration/ref/formats/mol_features.py.out index af0b7d5236..6537d37620 100644 --- a/api/tests/integration/ref/formats/mol_features.py.out +++ b/api/tests/integration/ref/formats/mol_features.py.out @@ -7928,7 +7928,7 @@ M V30 11 C 5.9062 -4.5267 0.0 0 M V30 12 C 6.6195 -4.1121 0.0 0 M V30 13 O 4.4773 -4.531 0.0 0 CHG=-1 M V30 14 Fe 5.1995 -9.0694 0.0 0 VAL=-1 -M V30 15 A 3.7701 -8.2434 0.0 0 +M V30 15 [Gln,Lys] 3.7701 -8.2434 0.0 0 M V30 16 C 3.7715 -6.5918 0.0 0 M V30 17 C 6.6313 -8.2424 0.0 0 M V30 END ATOM @@ -7976,7 +7976,7 @@ M END 6.6195 -4.1121 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 4.4773 -4.5310 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 5.1995 -9.0694 0.0000 Fe 0 0 0 0 0 15 0 0 0 0 0 0 - 3.7701 -8.2434 0.0000 A 0 0 0 0 0 0 0 0 0 0 0 0 + 3.7701 -8.2434 0.0000 L 0 0 0 0 0 0 0 0 0 0 0 0 3.7715 -6.5918 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 6.6313 -8.2424 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 7 8 1 0 0 0 0 @@ -7998,8 +7998,10 @@ M END 4 17 1 0 0 0 0 M CHG 1 13 -1 M ALS 10 2 F N P +M ALS 15 2 F Gln Lys A 16 Fred +M MRV SMA 5 [#6;H0] M END molfile-saving-mode: 3000 @@ -8024,7 +8026,7 @@ M V30 11 C 5.9062 -4.5267 0.0 0 M V30 12 C 6.6195 -4.1121 0.0 0 M V30 13 O 4.4773 -4.531 0.0 0 CHG=-1 M V30 14 Fe 5.1995 -9.0694 0.0 0 VAL=-1 -M V30 15 A 3.7701 -8.2434 0.0 0 +M V30 15 [Gln,Lys] 3.7701 -8.2434 0.0 0 M V30 16 C 3.7715 -6.5918 0.0 0 M V30 17 C 6.6313 -8.2424 0.0 0 M V30 END ATOM @@ -8845,6 +8847,8 @@ ind-928-asdrawn.mol: M UNS 2 3 1 11 1 M SUB 2 4 -2 5 4 M RBC 2 4 3 7 -2 +M MRV SMA 10 [#6;H2] +M MRV SMA 13 [#6;H] M END molfile-saving-mode: 2000 @@ -8884,6 +8888,8 @@ M END M UNS 2 3 1 11 1 M SUB 2 4 
-2 5 4 M RBC 2 4 3 7 -2 +M MRV SMA 10 [#6;H2] +M MRV SMA 13 [#6;H] M END molfile-saving-mode: 3000 diff --git a/api/tests/integration/ref/formats/mol_to_cml.py.out b/api/tests/integration/ref/formats/mol_to_cml.py.out index 6653bc4fec..cc2b9919db 100644 --- a/api/tests/integration/ref/formats/mol_to_cml.py.out +++ b/api/tests/integration/ref/formats/mol_to_cml.py.out @@ -753,6 +753,7 @@ M SUB 1 9 -2 M RBC 1 5 -2 M ALS 14 3 F B Si As M ALS 15 3 T N P As +M MRV SMA 7 [#6;H3] M RGP 1 1 1 M LOG 1 1 0 0 M END @@ -886,6 +887,7 @@ M SUB 1 9 -2 M RBC 1 5 -2 M ALS 14 3 F B Si As M ALS 15 3 T N P As +M MRV SMA 7 [#6;H3] M RGP 1 1 1 M LOG 1 1 0 0 M END diff --git a/api/tests/integration/tests/basic/check_query.py b/api/tests/integration/tests/basic/check_query.py index 71c00fe306..ff1fad755a 100644 --- a/api/tests/integration/tests/basic/check_query.py +++ b/api/tests/integration/tests/basic/check_query.py @@ -69,3 +69,10 @@ "Check query for bond %d returns %d" % (bond.index(), bond.checkQuery()) ) + +mol = indigo.loadSmarts( + "[#6]1[#6;a;H1h1v3X3R1r6][#6][!#17!#35!#53;a;H1h1R1][#6,#7,#9;a;H1h1R1r6][#6]1" +) + +print(mol.smarts()) +print(mol.molfile()) diff --git a/core/indigo-core/molecule/molfile_saver.h b/core/indigo-core/molecule/molfile_saver.h index 81e368e637..1c3ca982dc 100644 --- a/core/indigo-core/molecule/molfile_saver.h +++ b/core/indigo-core/molecule/molfile_saver.h @@ -59,6 +59,7 @@ namespace indigo bool add_implicit_h; // If true then MRV_IMPLICIT_H Data S-groups will be added for saving // the number of implicit H for aromatic atoms // (if it is required for correct de-aromatization) (default value is true) + bool add_mrv_sma; // If true then "MRV SMA" extension will be added for query molecules (default value is true) static int parseFormatMode(const char* mode); static void saveFormatMode(int mode, Array& output); diff --git a/core/indigo-core/molecule/query_molecule.h b/core/indigo-core/molecule/query_molecule.h index 2009e7b87e..1ccba64979 100644 --- 
a/core/indigo-core/molecule/query_molecule.h +++ b/core/indigo-core/molecule/query_molecule.h @@ -348,6 +348,7 @@ namespace indigo static std::string getSmartsBondStr(QueryMolecule::Bond* bond); static void writeSmartsBond(Output& output, QueryMolecule::Bond* bond, bool has_or_parent); static std::string getSmartsAtomStr(QueryMolecule::Atom* atom, int original_format); + static std::string getMolMrvSmaExtension(QueryMolecule& qm, int aid); static void writeSmartsAtom(Output& output, Atom* atom, int aam, int chirality, int depth, bool has_or_parent, bool has_not_parent, int original_format); @@ -397,6 +398,8 @@ namespace indigo Atom* releaseAtom(int idx); void resetAtom(int idx, Atom* atom); + static bool isAtomProperty(OpType type); + Bond& getBond(int idx); Bond* releaseBond(int idx); void resetBond(int idx, Bond* bond); @@ -428,7 +431,7 @@ namespace indigo bool standardize(const StandardizeOptions& options); - static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector& list, std::map>& properties); + static int parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector>& list, std::map>& properties); protected: void _getAtomDescription(Atom* atom, Output& out, int depth); @@ -444,10 +447,10 @@ namespace indigo void _removeBonds(const Array& indices) override; using AtomList = std::pair>; - static bool _isAtomListOr(Atom* pqa, std::vector& list); - static bool _isAtomOrListAndProps(Atom* pqa, std::vector& list, bool& neg, std::map>& properties); + static bool _isAtomListOr(Atom* pqa, std::vector>& list); + static bool _isAtomOrListAndProps(Atom* pqa, std::vector>& list, bool& neg, std::map>& properties); static bool _isAtomList(Atom* qa, AtomList list); - static bool _tryToConvertToList(Atom* p_query_atom, std::vector& atoms, std::map>& properties); + static bool _tryToConvertToList(Atom* p_query_atom, std::vector>& atoms, std::map>& properties); Array _min_h; diff --git a/core/indigo-core/molecule/src/molecule_json_saver.cpp 
b/core/indigo-core/molecule/src/molecule_json_saver.cpp index 1b7b39aa3d..e8fa3e50b1 100644 --- a/core/indigo-core/molecule/src/molecule_json_saver.cpp +++ b/core/indigo-core/molecule/src/molecule_json_saver.cpp @@ -706,7 +706,7 @@ void MoleculeJsonSaver::saveAtoms(BaseMolecule& mol, JsonWriter& writer) else { bool is_qatom_list = false; - std::vector atoms; + std::vector> atoms; if (_pqmol) query_atom_type = QueryMolecule::parseQueryAtomSmarts(*_pqmol, i, atoms, query_atom_properties); if (mol.isPseudoAtom(i)) @@ -751,13 +751,18 @@ void MoleculeJsonSaver::saveAtoms(BaseMolecule& mol, JsonWriter& writer) } writer.Key("elements"); writer.StartArray(); - for (auto atom : atoms) - writer.String(Element::toString(atom)); + for (auto& atom : atoms) + if (atom->type == QueryMolecule::ATOM_NUMBER) + writer.String(Element::toString(atom->value_max)); + else if (atom->type == QueryMolecule::ATOM_PSEUDO) + writer.String(atom->alias.ptr()); + else + throw Error("Wrong atom type %d", atom->type); writer.EndArray(); } else if (query_atom_type == QueryMolecule::QUERY_ATOM_SINGLE) { - anum = *atoms.begin(); + anum = (*atoms.begin()).get()->value_max; buf.readString(Element::toString(anum), true); if (anum == ELEM_H && query_atom_properties.count(QueryMolecule::ATOM_ISOTOPE) > 0) { diff --git a/core/indigo-core/molecule/src/molfile_saver.cpp b/core/indigo-core/molecule/src/molfile_saver.cpp index 750b741de6..b08f446d37 100644 --- a/core/indigo-core/molecule/src/molfile_saver.cpp +++ b/core/indigo-core/molecule/src/molfile_saver.cpp @@ -41,7 +41,7 @@ IMPL_ERROR(MolfileSaver, "molfile saver"); CP_DEF(MolfileSaver); -MolfileSaver::MolfileSaver(Output& output) : _output(output), CP_INIT, TL_CP_GET(_atom_mapping), TL_CP_GET(_bond_mapping) +MolfileSaver::MolfileSaver(Output& output) : _output(output), CP_INIT, TL_CP_GET(_atom_mapping), TL_CP_GET(_bond_mapping), add_mrv_sma(true) { mode = MODE_AUTO; no_chiral = false; @@ -412,7 +412,8 @@ void MolfileSaver::_writeCtab(Output& output, 
BaseMolecule& mol, bool query) ArrayOutput out(buf); out.printf("%d ", _atom_mapping[i]); - QS_DEF(Array, list); + std::vector> list; + std::map> properties; int query_atom_type; if (atom_number == ELEM_H && isotope == DEUTERIUM) @@ -435,7 +436,7 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) { _writeAtomLabel(out, atom_number); } - else if (qmol != 0 && (query_atom_type = QueryMolecule::parseQueryAtom(*qmol, i, list)) != -1) + else if (qmol != 0 && (query_atom_type = QueryMolecule::parseQueryAtomSmarts(*qmol, i, list, properties)) != -1) { if (query_atom_type == QueryMolecule::QUERY_ATOM_A) out.writeChar('A'); @@ -455,17 +456,23 @@ void MolfileSaver::_writeCtab(Output& output, BaseMolecule& mol, bool query) out.writeString("MH"); else if (query_atom_type == QueryMolecule::QUERY_ATOM_LIST || query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST) { - int k; - if (query_atom_type == QueryMolecule::QUERY_ATOM_NOTLIST) out.writeString("NOT"); out.writeChar('['); - for (k = 0; k < list.size(); k++) + + bool not_first = false; + for (auto& qatom : list) { - if (k > 0) + if (not_first) out.writeChar(','); - _writeAtomLabel(out, list[k]); + else + not_first = true; + + if (qatom->type == QueryMolecule::ATOM_NUMBER) + _writeAtomLabel(out, qatom->value_max); + else if (qatom->type == QueryMolecule::ATOM_PSEUDO) + out.writeString(qatom->alias.ptr()); } out.writeChar(']'); } @@ -1097,7 +1104,7 @@ void MolfileSaver::_writeTGroup(Output& output, BaseMolecule& mol, int tg_idx) void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) { _handleCIP(mol); - QueryMolecule* qmol = 0; + QueryMolecule* qmol = nullptr; if (query) qmol = (QueryMolecule*)(&mol); @@ -1175,9 +1182,9 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) if (qmol == 0) throw Error("internal: atom number = -1, but qmol == 0"); - QS_DEF(Array, list); - - int query_atom_type = QueryMolecule::parseQueryAtom(*qmol, i, list); + 
std::vector> list; + std::map> properties; + int query_atom_type = QueryMolecule::parseQueryAtomSmarts(*qmol, i, list, properties); if (query_atom_type == QueryMolecule::QUERY_ATOM_A) label[0] = 'A'; @@ -1481,9 +1488,9 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) for (i = 0; i < atom_lists.size(); i++) { int atom_idx = atom_lists[i]; - QS_DEF(Array, list); - - int query_atom_type = QueryMolecule::parseQueryAtom(*qmol, atom_idx, list); + std::vector> list; + std::map> properties; + int query_atom_type = QueryMolecule::parseQueryAtomSmarts(*qmol, atom_idx, list, properties); if (query_atom_type != QueryMolecule::QUERY_ATOM_LIST && query_atom_type != QueryMolecule::QUERY_ATOM_NOTLIST) throw Error("internal: atom list not recognized"); @@ -1495,16 +1502,35 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) int j; - for (j = 0; j < list.size(); j++) + for (auto& qatom : list) { - char c1 = ' ', c2 = ' '; - const char* str = Element::toString(list[j]); + if (qatom->type == QueryMolecule::ATOM_NUMBER) + { + char c1 = ' ', c2 = ' '; + const char* str = Element::toString(qatom->value_max); - c1 = str[0]; - if (str[1] != 0) - c2 = str[1]; + c1 = str[0]; + if (str[1] != 0) + c2 = str[1]; - output.printf("%c%c ", c1, c2); + output.printf("%c%c ", c1, c2); + } + else if (qatom->type == QueryMolecule::ATOM_PSEUDO) + { + const char* str = qatom->alias.ptr(); + constexpr int SYMBOL_WIDTH = 4; + if (strlen(str) > 4) + { + for (int i = 0; i < SYMBOL_WIDTH; i++) + output.writeChar(str[i]); + } + else + { + output.writeString(str); + for (int i = strlen(str); i < SYMBOL_WIDTH; i++) + output.writeChar(' '); + } + } } output.writeCR(); } @@ -1523,6 +1549,18 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) output.writeCR(); } + if (qmol && add_mrv_sma) + { + for (i = mol.vertexBegin(); i < mol.vertexEnd(); i = mol.vertexNext(i)) + { + std::string mrv_sma = 
QueryMolecule::getMolMrvSmaExtension(*qmol, i); + if (mrv_sma.length() > 0) + { + output.printfCR("M MRV SMA %3u [%s]", i + 1, mrv_sma.c_str()); + } + } + } + QS_DEF(Array, sgroup_ids); QS_DEF(Array, child_ids); QS_DEF(Array, parent_ids); diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index 8ff744873e..a0663f7814 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -27,6 +27,11 @@ using namespace indigo; +bool QueryMolecule::isAtomProperty(OpType type) +{ + return (type > ATOM_PSEUDO && type <= ATOM_CHIRALITY); +} + QueryMolecule::QueryMolecule() : spatial_constraints(*this) { } @@ -515,6 +520,64 @@ std::string QueryMolecule::getSmartsAtomStr(QueryMolecule::Atom* atom, int origi return result; } +// Builds the text that goes inside the brackets of the "M MRV SMA" molfile line +// for atom aid. For a plain atom or atom list combined with properties this is +// the atoms (',' separated; '!'-prefixed and ';' separated when negated), a ';', +// then the supported properties in a fixed order; it stays empty if none of them +// is set. Any other non-empty query tree is returned as its full SMARTS string. +std::string QueryMolecule::getMolMrvSmaExtension(QueryMolecule& qm, int aid) +{ + Array out; + ArrayOutput output(out); + std::vector> atom_list; + std::map> atom_props; + bool negative = false; + QueryMolecule::Atom& qa = qm.getAtom(aid); + if (_isAtomOrListAndProps(&qa, atom_list, negative, atom_props)) + { + // Just atom or list and list of properties. 
+ bool atoms_written = false; + for (int property : {ATOM_TOTAL_H, ATOM_IMPLICIT_H, ATOM_CONNECTIVITY, ATOM_SSSR_RINGS, ATOM_SMALLEST_RING_SIZE, ATOM_AROMATICITY}) + { + if (atom_props.count(property) < 1) + continue; + if (!atoms_written) + { + // negative list will be "!a1;!a2;..!an;props" + // positive list will be "a1,a2,..an;props" + bool not_first_atom = false; + for (auto& qatom : atom_list) + { + if (not_first_atom) + output.writeChar(negative ? ';' : ','); + else + not_first_atom = true; + if (negative) + output.writeChar('!'); + if (qatom->type == ATOM_NUMBER) + output.printf("#%d", qatom->value_max); + else if (qatom->type == ATOM_PSEUDO) + output.writeString(qatom->alias.ptr()); + } + output.writeChar(';'); + atoms_written = true; + } + // Properties are written back to back with no '&' between them: adjacent + // SMARTS primitives are implicitly AND-ed, and this matches the reference + // output of the tests (e.g. "[#6;Hh1X3R1r6a]"). + writeSmartsAtom(output, atom_props[property].get(), -1, -1, 1, false, false, qm.original_format); + } + } + else + { + if (qa.type != OP_NONE) + // Complex tree - just write SMARTS + return getSmartsAtomStr(&qa, qm.original_format); + } + std::string result{out.ptr(), static_cast(out.size())}; + return result; +} + static void _write_num(indigo::Output& output, unsigned char ch, int num) { output.writeChar(ch); @@ -737,7 +800,8 @@ void QueryMolecule::writeSmartsAtom(Output& output, Atom* atom, int aam, int chi } case ATOM_PSEUDO: { - output.printf("*", atom->alias.ptr()); + // output.writeString(atom->alias.ptr()); + output.writeChar('*'); break; } @@ -2469,7 +2533,7 @@ QueryMolecule::Atom* QueryMolecule::stripKnownAttrs(QueryMolecule::Atom& qa) } // TODO: develop function to convert tree to CNF to simplify checks -bool QueryMolecule::_tryToConvertToList(Atom* p_query_atom, std::vector& atoms, std::map>& properties) +bool QueryMolecule::_tryToConvertToList(Atom* p_query_atom, std::vector>& atoms, std::map>& properties) { // Try to convert a1p1p2..pn, a2p1p2..pn, .. 
, akp1p2..pn to (a1, a2, .. an)p1p2..pn if (!p_query_atom) @@ -2480,7 +2544,7 @@ bool QueryMolecule::_tryToConvertToList(Atom* p_query_atom, std::vector& at if (size < 2) return false; std::vector> atoms_properties; - std::vector atoms_list; + std::vector> atoms_list; int list_element_child_count = -1; for (int i = 0; i < size; i++) { @@ -2512,9 +2576,10 @@ bool QueryMolecule::_tryToConvertToList(Atom* p_query_atom, std::vector& at return false; break; case ATOM_NUMBER: + case ATOM_PSEUDO: if (child_prop->value_min != child_prop->value_max) return false; - atoms_list.emplace_back(child_prop->value_min); + atoms_list.emplace_back(std::move(child_prop)); atom_not_found = false; break; default: @@ -2546,28 +2611,32 @@ bool QueryMolecule::_tryToConvertToList(Atom* p_query_atom, std::vector& at } } } - atoms = atoms_list; + + atoms.clear(); + for (auto& qa : atoms_list) + atoms.emplace_back(std::move(qa)); + for (auto& prop : atoms_properties) - { properties[prop->type] = std::move(prop); - } + return true; } -bool QueryMolecule::_isAtomListOr(Atom* p_query_atom, std::vector& list) +bool QueryMolecule::_isAtomListOr(Atom* p_query_atom, std::vector>& list) { // Check if p_query_atom atom list like or(a1,a2,a3, or(a4,a5,a6), a7) if (!p_query_atom) return false; if (p_query_atom->type != OP_OR) return false; - std::vector collected; + std::vector> collected; for (auto i = 0; i < p_query_atom->children.size(); i++) { Atom* p_query_atom_child = p_query_atom->child(i); - if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max) + if ((p_query_atom_child->type == ATOM_PSEUDO) || + (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max)) { - collected.emplace_back(p_query_atom_child->value_min); + collected.emplace_back(p_query_atom_child->clone()); } else if (p_query_atom_child->type == OP_OR) { @@ -2579,11 +2648,13 @@ bool QueryMolecule::_isAtomListOr(Atom* p_query_atom, 
std::vector& list) } if (collected.size() < 1) return false; - list = collected; + for (auto& qa : collected) + list.emplace_back(std::move(qa)); return true; } -bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector& list, bool& neg, std::map>& properties) +bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector>& list, bool& neg, + std::map>& properties) { // Check if p_query_atom contains only atom or atom list and atom properties connected by "and" // atom list is positive i.e. or(a1,a2,a3,or(a4,a5),a6) or negative @@ -2599,13 +2670,14 @@ bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector& p_query_atom_child = p_query_atom->child(0); is_neg = true; } - if (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max) + if ((p_query_atom_child->type == ATOM_PSEUDO) || + (p_query_atom_child->type == ATOM_NUMBER && p_query_atom_child->value_min == p_query_atom_child->value_max)) { - list.emplace_back(p_query_atom_child->value_min); + list.emplace_back(p_query_atom_child->clone()); neg = is_neg; return true; } - else if (!is_neg && p_query_atom_child->type > ATOM_NUMBER && p_query_atom_child->type <= ATOM_CHIRALITY) // atom property, no negative props here + else if (!is_neg && isAtomProperty(p_query_atom_child->type)) // atom property, no negative props here { properties[p_query_atom_child->type] = std::unique_ptr(p_query_atom_child->clone()); return true; @@ -2616,12 +2688,12 @@ bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector& { return true; } - std::vector collected; + std::vector> collected; if (_isAtomListOr(p_query_atom_child, collected)) { neg = is_neg; - for (auto item : collected) - list.emplace_back(item); + for (auto& item : collected) + list.emplace_back(std::move(item)); return true; } } @@ -2633,15 +2705,20 @@ bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector& { Atom* p_query_atom_child = 
const_cast(p_query_atom)->child(i); bool is_neg = false; - std::vector collected; + std::vector> collected; std::map> collected_properties; if (_isAtomOrListAndProps(p_query_atom_child, collected, is_neg, collected_properties)) { + if (isAtomProperty(p_query_atom_child->type)) + { + properties[p_query_atom_child->type] = std::unique_ptr(p_query_atom_child->clone()); + continue; + } if (list.size() > 0 && is_neg != neg) // allowed only one list type in set - positive or negative return false; neg = is_neg; - for (auto item : collected) - list.emplace_back(item); + for (auto& item : collected) + list.emplace_back(std::move(item)); for (auto& prop : collected_properties) properties[prop.first] = std::move(prop.second); } @@ -2651,42 +2728,56 @@ bool QueryMolecule::_isAtomOrListAndProps(Atom* p_query_atom, std::vector& return true; } -int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector& list, std::map>& properties) +int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector>& list, std::map>& properties) { - std::vector atom_list; + std::vector> query_atom_list; std::map> atom_props; bool negative = false; QueryMolecule::Atom& qa = qm.getAtom(aid); if (qa.type == QueryMolecule::OP_NONE) return QUERY_ATOM_AH; - if (_isAtomOrListAndProps(&qa, atom_list, negative, atom_props)) + if (_isAtomOrListAndProps(&qa, query_atom_list, negative, atom_props)) { - list = atom_list; - std::sort(atom_list.begin(), atom_list.end()); + bool can_be_query_atom = true; + std::vector atom_list; for (auto& prop : atom_props) properties[prop.first] = std::move(prop.second); - - if (negative) + for (auto& qatom : query_atom_list) { - if (atom_list.size() == 1 && atom_list[0] == ELEM_H) - return QUERY_ATOM_A; // !H - else if (atom_list == std::vector{ELEM_H, ELEM_C}) - return QUERY_ATOM_Q; - else if (atom_list == std::vector{ELEM_C}) - return QUERY_ATOM_QH; - else if (atom_list == std::vector{ELEM_H, ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, 
ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, ELEM_Br, - ELEM_Kr, ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn}) - return QUERY_ATOM_M; - else if (atom_list == std::vector{ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, ELEM_Br, - ELEM_Kr, ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn}) - return QUERY_ATOM_MH; + if (qatom->type == ATOM_PSEUDO) + can_be_query_atom = false; + else if (qatom->type == ATOM_NUMBER) + atom_list.emplace_back(qatom->value_max); + else + throw Error("Wrong atom type %d", qatom->type); + list.emplace_back(std::move(qatom)); } - else + + if (can_be_query_atom) { - if (atom_list == std::vector{ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At}) - return QUERY_ATOM_X; - else if (atom_list == std::vector{ELEM_H, ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At}) - return QUERY_ATOM_XH; + std::sort(atom_list.begin(), atom_list.end()); + if (negative) + { + if (atom_list.size() == 1 && atom_list[0] == ELEM_H) + return QUERY_ATOM_A; // !H + else if (atom_list == std::vector{ELEM_H, ELEM_C}) + return QUERY_ATOM_Q; + else if (atom_list == std::vector{ELEM_C}) + return QUERY_ATOM_QH; + else if (atom_list == std::vector{ELEM_H, ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, + ELEM_Br, ELEM_Kr, ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn}) + return QUERY_ATOM_M; + else if (atom_list == std::vector{ELEM_He, ELEM_C, ELEM_N, ELEM_O, ELEM_F, ELEM_Ne, ELEM_P, ELEM_S, ELEM_Cl, ELEM_Ar, ELEM_Se, ELEM_Br, + ELEM_Kr, ELEM_I, ELEM_Xe, ELEM_At, ELEM_Rn}) + return QUERY_ATOM_MH; + } + else + { + if (atom_list == std::vector{ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At}) + return QUERY_ATOM_X; + else if (atom_list == std::vector{ELEM_H, ELEM_F, ELEM_Cl, ELEM_Br, ELEM_I, ELEM_At}) + return QUERY_ATOM_XH; + } } if (negative) { @@ -2694,9 +2785,9 @@ int QueryMolecule::parseQueryAtomSmarts(QueryMolecule& qm, int aid, std::vector< } else { - if (atom_list.size() == 0) + if (query_atom_list.size() == 0) return QUERY_ATOM_A; - 
else if (atom_list.size() == 1) + else if (query_atom_list.size() == 1) return QUERY_ATOM_SINGLE; else return QUERY_ATOM_LIST; diff --git a/core/indigo-core/reaction/rxnfile_saver.h b/core/indigo-core/reaction/rxnfile_saver.h index dd78487b94..ae128eb92d 100644 --- a/core/indigo-core/reaction/rxnfile_saver.h +++ b/core/indigo-core/reaction/rxnfile_saver.h @@ -44,6 +44,7 @@ namespace indigo bool skip_date; bool add_stereo_desc; bool add_implicit_h; + bool add_mrv_sma; DECL_ERROR; diff --git a/core/indigo-core/reaction/src/rxnfile_saver.cpp b/core/indigo-core/reaction/src/rxnfile_saver.cpp index 1786d4afe0..41c62773f9 100644 --- a/core/indigo-core/reaction/src/rxnfile_saver.cpp +++ b/core/indigo-core/reaction/src/rxnfile_saver.cpp @@ -28,7 +28,7 @@ using namespace indigo; IMPL_ERROR(RxnfileSaver, "Rxnfile saver"); -RxnfileSaver::RxnfileSaver(Output& output) : _output(output) +RxnfileSaver::RxnfileSaver(Output& output) : _output(output), add_mrv_sma(true) { molfile_saving_mode = MolfileSaver::MODE_AUTO; skip_date = false; @@ -95,6 +95,7 @@ void RxnfileSaver::_saveReaction() molfileSaver.mode = _v2000 ? 
MolfileSaver::MODE_2000 : MolfileSaver::MODE_3000; molfileSaver.add_stereo_desc = add_stereo_desc; molfileSaver.add_implicit_h = add_implicit_h; + molfileSaver.add_mrv_sma = add_mrv_sma; _writeRxnHeader(*_brxn); diff --git a/utils/indigo-service/backend/service/tests/api/indigo_test.py b/utils/indigo-service/backend/service/tests/api/indigo_test.py index f8504f7379..8ae972cacd 100644 --- a/utils/indigo-service/backend/service/tests/api/indigo_test.py +++ b/utils/indigo-service/backend/service/tests/api/indigo_test.py @@ -2438,6 +2438,9 @@ def test_calculate_query_mol_selected(self): "selected": [ 2, ], + "options": { + "molfile-saving-add-mrv-sma": False, + }, } ) result = requests.post( @@ -2602,6 +2605,9 @@ def test_calculate_query_rxn_selected(self): "selected": [ 2, ], + "options": { + "molfile-saving-add-mrv-sma": False, + }, } ) result = requests.post( @@ -2625,6 +2631,9 @@ def test_calculate_query_rxn_selected(self): "mass-composition", ], "selected": [2, 3, 4, 5], + "options": { + "molfile-saving-add-mrv-sma": False, + }, } ) result = requests.post(