diff --git a/api/tests/integration/ref/formats/ket_to_ket.py.out b/api/tests/integration/ref/formats/ket_to_ket.py.out index 0296b821cd..983a7f8b80 100644 --- a/api/tests/integration/ref/formats/ket_to_ket.py.out +++ b/api/tests/integration/ref/formats/ket_to_ket.py.out @@ -6,4 +6,6 @@ ambiguous_monomer.ket mol loadQueryMolecule: SUCCEED monomer_shape.ket doc loadKetDocument: SUCCEED monomer_shape.ket mol loadMolecule: SUCCEED monomer_shape.ket mol loadQueryMolecule: SUCCEED +2707_subst_count.ket ket: SUCCEED +2707_subst_count.ket mol: SUCCEED multi_merge4.ket:SUCCEED diff --git a/api/tests/integration/tests/formats/ket_to_ket.py b/api/tests/integration/tests/formats/ket_to_ket.py index b7307984c6..a0183d9b6b 100644 --- a/api/tests/integration/tests/formats/ket_to_ket.py +++ b/api/tests/integration/tests/formats/ket_to_ket.py @@ -17,6 +17,7 @@ def find_diff(a, b): indigo = Indigo() indigo.setOption("json-saving-pretty", True) indigo.setOption("ignore-stereochemistry-errors", True) +indigo.setOption("molfile-saving-skip-date", True) print("*** KET to KET ***") @@ -59,22 +60,34 @@ def check_res(filename, format, ket_ref, ket): "monomer_shape", "ambiguous_monomer", ] -formats = { +savers = { "doc": [indigo.loadKetDocument], "mol": [indigo.loadMolecule, indigo.loadQueryMolecule], } for filename in sorted(files): - for format in sorted(formats.keys()): + for format in sorted(savers.keys()): file_path = os.path.join(ref_path, filename) with open("{}_{}.ket".format(file_path, format), "r") as file: ket_ref = file.read() - for loader in formats[format]: + for loader in savers[format]: mol = loader(ket_ref) # with open("{}_{}.ket".format(file_path, format), "w") as file: # file.write(mol.json()) ket = mol.json() check_res(filename, format + " " + loader.__name__, ket_ref, ket) +filename = "2707_subst_count" +file_path = os.path.join(ref_path, filename) +mol = indigo.loadQueryMoleculeFromFile("{}.ket".format(file_path)) +savers = {"ket": mol.json, "mol": mol.molfile} +for format in sorted(savers.keys()): + data = savers[format]() + # with open("{}.{}".format(file_path, format), "w") as file: + # file.write(data) + with open("{}.{}".format(file_path, format), "r") as file: + data_ref = file.read() + check_res(filename, format, data_ref, data) + files = [ "multi_merge4", ] diff --git a/api/tests/integration/tests/formats/ref/2707_subst_count.ket b/api/tests/integration/tests/formats/ref/2707_subst_count.ket new file mode 100644 index 0000000000..edc2afa757 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/2707_subst_count.ket @@ -0,0 +1,96 @@ +{ + "root": { + "nodes": [ + { + "$ref": "mol0" + } + ] + }, + "mol0": { + "type": "molecule", + "atoms": [ + { + "label": "C", + "location": [ + 7.300000, + -8.930000, + 0.000000 + ], + "substitutionCount": 5 + }, + { + "label": "C", + "location": [ + 7.800000, + -10.470000, + 0.000000 + ], + "substitutionCount": 7 + }, + { + "label": "C", + "location": [ + 8.110000, + -9.525000, + 0.000000 + ], + "substitutionCount": 6 + }, + { + "label": "C", + "location": [ + 6.800000, + -10.470000, + 0.000000 + ], + "substitutionCount": 8 + }, + { + "label": "C", + "location": [ + 6.490000, + -9.520000, + 0.000000 + ], + "substitutionCount": 9 + } + ], + "bonds": [ + { + "type": 1, + "atoms": [ + 0, + 4 + ] + }, + { + "type": 2, + "atoms": [ + 4, + 3 + ] + }, + { + "type": 1, + "atoms": [ + 3, + 1 + ] + }, + { + "type": 2, + "atoms": [ + 1, + 2 + ] + }, + { + "type": 1, + "atoms": [ + 2, + 0 + ] + } + ] + } +} \ No newline at end of file diff --git a/api/tests/integration/tests/formats/ref/2707_subst_count.mol b/api/tests/integration/tests/formats/ref/2707_subst_count.mol new file mode 100644 index 0000000000..472fe32466 --- /dev/null +++ b/api/tests/integration/tests/formats/ref/2707_subst_count.mol @@ -0,0 +1,16 @@ + + -INDIGO-01000000002D + + 5 5 0 0 0 0 0 0 0 0999 V2000 + 7.3000 -8.9300 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 7.8000 -10.4700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 8.1100 -9.5250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.8000 -10.4700 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 6.4900 -9.5200 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 5 1 0 0 0 0 + 5 4 2 0 0 0 0 + 4 2 1 0 0 0 0 + 2 3 2 0 0 0 0 + 3 1 1 0 0 0 0 +M SUB 5 1 5 2 6 3 6 4 6 5 6 +M END diff --git a/core/indigo-core/molecule/molfile_saver.h b/core/indigo-core/molecule/molfile_saver.h index 1947b00633..bc40b6b19b 100644 --- a/core/indigo-core/molecule/molfile_saver.h +++ b/core/indigo-core/molecule/molfile_saver.h @@ -54,6 +54,7 @@ namespace indigo MODE_3000 // force saving to v3000 format }; constexpr static int MAX_RING_BOND_COUNT = 4; + constexpr static int MAX_SUBSTITUTION_COUNT = 6; MolfileSaver(Output& output); diff --git a/core/indigo-core/molecule/query_molecule.h b/core/indigo-core/molecule/query_molecule.h index 2e57e35a3c..fb12efa6b9 100644 --- a/core/indigo-core/molecule/query_molecule.h +++ b/core/indigo-core/molecule/query_molecule.h @@ -189,6 +189,8 @@ namespace indigo virtual void _optimize(){}; }; + constexpr static int MAX_ATOM_VALUE = 100; + class DLLEXPORT Atom : public Node { public: diff --git a/core/indigo-core/molecule/src/cml_loader.cpp b/core/indigo-core/molecule/src/cml_loader.cpp index 378732f299..a1dad9e1fa 100644 --- a/core/indigo-core/molecule/src/cml_loader.cpp +++ b/core/indigo-core/molecule/src/cml_loader.cpp @@ -683,8 +683,7 @@ void CmlLoader::_loadMoleculeElement(XMLHandle& handle) } else if (subst > 0) { - atom.reset(QueryMolecule::Atom::und( - atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, subst, (subst < 6 ? subst : 100)))); + atom.reset(QueryMolecule::Atom::und(atom.release(), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, subst))); } } else if (strncmp(qf.ptr(), "u", 1) == 0) diff --git a/core/indigo-core/molecule/src/molecule_json_loader.cpp b/core/indigo-core/molecule/src/molecule_json_loader.cpp index 2500fbdbc9..c0e9f855ca 100644 --- a/core/indigo-core/molecule/src/molecule_json_loader.cpp +++ b/core/indigo-core/molecule/src/molecule_json_loader.cpp @@ -477,9 +477,8 @@ void MoleculeJsonLoader::parseAtoms(const rapidjson::Value& atoms, BaseMolecule& _pqmol->getVertex(atom_idx).degree()))); } else if (sub_count > 0) - _pqmol->resetAtom(atom_idx, - QueryMolecule::Atom::und(_pqmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, sub_count, - (sub_count < 6 ? sub_count : 100)))); + _pqmol->resetAtom( + atom_idx, QueryMolecule::Atom::und(_pqmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, sub_count))); else throw Error("invalid SUB value: %d", sub_count); } diff --git a/core/indigo-core/molecule/src/molecule_savers.cpp b/core/indigo-core/molecule/src/molecule_savers.cpp index a7aff1a9b3..7f5e2cbcdb 100644 --- a/core/indigo-core/molecule/src/molecule_savers.cpp +++ b/core/indigo-core/molecule/src/molecule_savers.cpp @@ -93,12 +93,10 @@ bool MoleculeSavers::getSubstitutionCountFlagValue(QueryMolecule& qmol, int idx, value = -1; return true; } - int values[1] = {6}; - if (atom.sureValueBelongs(QueryMolecule::ATOM_SUBSTITUENTS, values, 1)) - { - value = 6; - return true; - } + // Some data stored as min=value, max=100(e.g. MOL format) + auto subst_node = atom.sureConstraint(QueryMolecule::ATOM_SUBSTITUENTS); + if (subst_node != nullptr) + return subst_node->value_min; } else if (atom.sureValue(QueryMolecule::ATOM_SUBSTITUENTS_AS_DRAWN, v)) { diff --git a/core/indigo-core/molecule/src/molfile_loader.cpp b/core/indigo-core/molecule/src/molfile_loader.cpp index 709b6983f8..6b417f7a0e 100644 --- a/core/indigo-core/molecule/src/molfile_loader.cpp +++ b/core/indigo-core/molecule/src/molfile_loader.cpp @@ -27,6 +27,7 @@ #include "molecule/molecule_3d_constraints.h" #include "molecule/molecule_stereocenters.h" #include "molecule/molfile_loader.h" +#include "molecule/molfile_saver.h" #include "molecule/monomer_commons.h" #include "molecule/parse_utils.h" #include "molecule/query_molecule.h" @@ -836,9 +837,11 @@ void MolfileLoader::_readCtab2000() _qmol->getVertex(atom_idx).degree()))); } else if (sub_count > 0) - _qmol->resetAtom(atom_idx, QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), - new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, sub_count, - (sub_count < 6 ? sub_count : 100)))); + _qmol->resetAtom(atom_idx, QueryMolecule::Atom::und( + _qmol->releaseAtom(atom_idx), + new QueryMolecule::Atom( + QueryMolecule::ATOM_SUBSTITUENTS, sub_count, + (sub_count < MolfileSaver::MAX_SUBSTITUTION_COUNT ? sub_count : QueryMolecule::MAX_ATOM_VALUE)))); else throw Error("invalid SUB value: %d", sub_count); } @@ -2817,8 +2820,11 @@ void MolfileLoader::_readCtab3000() new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS_AS_DRAWN, _qmol->getVertex(i).degree()))); } else if (subst > 0) - _qmol->resetAtom(i, QueryMolecule::Atom::und(_qmol->releaseAtom(i), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, subst, - (subst < 6 ? subst : 100)))); + _qmol->resetAtom( + i, QueryMolecule::Atom::und( + _qmol->releaseAtom(i), + new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, subst, + (subst < MolfileSaver::MAX_SUBSTITUTION_COUNT ? subst : QueryMolecule::MAX_ATOM_VALUE)))); else throw Error("invalid SUBST value: %d", subst); } @@ -2865,8 +2871,9 @@ void MolfileLoader::_readCtab3000() new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS_AS_DRAWN, rbonds))); } else if (rb > 1) - _qmol->resetAtom(i, QueryMolecule::Atom::und(_qmol->releaseAtom(i), - new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS, rb, (rb < 4 ? rb : 100)))); + _qmol->resetAtom( + i, QueryMolecule::Atom::und(_qmol->releaseAtom(i), new QueryMolecule::Atom(QueryMolecule::ATOM_RING_BONDS, rb, + (rb < 4 ? rb : QueryMolecule::MAX_ATOM_VALUE)))); else throw Error("invalid RBCNT value: %d", rb); } diff --git a/core/indigo-core/molecule/src/molfile_saver.cpp b/core/indigo-core/molecule/src/molfile_saver.cpp index b67db2a14c..9fc1569e50 100644 --- a/core/indigo-core/molecule/src/molfile_saver.cpp +++ b/core/indigo-core/molecule/src/molfile_saver.cpp @@ -1388,7 +1388,7 @@ void MolfileSaver::_writeCtab2000(Output& output, BaseMolecule& mol, bool query) { int* s = substitution_count.push(); s[0] = i; - s[1] = subst; + s[1] = subst > MAX_SUBSTITUTION_COUNT ? MAX_SUBSTITUTION_COUNT : subst; } } diff --git a/core/indigo-core/molecule/src/query_molecule.cpp b/core/indigo-core/molecule/src/query_molecule.cpp index 0f95baaa65..220eadcdb9 100644 --- a/core/indigo-core/molecule/src/query_molecule.cpp +++ b/core/indigo-core/molecule/src/query_molecule.cpp @@ -157,6 +157,10 @@ int QueryMolecule::getAtomSubstCount(int idx) return res; if (_atoms[idx]->sureValue(ATOM_SUBSTITUENTS_AS_DRAWN, res)) return res; + // Some data stored as min=value, max=100(e.g. MOL format) + auto atom = _atoms[idx]->sureConstraint(ATOM_SUBSTITUENTS); + if (atom != nullptr) + return atom->value_min; return -1; } diff --git a/core/indigo-core/molecule/src/smiles_loader.cpp b/core/indigo-core/molecule/src/smiles_loader.cpp index d854987dd2..21baba28f5 100644 --- a/core/indigo-core/molecule/src/smiles_loader.cpp +++ b/core/indigo-core/molecule/src/smiles_loader.cpp @@ -1356,8 +1356,8 @@ void SmilesLoader::_readOtherStuff() _qmol->getVertex(atom_idx).degree()))); break; default: - _qmol->resetAtom(atom_idx, QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, - subs, (subs < 6 ? subs : 100)))); + _qmol->resetAtom(atom_idx, + QueryMolecule::Atom::und(_qmol->releaseAtom(atom_idx), new QueryMolecule::Atom(QueryMolecule::ATOM_SUBSTITUENTS, subs))); break; } if (_scanner.lookNext() == ',')