From ce4b00dbaf25857b0ffe1e74ef098691ed010842 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Mon, 22 Apr 2024 11:20:16 -0700 Subject: [PATCH 01/15] Separate wrapper from result during return --- isicle/md.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 9a9ee79..ce8fff6 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -381,20 +381,17 @@ def finish(self): result = parser.parse() - self.__dict__.update(result) - - for i in self.geom: - i.add___dict__({k: v for k, v in result.items() if k != "geom"}) - i.__dict__.update(basename=self.geom.basename) + for i in result['geometry']: + i.add___dict__({k: v for k, v in result.items() if k != "geometry"}) + # i.__dict__.update(basename=self.geom.basename) if self.task != "optimize": conformerID = 1 - for i in self.geom: + for i in result['geometry']: i.__dict__.update(conformerID=conformerID) conformerID += 1 - return self - else: - self.geom = self.geom[0] + + return result def run(self, geom, **kwargs): """ @@ -429,9 +426,9 @@ def run(self, geom, **kwargs): self.submit() # Finish/clean up - self.finish() + result = self.finish() - return self + return result def get_structures(self): """ From a0ea3f8ed69a9b25aadb7c2caed2588cd8666a77 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Mon, 22 Apr 2024 11:21:05 -0700 Subject: [PATCH 02/15] Remove try/except blocks for geometry parsing --- isicle/parse.py | 91 ++++++++++++++++++++++--------------------------- 1 file changed, 41 insertions(+), 50 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index df767bd..3385f20 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -319,8 +319,8 @@ def parse(self): result = {k: v for k, v in result.items() if v is not None} # Add result info to geometry object - if 'geometry' in result: - result['geometry'].add___dict__( + if "geometry" in result: + result["geometry"].add___dict__( {k: v for k, v in result.items() if k != 'geometry'}) # Store attribute @@ -1107,12 +1107,9 @@ def _parse_xyz(self): # TODO: charge needs to propagate to here # Improve loading scheme as well - x = [isicle.io.load(i) for i in geom_list] + return isicle.conformers.ConformationalEnsemble([isicle.io.load(i) for i in geom_list]) - else: - x = [isicle.io.load(self.xyz_path)] - - return isicle.conformers.ConformationalEnsemble(x) + return isicle.io.load(self.xyz_path) def parse(self): """ @@ -1150,51 +1147,45 @@ def parse(self): pass # Parse geometry from assoc. XYZ file - try: - if self.path.endswith("xyz"): - try: - self.xyz_path = self.path - result["geom"] = self._parse_xyz() - - except: - pass - - if self.path.endswith("out") or self.path.endswith("log"): - # try geometry parsing - try: - XYZ = None - if result["protocol"].split()[0] == "xtb": - self.parse_opt = True - XYZ = "xtbopt.xyz" - if result["protocol"].split()[1] == "crest": - if "-deprotonate" in result["protocol"]: - self.parse_isomer = True - XYZ = "deprotonated.xyz" - elif "-protonate" in result["protocol"]: - self.parse_isomer = True - XYZ = "protonated.xyz" - elif "-tautomer" in result["protocol"]: - self.parse_isomer = True - XYZ = "tautomers.xyz" - else: - self.parse_crest = True - XYZ = "crest_conformers.xyz" - - if XYZ is None: - raise RuntimeError( - "XYZ file associated with XTB job not available,\ - please parse separately." - ) + if self.path.endswith("xyz"): + try: + result["geometry"] = self._parse_xyz() + + except: + pass + + if self.path.endswith("out") or self.path.endswith("log"): + # try geometry parsing + XYZ = None + if result["protocol"].split()[0] == "xtb": + self.parse_opt = True + XYZ = "xtbopt.xyz" + if result["protocol"].split()[1] == "crest": + if "-deprotonate" in result["protocol"]: + self.parse_isomer = True + XYZ = "deprotonated.xyz" + elif "-protonate" in result["protocol"]: + self.parse_isomer = True + XYZ = "protonated.xyz" + elif "-tautomer" in result["protocol"]: + self.parse_isomer = True + XYZ = "tautomers.xyz" + else: + self.parse_crest = True + XYZ = "crest_conformers.xyz" + + if XYZ is None: + raise RuntimeError( + "XYZ file associated with XTB job not available,\ + please parse separately." + ) - else: - temp_dir = os.path.dirname(self.path) - self.xyz_path = os.path.join(temp_dir, XYZ) + else: + temp_dir = os.path.dirname(self.path) + self.xyz_path = os.path.join(temp_dir, XYZ) + + result["geometry"] = self._parse_xyz() - result["geom"] = self._parse_xyz() - except: - pass - except: - pass return result def save(self, path): From 3d22c1eea45efba1df9d72229ff0c7ed3f9ae9cc Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Mon, 22 Apr 2024 16:06:30 -0700 Subject: [PATCH 03/15] Remove attribute update from atom count method --- isicle/geometry.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/isicle/geometry.py b/isicle/geometry.py index 7e18ac2..833b094 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -395,9 +395,7 @@ def get_natoms(self): """ - natoms = Chem.Mol.GetNumAtoms(self.to_mol()) - self.__dict__["natoms"] = natoms - return self.__dict__["natoms"] + return Chem.Mol.GetNumAtoms(self.to_mol()) def get_atom_indices( self, atoms=["C", "H"], lookup={"C": 6, "H": 1, "N": 7, "O": 8, "F": 9, "P": 15} From 7613c5c009b2f3a5fff8010d72e1f38cdc493535 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Mon, 22 Apr 2024 16:06:52 -0700 Subject: [PATCH 04/15] Remove charge kwarg --- isicle/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isicle/io.py b/isicle/io.py index 23d534b..2e9f094 100644 --- a/isicle/io.py +++ b/isicle/io.py @@ -61,7 +61,7 @@ def load_xyz(path, charge=None): basename = os.path.splitext(os.path.basename(path))[0] # Initialize Geometry instance - geom = isicle.geometry.Geometry(mol=mol, charge=charge, basename=basename) + geom = isicle.geometry.Geometry(mol=mol, basename=basename) return geom From 1f3e63b7045e5bfb4af49b424d64691b6ee32a1d Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Mon, 22 Apr 2024 16:07:21 -0700 Subject: [PATCH 05/15] Improve `finish` method for XTB parser --- isicle/md.py | 253 +++++++++++++++++++++++---------------------------- 1 file changed, 113 insertions(+), 140 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index ce8fff6..ae5d98a 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -1,15 +1,14 @@ +import glob import os import subprocess + from rdkit import Chem -from rdkit.Chem import AllChem -from rdkit.Chem import rdForceFieldHelpers -from rdkit.Chem import ChemicalForceFields -from rdkit.Chem import rdDistGeom +from rdkit.Chem import AllChem, rdDetermineBonds, rdDistGeom import isicle from isicle.geometry import Geometry from isicle.interfaces import WrapperInterface -from isicle.parse import XTBParser, TINKERParser +from isicle.parse import TINKERParser, XTBParser """ Files resulting from an xtb job always run in the same directory that the command is @@ -92,6 +91,7 @@ class XTBWrapper(WrapperInterface): _default_value = None def __init__(self, **kwargs): + self.temp_dir = isicle.utils.mkdtemp() self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(**kwargs) @@ -123,8 +123,7 @@ def save_geometry(self, fmt="xyz"): ".pdb", ".pkl". """ - # Path operationspyth - self.temp_dir = isicle.utils.mkdtemp() + # Path operations self.fmt = fmt.lower() geomfile = os.path.join( self.temp_dir, "{}.{}".format(self.geom.basename, self.fmt.lower()) @@ -166,7 +165,7 @@ def _configure_xtb( s += "--" + forcefield + " " # Add charge - s += "--chrg " + self.geom.get_charge() + " " + s += "--chrg " + str(self.geom.get_charge()) + " " # Add optional implicit solvation if solvation is not None: @@ -313,11 +312,11 @@ def configure( """ - if type(task) == list: + if isinstance(task, list): raise TypeError("Initiate one xtb or crest job at a time.") - if type(forcefield) == list: + if isinstance(forcefield, list): raise TypeError("Initiate one forcefield at a time.") - if type(optlevel) == list: + if isinstance(optlevel, list): raise TypeError("Initiate one opt level at a time.") if task == "optimize": @@ -351,7 +350,7 @@ def configure( ignore_topology=ignore_topology, ) else: - raise Error( + raise ValueError( "Task not assigned properly, please choose optimize, conformer, protonate, deprotonate, or tautomerize" ) @@ -363,34 +362,114 @@ def submit(self): """ Run xtb or crest simulation according to configured inputs. """ - owd = os.getcwd() + cwd = os.getcwd() os.chdir(self.temp_dir) - job = self.config - subprocess.call(job, shell=True) - os.chdir(owd) + subprocess.call(self.config, shell=True) + os.chdir(cwd) def finish(self): """ Parse results, save xtb output files, and clean temporary directory """ - parser = XTBParser() + # Get list of outputs + outfiles = glob.glob(os.path.join(self.temp_dir, "*")) + + + # Result container + result = {} + + # Split out geometry files + geomfiles = sorted([x for x in outfiles if x.endswith(".xyz")]) + outfiles = sorted([x for x in outfiles if not x.endswith(".xyz")]) + + # Atom count + n_atoms = self.geom.get_natoms() + + # Enumerate geometry files + for geomfile in geomfiles: + # Extract unique basename + # Replace ancillary "crest_" + basename = os.path.splitext(os.path.basename(geomfile).replace("crest_", ""))[0] + + # Open file + with open(geomfile, "r") as f: + contents = f.readlines() + + # Single-structure outputs + if (basename == "struc") or (basename == "best") or (basename == "xtbopt"): + xyzblock = "".join(contents) + + raw_mol = Chem.MolFromXYZBlock(xyzblock) + mol = Chem.Mol(raw_mol) + rdDetermineBonds.DetermineBonds(mol, charge=self.geom.get_charge()) + + # Initialize Geometry instance + geom = isicle.geometry.Geometry(mol=mol, + basename=self.geom.basename) + + # Add to result container + result[basename] = geom + + # Multi-structure outputs + elif "crest" in geomfile: + contents = ["".join(contents[i:i + n_atoms + 2]) for i in range(0, len(contents), n_atoms + 2)] + + # Iterate XYZ content + geoms = [] + for xyzblock in contents: + # Construct mol from XYZ + raw_mol = Chem.MolFromXYZBlock(xyzblock) + mol = Chem.Mol(raw_mol) + rdDetermineBonds.DetermineBonds(mol, charge=self.geom.get_charge()) + + # Initialize Geometry instance + geom = isicle.geometry.Geometry(mol=mol, + basename=self.geom.basename) + + # Append to list of geometries + geoms.append(geom) + + # Add to result container + result[basename] = geoms + + # Enumerate output files + for outfile in outfiles: + # Split name and extension + if "." in outfile: + basename, ext = os.path.basename(outfile).rsplit(".", 1) + + # No extension + else: + basename = os.path.basename(outfile) + ext = None - parser.load(os.path.join(self.temp_dir, self.geom.basename + ".out")) - self.output = parser.load(os.path.join(self.temp_dir, self.geom.basename + ".out")) + # Key management + if (ext is None) or (ext == "tmp"): + key = basename + else: + key = ext + + # Read output content + with open(outfile, "rb") as f: + contents = f.read() - result = parser.parse() + # Attempt utf-8 decode + try: + result[key] = contents.decode("utf-8") + except UnicodeDecodeError: + result[key] = contents + + # Rename select keys + if "struc" in result: + result["input"] = result.pop("struc") + + if "xtbopt" in result: + result["final"] = result.pop("xtbopt") - for i in result['geometry']: - i.add___dict__({k: v for k, v in result.items() if k != "geometry"}) - # i.__dict__.update(basename=self.geom.basename) + if "original" in result: + result["coord_original"] = result.pop("original") - if self.task != "optimize": - conformerID = 1 - for i in result['geometry']: - i.__dict__.update(conformerID=conformerID) - conformerID += 1 - return result def run(self, geom, **kwargs): @@ -430,40 +509,6 @@ def run(self, geom, **kwargs): return result - def get_structures(self): - """ - Extract all structures from containing object as a conformational ensemble. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Conformational ensemble. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - return self.geom - - raise TypeError( - "Object does not contain multiple structures. Use `get_structure` instead." - ) - - def get_structure(self): - """ - Extract structure from containing object. - - Returns - ------- - :obj:`~isicle.geometry.Geometry` - Structure instance. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - raise TypeError( - "Object contains multiple structures. Use `get_structures` instead." - ) - - return self.geom - class RDKitWrapper(Geometry, WrapperInterface): @@ -623,20 +668,16 @@ def finish(self): """ Parse RDKit conformers generated. """ - confCount = self.geom.mol.GetNumConformers() + conf_count = self.geom.mol.GetNumConformers() conformers = [ isicle.load(Chem.Mol(self.geom.mol, confId=i)) - for i in range(confCount) + for i in range(conf_count) ] - # Override conformer basename - for conformer in conformers: - conformer.basename = self.geom.basename + for conf, label in zip(conformers, range(conf_count)): + conf.__dict__.update(conformerID=label, basename=self.geom.basename) - # TODO: not an ideal overwrite of self.geom - self.geom = conformers - for conf, label in zip(self.geom, range(confCount)): - conf.__dict__.update(conformerID=label) + return isicle.conformers.ConformationalEnsemble(conformers) def run(self, geom, **kwargs): """ @@ -674,40 +715,6 @@ def run(self, geom, **kwargs): return self - def get_structures(self): - """ - Extract all structures from containing object as a conformational ensemble. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Conformational ensemble. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - return self.geom - - raise TypeError( - "Object does not contain multiple structures. Use `get_structure` instead." - ) - - def get_structure(self): - """ - Extract structure from containing object. - - Returns - ------- - :obj:`~isicle.geometry.Geometry` - Structure instance. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - raise TypeError( - "Object contains multiple structures. Use `get_structures` instead." - ) - - return self.geom - class TINKERWrapper(Geometry, WrapperInterface): @@ -1118,37 +1125,3 @@ def run(self, geom, **kwargs): self.finish() return self - - def get_structures(self): - """ - Extract all structures from containing object as a conformational ensemble. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Conformational ensemble. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - return self.geom - - raise TypeError( - "Object does not contain multiple structures. Use `get_structure` instead." - ) - - def get_structure(self): - """ - Extract structure from containing object. - - Returns - ------- - :obj:`~isicle.geometry.Geometry` - Structure instance. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - raise TypeError( - "Object contains multiple structures. Use `get_structures` instead." - ) - - return self.geom From 6dc1c3af4cda94c709118ddb0fa2f0edeefc9e40 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Tue, 23 Apr 2024 09:56:21 -0700 Subject: [PATCH 06/15] Remove extra blank line --- isicle/md.py | 1 - 1 file changed, 1 deletion(-) diff --git a/isicle/md.py b/isicle/md.py index ae5d98a..f9901c4 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -375,7 +375,6 @@ def finish(self): # Get list of outputs outfiles = glob.glob(os.path.join(self.temp_dir, "*")) - # Result container result = {} From 5f45cd2744c1a0d205cad8e171b0573131da0621 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Tue, 23 Apr 2024 09:56:37 -0700 Subject: [PATCH 07/15] Overhaul XTB parser --- isicle/parse.py | 221 ++++++++++++++++++------------------------------ 1 file changed, 80 insertions(+), 141 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index 3385f20..fd3b9c8 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -321,7 +321,8 @@ def parse(self): # Add result info to geometry object if "geometry" in result: result["geometry"].add___dict__( - {k: v for k, v in result.items() if k != 'geometry'}) + {k: v for k, v in result.items() if k != "geometry"} + ) # Store attribute self.result = result @@ -348,7 +349,7 @@ def _parse_geometry(self): Add docstring """ - return self.data['xyz']['final'] + return self.data["xyz"]["final"] def _parse_energy(self): """ @@ -360,7 +361,7 @@ def _parse_energy(self): energy = None # Cycle through file - for line in self.data['out'].split("\n"): + for line in self.data["out"].split("\n"): if "Total DFT energy" in line: # Overwrite last saved energy energy = float(line.split()[-1]) @@ -377,7 +378,7 @@ def _parse_shielding(self): shield_atoms = [] shields = [] - for line in self.data['out'].split("\n"): + for line in self.data["out"].split("\n"): if " SHIELDING" in line: shield_idxs = [int(x) for x in line.split()[2:]] if len(shield_idxs) == 0: @@ -407,7 +408,7 @@ def _parse_spin(self): """ # No spin - if "SPINSPIN" not in self.data['nw']: + if "SPINSPIN" not in self.data["nw"]: return None # TO DO: Add g-factors @@ -419,7 +420,7 @@ def _parse_spin(self): g_factor = [] ready = False - for line in self.data['out'].split("\n"): + for line in self.data["out"].split("\n"): if "Atom " in line: line = line.split() idx1 = int((line[1].split(":"))[0]) @@ -468,7 +469,7 @@ def _parse_frequency(self): natoms = None has_frequency = None - lines = self.data['out'].split("\n") + lines = self.data["out"].split("\n") for i, line in enumerate(lines): if ("geometry" in line) and (natoms is None): atom_start = i + 7 @@ -535,7 +536,7 @@ def _parse_charge(self): charges = [] ready = False - for line in self.data['out'].split("\n"): + for line in self.data["out"].split("\n"): # Load charges from table if "Atom Charge Shell Charges" in line: # Table header found. Overwrite anything saved previously @@ -593,7 +594,7 @@ def _parse_timing(self): opt = False freq = False - for i, line in enumerate(self.data['out'].split("\n")): + for i, line in enumerate(self.data["out"].split("\n")): # ? if "No." in line and len(indices) == 0: indices.append(i + 2) # 0 @@ -638,8 +639,8 @@ def _parse_molden(self): """ Add docstring """ - if 'molden' in self.data: - return self['molden'] + if "molden" in self.data: + return self["molden"] return None @@ -653,9 +654,9 @@ def _parse_connectivity(self): """ Add docstring """ - + # Split lines - lines = self.data['out'].split("\n") + lines = self.data["out"].split("\n") # Extracting Atoms & Coordinates coor_substr = "internuclear distances" @@ -684,7 +685,7 @@ def _parse_connectivity(self): # Check for result if len(connectivity) > 0: return connectivity - + return None def parse(self): @@ -712,9 +713,10 @@ def parse(self): result = {k: v for k, v in result.items() if v is not None} # Add result info to geometry object - if 'geometry' in result: - result['geometry'].add___dict__( - {k: v for k, v in result.items() if k != 'geometry'}) + if "geometry" in result: + result["geometry"].add___dict__( + {k: v for k, v in result.items() if k != "geometry"} + ) # Store attribute self.result = result @@ -870,22 +872,15 @@ class XTBParser(FileParserInterface): Add docstring """ - def __init__(self): - """ - Add docstring - """ - self.contents = None - self.result = None - self.path = None + def __init__(self, data=None): + self.data = data - def load(self, path: str): - """ - Load in the data file - """ - with open(path, "r") as f: - self.contents = f.readlines() - self.path = path - # return self.contents + self.result = {} + + def load(self, path): + self.data = isicle.io.load_pickle(path) + + self.lines = self.data["out"].split("\n") def _crest_energy(self): """ @@ -896,11 +891,11 @@ def _crest_energy(self): population = [] ready = False - for h in range(len(self.contents), 0, -1): - if "Erel/kcal" in self.contents[h]: + for h in range(len(self.lines), 0, -1): + if "Erel/kcal" in self.lines[h]: g = h + 1 - for j in range(g, len(self.contents)): - line = self.contents[j].split() + for j in range(g, len(self.lines)): + line = self.lines[j].split() if len(line) == 8: relative_energy.append(float(line[1])) total_energy.append(float(line[2])) @@ -909,7 +904,7 @@ def _crest_energy(self): if "/K" in line[1]: break - if ready == True: + if ready is True: break return { @@ -930,7 +925,7 @@ def grab_time(line): return ":".join(line[1:]).strip("\n") ready = False - for line in self.contents: + for line in self.lines: if "test MD wall time" in line: test_MD = grab_time(line) ready = True @@ -941,7 +936,7 @@ def grab_time(line): if "multilevel OPT wall time" in line: multilevel_OPT = grab_time(line) - if "MD wall time" in line and ready == True: + if "MD wall time" in line and ready is True: MD = grab_time(line) ready = False @@ -967,19 +962,19 @@ def _isomer_energy(self): complete = False relative_energies = [] total_energies = [] - for i in range(len(self.contents), 0, -1): - if "structure ΔE(kcal/mol) Etot(Eh)" in self.contents[i]: + for i in range(len(self.lines), 0, -1): + if "structure ΔE(kcal/mol) Etot(Eh)" in self.lines[i]: h = i + 1 - for j in range(h, len(self.contents)): - if self.contents[j] != " \n": - line = self.contents[j].split() + for j in range(h, len(self.lines)): + if self.lines[j] != " \n": + line = self.lines[j].split() relative_energies.append(float(line[1])) total_energies.append(float(line[2])) else: complete = True break - if complete == True: + if complete is True: break return {"relative energy": relative_energies, "total energy": total_energies} @@ -995,7 +990,7 @@ def grab_time(line): return ":".join(line[1:]).strip("\n") - for line in self.contents: + for line in self.lines: if "LMO calc. wall time" in line: LMO_time = grab_time(line) @@ -1015,7 +1010,7 @@ def _opt_energy(self): """ Add docstring """ - for line in self.contents: + for line in self.lines: if "TOTAL ENERGY" in line: energy = line.split()[3] + " Hartrees" @@ -1036,7 +1031,7 @@ def grab_time(line): scf = False anc = False - for line in self.contents: + for line in self.lines: if "wall-time" in line and tot is False: total_time = grab_time(line) tot = True @@ -1055,28 +1050,6 @@ def grab_time(line): "ANC optimizer wall time": anc_time, } - def _parse_energy(self): - """ - Add docstring - """ - if self.parse_crest == True: - return self._crest_energy() - if self.parse_opt == True: - return self._opt_energy() - if self.parse_isomer == True: - return self._isomer_energy() - - def _parse_timing(self): - """ - Add docstring - """ - if self.parse_crest == True: - return self._crest_timing() - if self.parse_opt == True: - return self._opt_timing() - if self.parse_isomer == True: - return self._isomer_timing() - def _parse_protocol(self): """ Add docstring @@ -1090,26 +1063,25 @@ def _parse_protocol(self): protocol = (line.split(":")[1]).strip("\n") return protocol - def _parse_xyz(self): + def _parse_geometry(self): """ Split .xyz into separate XYZGeometry instances """ - - FILE = self.xyz_path - if len(list(pybel.readfile("xyz", FILE))) > 1: - geom_list = [] - count = 1 - XYZ = FILE.split(".")[0] - for geom in pybel.readfile("xyz", FILE): - geom.write("xyz", "%s_%d.xyz" % (XYZ, count)) - geom_list.append("%s_%d.xyz" % (XYZ, count)) - count += 1 - - # TODO: charge needs to propagate to here - # Improve loading scheme as well - return isicle.conformers.ConformationalEnsemble([isicle.io.load(i) for i in geom_list]) - - return isicle.io.load(self.xyz_path) + geometries = {} + # Add geometry info + for key in [ + "conformers", + "rotamers", + "final", + "best", + "protonated", + "deprotonated", + "tautomers", + ]: + if key in self.data: + geometries[key] = self.data[key] + + return geometries def parse(self): """ @@ -1128,63 +1100,30 @@ def parse(self): ): raise RuntimeError("XTB job failed: {}".format(self.path)) - self.parse_crest = False - self.parse_opt = False - self.parse_isomer = False - # Initialize result object to store info - result = {} - result["protocol"] = self._parse_protocol() - - try: - result["timing"] = self._parse_timing() - except: - pass - - try: - result["energy"] = self._parse_energy() - except: - pass - - # Parse geometry from assoc. XYZ file - if self.path.endswith("xyz"): - try: - result["geometry"] = self._parse_xyz() - - except: - pass - - if self.path.endswith("out") or self.path.endswith("log"): - # try geometry parsing - XYZ = None - if result["protocol"].split()[0] == "xtb": - self.parse_opt = True - XYZ = "xtbopt.xyz" - if result["protocol"].split()[1] == "crest": - if "-deprotonate" in result["protocol"]: - self.parse_isomer = True - XYZ = "deprotonated.xyz" - elif "-protonate" in result["protocol"]: - self.parse_isomer = True - XYZ = "protonated.xyz" - elif "-tautomer" in result["protocol"]: - self.parse_isomer = True - XYZ = "tautomers.xyz" - else: - self.parse_crest = True - XYZ = "crest_conformers.xyz" - - if XYZ is None: - raise RuntimeError( - "XYZ file associated with XTB job not available,\ - please parse separately." - ) + result = { + "protocol": self._parse_protocol(), + "timing": self._parse_timing(), + "energy": self._parse_energy(), + "geometry": self._parse_geometry() + } + if result["protocol"].split()[0] == "xtb": + result["timing"] = self._opt_timing() + result["energy"] = self._opt_energy() + + if result["protocol"].split()[1] == "crest": + if any( + [ + x in result["protocol"] + for x in ["-deprotonate", "-protonate", "-tautomer"] + ] + ): + result["timing"] = self._isomer_timing() + result["energy"] = self._isomer_energy() else: - temp_dir = os.path.dirname(self.path) - self.xyz_path = os.path.join(temp_dir, XYZ) - - result["geometry"] = self._parse_xyz() + result["timing"] = self._crest_timing() + result["energy"] = self._crest_energy() return result From e2a825555f50f63db4d8ab146e589e4fc480326e Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 14:35:21 -0700 Subject: [PATCH 08/15] Remove charge attribute in init --- isicle/geometry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isicle/geometry.py b/isicle/geometry.py index 833b094..8f1bd12 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -24,8 +24,8 @@ def __init__(self, **kwargs): self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(kwargs) - if self.charge is None: - self.charge = self.get_charge() + # if self.charge is None: + # self.charge = self.get_charge() def get_basename(self): """ From 2b99d74c8786a7296813687bf95fae5dedebf0e7 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 14:36:24 -0700 Subject: [PATCH 09/15] Decrease verbosity of if checks --- isicle/md.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index f9901c4..dce194a 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -396,7 +396,7 @@ def finish(self): contents = f.readlines() # Single-structure outputs - if (basename == "struc") or (basename == "best") or (basename == "xtbopt"): + if basename in ["struc", "best", "xtbopt"]: xyzblock = "".join(contents) raw_mol = Chem.MolFromXYZBlock(xyzblock) @@ -444,7 +444,7 @@ def finish(self): ext = None # Key management - if (ext is None) or (ext == "tmp"): + if ext in [None, "tmp"]: key = basename else: key = ext From 44937f18d419feb7bcc9086390daca34e36a7841 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 14:40:09 -0700 Subject: [PATCH 10/15] Simplify return for XTB when only single geometry type present --- isicle/parse.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index fd3b9c8..f01a6f5 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -877,6 +877,9 @@ def __init__(self, data=None): self.result = {} + if data is not None: + self.lines = self.data["out"].split("\n") + def load(self, path): self.data = isicle.io.load_pickle(path) @@ -1081,7 +1084,10 @@ def _parse_geometry(self): if key in self.data: geometries[key] = self.data[key] - return geometries + if len(geometries) > 1: + return geometries + + return geometries[geometries.keys()[0]] def parse(self): """ @@ -1103,7 +1109,6 @@ def parse(self): # Initialize result object to store info result = { "protocol": self._parse_protocol(), - "timing": self._parse_timing(), "energy": self._parse_energy(), "geometry": self._parse_geometry() } From f6d0c38c043ed86cfac3f97bb3e00029199c3fda Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 15:23:43 -0700 Subject: [PATCH 11/15] Improve edge case handling when finishing a run --- isicle/md.py | 112 ++++++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 50 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index dce194a..60a53ef 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -133,9 +133,7 @@ def save_geometry(self, fmt="xyz"): isicle.io.save(geomfile, self.geom) self.geom.path = geomfile - def _configure_xtb( - self, forcefield="gfn2", optlevel="normal", solvation=None - ): + def _configure_xtb(self, forcefield="gfn2", optlevel="normal", solvation=None): """ Set command line for xtb simulations. @@ -227,7 +225,9 @@ def _configure_crest( # Add geometry s += str( - os.path.join(self.temp_dir, "{}.{}".format(self.geom.basename, self.fmt.lower())) + os.path.join( + self.temp_dir, "{}.{}".format(self.geom.basename, self.fmt.lower()) + ) ) s += " " @@ -382,37 +382,41 @@ def finish(self): geomfiles = sorted([x for x in outfiles if x.endswith(".xyz")]) outfiles = sorted([x for x in outfiles if not x.endswith(".xyz")]) - # Atom count - n_atoms = self.geom.get_natoms() + # # Atom count + # n_atoms = xtbw.geom.get_natoms() + + # Charge lookup + charge_lookup = defaultdict(lambda: 0, {"protonated": 1, "deprotonated": -1}) # Enumerate geometry files for geomfile in geomfiles: # Extract unique basename # Replace ancillary "crest_" - basename = os.path.splitext(os.path.basename(geomfile).replace("crest_", ""))[0] - - # Open file - with open(geomfile, "r") as f: - contents = f.readlines() - - # Single-structure outputs - if basename in ["struc", "best", "xtbopt"]: - xyzblock = "".join(contents) - - raw_mol = Chem.MolFromXYZBlock(xyzblock) - mol = Chem.Mol(raw_mol) - rdDetermineBonds.DetermineBonds(mol, charge=self.geom.get_charge()) - - # Initialize Geometry instance - geom = isicle.geometry.Geometry(mol=mol, - basename=self.geom.basename) - - # Add to result container - result[basename] = geom - - # Multi-structure outputs - elif "crest" in geomfile: - contents = ["".join(contents[i:i + n_atoms + 2]) for i in range(0, len(contents), n_atoms + 2)] + basename = os.path.splitext( + os.path.basename(geomfile).replace("crest_", "") + )[0] + + # Only process of-interest structures + if basename in [ + "struc", + "best", + "xtbopt", + "protonated", + "deprotonated", + "tautomers", + ]: + # Open file + with open(geomfile, "r") as f: + contents = f.readlines() + + # Get file-specific atom count + n_atoms = int(contents[0].strip()) + + # Split into individual xyz + contents = [ + "".join(contents[i : i + n_atoms + 2]) + for i in range(0, len(contents), n_atoms + 2) + ] # Iterate XYZ content geoms = [] @@ -420,35 +424,41 @@ def finish(self): # Construct mol from XYZ raw_mol = Chem.MolFromXYZBlock(xyzblock) mol = Chem.Mol(raw_mol) - rdDetermineBonds.DetermineBonds(mol, charge=self.geom.get_charge()) + rdDetermineBonds.DetermineBonds( + mol, charge=self.geom.get_charge() + charge_lookup[basename] + ) # Initialize Geometry instance - geom = isicle.geometry.Geometry(mol=mol, - basename=self.geom.basename) + geom = isicle.geometry.Geometry( + mol=mol, basename=self.geom.basename + ) # Append to list of geometries geoms.append(geom) # Add to result container - result[basename] = geoms + if len(geoms) > 1: + result[basename] = geoms + else: + result[basename] = geoms[0] # Enumerate output files for outfile in outfiles: # Split name and extension if "." in outfile: basename, ext = os.path.basename(outfile).rsplit(".", 1) - + # No extension else: basename = os.path.basename(outfile) ext = None # Key management - if ext in [None, "tmp"]: + if ext in [None, "tmp", "0"]: key = basename else: key = ext - + # Read output content with open(outfile, "rb") as f: contents = f.read() @@ -459,15 +469,18 @@ def finish(self): except UnicodeDecodeError: result[key] = contents - # Rename select keys - if "struc" in result: - result["input"] = result.pop("struc") - - if "xtbopt" in result: - result["final"] = result.pop("xtbopt") + # Renaming key + rename = { + "struc": "input", + "xtbopt": "final", + "original": "coord_original", + "protonated": "cations", + "deprotonated": "anions", + } - if "original" in result: - result["coord_original"] = result.pop("original") + # Rename matching keys + for key in result.keys() & rename.keys(): + result[rename[key]] = result.pop(key) return result @@ -510,7 +523,6 @@ def run(self, geom, **kwargs): class RDKitWrapper(Geometry, WrapperInterface): - """ Wrapper for RDKit functionality. @@ -669,8 +681,7 @@ def finish(self): """ conf_count = self.geom.mol.GetNumConformers() conformers = [ - isicle.load(Chem.Mol(self.geom.mol, confId=i)) - for i in range(conf_count) + isicle.load(Chem.Mol(self.geom.mol, confId=i)) for i in range(conf_count) ] for conf, label in zip(conformers, range(conf_count)): @@ -716,7 +727,6 @@ def run(self, geom, **kwargs): class TINKERWrapper(Geometry, WrapperInterface): - """ Wrapper for TINKER functionality. @@ -1073,7 +1083,9 @@ def finish(self): parser = TINKERParser() parser.load(os.path.join(self.temp_dir, self.geom.basename + ".tout")) - self.output = parser.load(os.path.join(self.temp_dir, self.geom.basename + ".tout")) + self.output = parser.load( + os.path.join(self.temp_dir, self.geom.basename + ".tout") + ) result = parser.parse() From ad395b1c53f7dd1d7d96872a643106b11b495c69 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 15:24:49 -0700 Subject: [PATCH 12/15] Fix bad attribute reference, return logic for XTB parser --- isicle/parse.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index f01a6f5..d388ca6 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -1059,7 +1059,7 @@ def _parse_protocol(self): """ protocol = None - for line in self.contents: + for line in self.lines: if " > " in line: protocol = line.strip("\n") if "program call" in line: @@ -1087,7 +1087,7 @@ def _parse_geometry(self): if len(geometries) > 1: return geometries - return geometries[geometries.keys()[0]] + return geometries.popitem()[1] def parse(self): """ @@ -1095,10 +1095,10 @@ def parse(self): """ # Check that the file is valid first - if len(self.contents) == 0: + if len(self.lines) == 0: raise RuntimeError("No contents to parse: {}".format(self.path)) - last_lines = "".join(self.contents[-10:]) + last_lines = "".join(self.lines[-10:]) if ( ("terminat" not in last_lines) & ("normal" not in last_lines) @@ -1109,7 +1109,6 @@ def parse(self): # Initialize result object to store info result = { "protocol": self._parse_protocol(), - "energy": self._parse_energy(), "geometry": self._parse_geometry() } @@ -1117,7 +1116,7 @@ def parse(self): result["timing"] = self._opt_timing() result["energy"] = self._opt_energy() - if result["protocol"].split()[1] == "crest": + elif result["protocol"].split()[1] == "crest": if any( [ x in result["protocol"] From c39a5b59324196939e72beeb1538159f7f8b20f9 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 16:22:55 -0700 Subject: [PATCH 13/15] Add missing import --- isicle/md.py | 1 + 1 file changed, 1 insertion(+) diff --git a/isicle/md.py b/isicle/md.py index 60a53ef..e25d829 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -1,3 +1,4 @@ +from collections import defaultdict import glob import os import subprocess From e4a6bd2d4f6b598c41b816cc532bc3b4bcc4a152 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 16:23:32 -0700 Subject: [PATCH 14/15] Fix parsing bugs for XTB --- isicle/parse.py | 61 +++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index d388ca6..b00d3b1 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -1,8 +1,5 @@ -import glob -import os import pickle import re -from os.path import splitext import numpy as np import pandas as pd @@ -41,7 +38,7 @@ def _parse_protocol(self): return self.data["inp"] def _parse_geometry(self): - return self.data["xyz"]["final"] + return self.data["xyz"] def _parse_energy(self): # Split text @@ -61,37 +58,41 @@ def _parse_energy(self): return evals[-1] def _parse_frequency(self): - # Define columns - columns = ["wavenumber", "eps", "intensity", "TX", "TY", "TZ"] + if "hess" in self.data: + # Define columns + columns = ["wavenumber", "eps", "intensity", "TX", "TY", "TZ"] - # Split sections by delimiter - blocks = self.data["hess"].split("$") + # Split sections by delimiter + blocks = self.data["hess"].split("$") - # Search for frequency values - freq_block = [x for x in blocks if x.startswith("ir_spectrum")] + # Search for frequency values + freq_block = [x for x in blocks if x.startswith("ir_spectrum")] - # Frequency values not found - if len(freq_block) == 0: - return None + # Frequency values not found + if len(freq_block) == 0: + return None - # Grab last frequency block - # Doubtful if more than one, but previous results in list - freq_block = freq_block[-1] + # Grab last frequency block + # Doubtful if more than one, but previous results in list + freq_block = freq_block[-1] - # Split block into lines - lines = freq_block.split("\n") + # Split block into lines + lines = freq_block.split("\n") - # Map float over values - vals = np.array( - [ - list(map(float, x.split())) - for x in lines - if len(x.split()) == len(columns) - ] - ) + # Map float over values + vals = np.array( + [ + list(map(float, x.split())) + for x in lines + if len(x.split()) == len(columns) + ] + ) - # Zip columns and values - return dict(zip(columns, vals.T)) + # Zip columns and values + return dict(zip(columns, vals.T)) + + # No frequency info + return None def _parse_timing(self): # Grab only last few lines @@ -894,7 +895,7 @@ def _crest_energy(self): population = [] ready = False - for h in range(len(self.lines), 0, -1): + for h in range(len(self.lines) - 1, -1, -1): if "Erel/kcal" in self.lines[h]: g = h + 1 for j in range(g, len(self.lines)): @@ -965,7 +966,7 @@ def _isomer_energy(self): complete = False relative_energies = [] total_energies = [] - for i in range(len(self.lines), 0, -1): + for i in range(len(self.lines) - 1, -1, -1): if "structure ΔE(kcal/mol) Etot(Eh)" in self.lines[i]: h = i + 1 for j in range(h, len(self.lines)): From 2a539e8e18afdc28fdd902adee3b27bf6566ec22 Mon Sep 17 00:00:00 2001 From: Sean Colby Date: Wed, 1 May 2024 16:23:50 -0700 Subject: [PATCH 15/15] Remove charge as kwarg --- isicle/qm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isicle/qm.py b/isicle/qm.py index 59dba56..b03f2fd 100644 --- a/isicle/qm.py +++ b/isicle/qm.py @@ -713,7 +713,7 @@ def configure(self, tasks='energy', functional='b3lyp', mem_stack=mem_stack) # Load geometry - config += self._configure_load(charge=self.geom.get_charge()) + config += self._configure_load() # Configure tasks for task, f, b, a, c, so in zip(tasks, cycle(functional), cycle(basis_set),