Skip to content

Commit

Permalink
Merge pull request #199 from hjnpark/master
Browse files Browse the repository at this point in the history
BigChem implementation
  • Loading branch information
leeping authored Oct 11, 2024
2 parents 00e7722 + b4edfb2 commit feab507
Show file tree
Hide file tree
Showing 16 changed files with 402 additions and 82 deletions.
6 changes: 6 additions & 0 deletions geometric/data/hcn_minimized.tcin
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
run gradient
coordinates hcn_minimized.xyz
method rhf
basis 3-21g
charge 0
spinmult 1
5 changes: 5 additions & 0 deletions geometric/data/hcn_minimized.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
3
Iteration 0 Energy -92.35408417
C 0.0000000000 0.0000000000 0.0000000000
N 0.0000000000 0.0000000000 1.1371177000
H 0.0000000000 0.0000000000 -1.0502281000
12 changes: 12 additions & 0 deletions geometric/data/hcn_minimized_ts.qcin
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
$molecule
0 1
C -0.1088783634 -0.6365101639 0.0043221742
N -0.6393457902 0.4205365638 0.0052498438
H 0.7532976101 0.2173493463 -0.0090384631
$end

$rem
jobtype force
exchange hf
basis 3-21g
$end
13 changes: 13 additions & 0 deletions geometric/data/hcn_neb_input.qcin
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
$molecule
0 1
C -0.1088783634 -0.6365101639 0.0043221742
N -0.6393457902 0.4205365638 0.0052498438
H 0.7532976101 0.2173493463 -0.0090384631
$end

$rem
jobtype force
exchange hf
basis 3-21g
$end

6 changes: 6 additions & 0 deletions geometric/data/hcn_neb_input.tcin
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
run gradient
coordinates hcn_neb_input.xyz
method rhf
basis 3-21g
charge 0
spinmult 1
43 changes: 35 additions & 8 deletions geometric/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,11 @@ def __init__(self, molecule, tcin, dirname=None, pdb=None):
self.scr = self.tcin['scrdir']
else:
self.scr = 'scr'

# Getting method and basis
self.method = self.tcin['method']
self.basis = self.tcin['basis']

# Always specify the TeraChem scratch folder name
self.tcin['scrdir'] = self.scr
# A few notes about the electronic structure method
Expand Down Expand Up @@ -1069,7 +1074,8 @@ def load_gaussian_input(self, gaussian_input):

elif line.startswith('#'):
self.route_line = line

method_basis = line.split(' ')[1]
self.method, self.basis = method_basis.split('/')
found_force_nostep = "force=nostep" in line.lower()
found_force = "force=" in line.lower()
found_symmetry_none = "symmetry=none" in line.lower()
Expand Down Expand Up @@ -1388,7 +1394,10 @@ def load_psi4_input(self, psi4in):
else:
psi4_temp.append(line)
if "gradient(" in line:
self.method = re.findall(r'\((.*?)\)', line)[0].replace("'","").replace('"','')
found_gradient = True
if "set basis" in line:
self.basis = line.split(" ")[-1].replace('\n', '')
if found_gradient == False:
raise RuntimeError("Psi4 inputfile %s should have gradient() command." % psi4in)
self.M = Molecule()
Expand All @@ -1413,7 +1422,7 @@ def calc_new(self, coords, dirname):
outfile.write(line)
try:
# Run Psi4
subprocess.check_call('psi4%s input.dat' % self.nt(), cwd=dirname, shell=True)
subprocess.check_call('psi4%s input.dat run.out' % self.nt(), cwd=dirname, shell=True)
# Read energy and gradients from Psi4 output
result = self.read_result(dirname)
except (OSError, IOError, RuntimeError, subprocess.CalledProcessError):
Expand All @@ -1439,17 +1448,22 @@ def calc_wq_new(self, coords, dirname):
# self.M.edit_qcrems({'jobtype':'force'})
# self.M[0].write(os.path.join(dirname, 'run.in'))
in_files = [('%s/input.dat' % dirname, 'input.dat')]
out_files = [('%s/output.dat' % dirname, 'output.dat'), ('%s/run.log' % dirname, 'run.log')]
out_files = [('%s/run.out' % dirname, 'run.out'), ('%s/run.log' % dirname, 'run.log')]
# We will assume that the number of threads on the worker is 1, as this maximizes efficiency
# in the limit of large numbers of jobs, although it may be controlled via environment variables.
queue_up_src_dest(wq, 'psi4 input.dat > run.log 2>&1', in_files, out_files, verbose=False)

queue_up_src_dest(wq, 'psi4 input.dat run.out 2>&1', in_files, out_files, verbose=False)

def number_output(self, dirname, calcNum):
if not os.path.exists(os.path.join(dirname, 'run.out')):
raise RuntimeError('run.out does not exist')
shutil.copy2(os.path.join(dirname,'run.out'), os.path.join(dirname,'run_%03i.out' % calcNum))

def read_result(self, dirname, check_coord=None):
""" Read Psi4 calculation output. """
if check_coord is not None:
raise CheckCoordError("Coordinate checking not implemented")
energy, gradient = None, None
psi4out = os.path.join(dirname, 'output.dat')
psi4out = os.path.join(dirname, 'run.out')
with open(psi4out) as outfile:
found_grad = False
found_num_grad = False
Expand Down Expand Up @@ -1518,6 +1532,11 @@ def __init__(self, molecule, dirname=None, qcdir=None, threads=None):
self.threads = threads
self.prep_temp_folder(dirname, qcdir)

self.method = self.M.qcrems[0].get('method')
if self.method is None:
self.method = self.M.qcrems[0].get('exchange')
self.basis = self.M.qcrems[0].get('basis')

def prep_temp_folder(self, dirname, qcdir):
# Provide an initial qchem scratch folder (e.g. supplied initial guess)
if not qcdir:
Expand Down Expand Up @@ -1855,13 +1874,21 @@ def calc_new(self, coords, dirname):
new_schema["molecule"]["geometry"] = coords.tolist()
new_schema.pop("program", None)
ret = qcengine.compute(new_schema, self.program, return_dict=True)
# store the schema_traj for run_json to pick up
self.schema_traj.append(ret)

if ret["success"] is False:
raise QCEngineAPIEngineError("QCEngineAPI computation did not execute correctly. Message: " + ret["error"]["error_message"])
# Unpack the energy and gradient
if "return_energy" not in ret["properties"]:
# SinglepointRecord dictionary from terachem.py in QCEngine won't have 'return_energy' key.
# Setting 'scf_total_energy' as 'return_energy' here, otherwise energies in OptimizationResult will be None.
ret["properties"]["return_energy"] = ret["properties"]["scf_total_energy"]

energy = ret["properties"]["return_energy"]
gradient = np.array(ret["return_result"])

# store the schema_traj for run_json to pick up
self.schema_traj.append(ret)

return {'energy':energy, 'gradient':gradient}

def calc(self, coords, dirname, **kwargs):
Expand Down
54 changes: 44 additions & 10 deletions geometric/neb.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ def __init__(self, molecule, engine, tmpdir, params, coords=None):
self.tmpdir = tmpdir
# HP 5/2/2023: coordtype is set to 'cart' here.
self.coordtype='cart'

# coords is a 2D array with dimensions (N_image, N_atomx3) in atomic units
if coords is not None:
# Reshape the array if we passed in a flat array
Expand Down Expand Up @@ -292,19 +293,49 @@ def ComputeEnergyGradient(self, cyc=None, result=None):
"""Compute energies and gradients for each structure."""
# This is the parallel point.
wq = getWorkQueue()
if wq is None:
for i in range(len(self)):
if result:
self.Structures[i].ComputeEnergyGradient(result=result[i])
else:
self.Structures[i].ComputeEnergyGradient()
else: # If work queue is available, handle jobs with the work queue.
if wq: # If work queue is available, handle jobs with the work queue.
for i in range(len(self)):
self.Structures[i].QueueEnergyGradient()
wq_wait(wq, print_time=600)
for i in range(len(self)):
self.Structures[i].GetEnergyGradient()
if cyc is not None:
elif self.params.bigchem:
# If BigChem is available, it will be used to carry the single-point calculations.
from qcio import Molecule as qcio_Molecule, ProgramInput
from bigchem import compute, group
elems = self.Structures[0].M.elem
molecules = []

# Creating a list with qcio Molecule objects and submitting calculations.
for Structure in self.Structures:
molecules.append(qcio_Molecule(symbols=elems, geometry=Structure.cartesians.reshape(-1,3)))

outputs = group(compute.s(self.engine.__class__.__name__.lower(),
ProgramInput(molecule=qcio_M, calctype="gradient",
model={"method":self.engine.method, "basis": self.engine.basis},
extras={"order":i}),
) for i, qcio_M in enumerate(molecules)).apply_async()

# Getting the records
records = outputs.get()

# Passing the results to chain.ComputeEnergyGradient()
for i in range(len(self)):
assert records[i].input_data.extras["order"] == i
result = {"energy": records[i].results.energy, "gradient": np.array(records[i].results.gradient).ravel()}
self.Structures[i].ComputeEnergyGradient(result=result)

# Deleting the records
outputs.forget()

else:
for i in range(len(self)):
if result:
self.Structures[i].ComputeEnergyGradient(result=result[i])
else:
self.Structures[i].ComputeEnergyGradient()
# When BigChem is used, it does not write output files to disk.
if cyc is not None and not self.params.bigchem:
for i in range(len(self)):
self.Structures[i].engine.number_output(self.Structures[i].tmpdir, cyc)
self.haveCalcs = True
Expand Down Expand Up @@ -1682,8 +1713,11 @@ def main():

M, engine = get_molecule_engine(**args)

if params.port != 0:
createWorkQueue(params.port, debug=params.verbose)
if params.bigchem:
logger.info("BigChem will be used to carry singlepoint calculations. \n")

if args.get('wqport', 0):
createWorkQueue(args.get('wqport'), debug=params.verbose)

if params.prefix is None:
tmpdir = os.path.splitext(args["input"])[0] + ".tmp"
Expand Down
56 changes: 54 additions & 2 deletions geometric/normal_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from .molecule import Molecule, PeriodicTable
from .nifty import logger, kb, kb_si, hbar, au2kj, au2kcal, ang2bohr, bohr2ang, c_lightspeed, avogadro, cm2au, amu2au, ambervel2au, wq_wait, getWorkQueue, commadash, bak

def calc_cartesian_hessian(coords, molecule, engine, dirname, read_data=True, verbose=0):
def calc_cartesian_hessian(coords, molecule, engine, dirname, read_data=True, bigchem=False, verbose=0):
"""
Calculate the Cartesian Hessian using finite difference, and/or read data from disk.
Data is stored in a folder <prefix>.tmp/hessian, with gradient calculations found in
Expand All @@ -51,7 +51,7 @@ def calc_cartesian_hessian(coords, molecule, engine, dirname, read_data=True, ve
Parameters
----------
coords : np.ndarray
Nx3 array of Cartesian coordinates in atomic units
1-dimensional array of shape (3*N_atoms) containing atomic coordinates in Bohr
molecule : Molecule
Molecule object
engine : Engine
Expand Down Expand Up @@ -134,6 +134,58 @@ def calc_cartesian_hessian(coords, molecule, engine, dirname, read_data=True, ve
gbak = engine.read_wq(coords, dirname_d)['gradient']
coords[i] += h
Hx[i] = (gfwd-gbak)/(2*h)

elif bigchem:
# If BigChem is ready, it will be used to parallelize the Hessian calculation.
logger.info("BigChem will be used to calculate the Hessian. \n")
from qcio import Molecule as qcio_Molecule, ProgramInput
from bigchem import compute, group

elems = molecule[0].elem

# Uncommenting the following 6 lines and commenting out the rest of the lines will use BigChem's parallel_hessian function.
#from bigchem.algos import parallel_hessian
#qcio_M = qcio_Molecule(symbols=elems, geometry=coords.reshape(-1,3))
#input = ProgramInput(molecule=qcio_M, model={"method":engine.method, "basis": engine.basis}, calctype='hessian')
#output = parallel_hessian("psi4", input).delay()
#rec = output.get()
#Hx = rec.results.hessian

# Creating a list containing qcio Molecule obejcts with different geometries.
molecules = []
for i in range(nc):
coords[i] += h
molecules.append(qcio_Molecule(symbols=elems, geometry=coords.reshape(-1,3)))
coords[i] -= 2*h
molecules.append(qcio_Molecule(symbols=elems, geometry=coords.reshape(-1,3)))
coords[i] += h

# Submitting calculations
outputs = group(compute.s(engine.__class__.__name__.lower(),
ProgramInput(molecule=qcio_M, calctype='gradient',
model={"method":engine.method, "basis": engine.basis},
extras={"order":i}),
) for i, qcio_M in enumerate(molecules)).apply_async()

# Getting the records
records = outputs.get(disable_sync_subtasks=False)
assert len(records) == nc*2

# Grouping the recrods
grouped_records = list(zip(records[::2], records[1::2]))

# Iterating through the grouped records to calculate the Hessian
for i, (fwd_rec, bak_rec) in enumerate(grouped_records):
# Double checking the order
assert fwd_rec.input_data.extras["order"] == i*2
assert bak_rec.input_data.extras["order"] == fwd_rec.input_data.extras["order"] + 1
gfwd = fwd_rec.results.gradient.ravel()
gbak = bak_rec.results.gradient.ravel()
Hx[i] = (gfwd-gbak)/(2*h)

# Deleting the records in the backend
outputs.forget()

else:
# First calculate a gradient at the central point, for linking scratch files.
engine.calc(coords, dirname, read_data=read_data)
Expand Down
20 changes: 10 additions & 10 deletions geometric/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Authors: Lee-Ping Wang, Chenchen Song
Contributors: Yudong Qiu, Daniel G. A. Smith, Alberto Gobbi, Josh Horton
Contributors: Heejune Park, Yudong Qiu, Daniel G. A. Smith, Alberto Gobbi, Josh Horton
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -67,7 +67,7 @@ def __init__(self, coords, molecule, IC, engine, dirname, params, print_info=Tru
Parameters
----------
coords : np.ndarray
Nx3 array of Cartesian coordinates in atomic units
1-dimensional array of shape (3*N_atoms) containing atomic coordinates in Bohr
molecule : Molecule
Molecule object (Units Angstrom)
IC : InternalCoordinates
Expand Down Expand Up @@ -333,7 +333,7 @@ def calcEnergyForce(self):
if self.params.hessian == 'each' or self.recalcHess:
# Hx is assumed to be the Cartesian Hessian at the current step.
# Otherwise we use the variable name Hx0 to avoid almost certain confusion.
self.Hx = calc_cartesian_hessian(self.X, self.molecule, self.engine, self.dirname, read_data=True, verbose=self.params.verbose)
self.Hx = calc_cartesian_hessian(self.X, self.molecule, self.engine, self.dirname, read_data=True, bigchem=self.params.bigchem, verbose=self.params.verbose)
if self.params.frequency:
self.frequency_analysis(self.Hx, 'iter%03i' % self.Iteration, False)
if self.recalcHess:
Expand All @@ -345,7 +345,7 @@ def calcEnergyForce(self):
self.recalcHess = False
elif self.Iteration == 0:
if self.params.hessian in ['first', 'stop', 'first+last'] and not hasattr(self.params, 'hess_data'):
self.Hx0 = calc_cartesian_hessian(self.X, self.molecule, self.engine, self.dirname, read_data=True, verbose=self.params.verbose)
self.Hx0 = calc_cartesian_hessian(self.X, self.molecule, self.engine, self.dirname, read_data=True, bigchem=self.params.bigchem, verbose=self.params.verbose)
logger.info(">> Initial Cartesian Hessian Eigenvalues\n")
self.SortedEigenvalues(self.Hx0)
if self.params.frequency:
Expand Down Expand Up @@ -467,7 +467,7 @@ def IRC_step(self):
logger.info('First, following the imaginary mode vector\n')
if self.TSWavenum[1] < 0:
raise IRCError("There are more than one imaginary vibrational mode. Please optimize the structure and try again.\n")
elif self.TSWavenum[0] > 0:
elif self.TSWavenum[0] >= 0:
raise IRCError("No imaginary mode detected. Please optimize the structure and try again.\n")

self.IRC_adjfactor = np.linalg.norm(self.TSNormal_modes_x[0] * np.sqrt(self.IC.mass))
Expand Down Expand Up @@ -1050,7 +1050,7 @@ def optimizeGeometry(self):
Hx = self.IC.calcHessCart(self.X, self.G, self.H)
np.savetxt(self.params.write_cart_hess, Hx, fmt='% 14.10f')
if self.params.hessian in ['last', 'first+last', 'each']:
Hx = calc_cartesian_hessian(self.X, self.molecule, self.engine, self.dirname, read_data=False, verbose=self.params.verbose)
Hx = calc_cartesian_hessian(self.X, self.molecule, self.engine, self.dirname, read_data=False, bigchem=self.params.bigchem, verbose=self.params.verbose)
if self.params.frequency:
self.frequency_analysis(Hx, 'last', True)
return self.progress
Expand Down Expand Up @@ -1095,7 +1095,7 @@ def Optimize(coords, molecule, IC, engine, dirname, params, print_info=True):
Parameters
----------
coords : np.ndarray
Nx3 array of Cartesian coordinates in atomic units
1-dimensional array of shape (3*N_atoms) containing atomic coordinates in Bohr
molecule : Molecule
Molecule object
IC : InternalCoordinates
Expand Down Expand Up @@ -1187,9 +1187,9 @@ def run_optimizer(**kwargs):
M, engine = get_molecule_engine(**kwargs)

# Create Work Queue object
if kwargs.get('port', 0):
if kwargs.get('wqport', 0):
logger.info("Creating Work Queue object for distributed Hessian calculation\n")
createWorkQueue(kwargs['port'], debug=verbose>1)
createWorkQueue(kwargs['wqport'], debug=verbose>1)

# Get initial coordinates in bohr
coords = M.xyzs[0].flatten() * ang2bohr
Expand Down Expand Up @@ -1337,7 +1337,7 @@ def run_optimizer(**kwargs):
if params.qdata is not None: Mfinal.write('qdata-final.txt')
print_citation(logger)
logger.info("Time elapsed since start of run_optimizer: %.3f seconds\n" % (time.time()-t0))
if kwargs.get('port', 0):
if kwargs.get('wqport', 0):
destroyWorkQueue()
return progress

Expand Down
Loading

0 comments on commit feab507

Please sign in to comment.