Showing 3 changed files with 347 additions and 0 deletions.
@@ -0,0 +1,225 @@
# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Stochastic reconfiguration for optimization of a variational ansatz."""

from __future__ import annotations

import math
from typing import Any, Callable

import numpy as np
import scipy.linalg
from scipy.optimize import OptimizeResult, minimize
from scipy.sparse.linalg import LinearOperator

from ffsim.optimize._util import (
    WrappedCallable,
    WrappedLinearOperator,
    jacobian_finite_diff,
    orthogonalize_columns,
)


def minimize_stochastic_reconfiguration(
    params_to_vec: Callable[[np.ndarray], np.ndarray],
    hamiltonian: LinearOperator,
    x0: np.ndarray,
    *,
    maxiter: int = 1000,
    variation: float = 1.0,
    cond: float = 1e-8,
    epsilon: float = 1e-8,
    gtol: float = 1e-5,
    optimize_hyperparameters: bool = True,
    optimize_hyperparameters_args: dict | None = None,
    callback: Callable[[OptimizeResult], Any] | None = None,
) -> OptimizeResult:
"""Minimize the energy of a variational ansatz using stochastic reconfiguration. | ||
References: | ||
- `Generalized Lanczos algorithm for variational quantum Monte Carlo`_ | ||
Args: | ||
params_to_vec: Function representing the wavefunction ansatz. It takes as input | ||
a vector of real-valued parameters and outputs the state vector represented | ||
by those parameters. | ||
hamiltonian: The Hamiltonian representing the energy to be minimized. | ||
x0: Initial guess for the parameters. | ||
maxiter: Maximum number of optimization iterations to perform. | ||
variation: Hyperparameter controlling the size of parameter variations | ||
used in the linear expansion of the wavefunction. Its value must be | ||
strictly between 0 and 1. A larger value results in larger variations. | ||
cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to | ||
solve for the parameter update. | ||
epsilon: Increment to use for approximating the gradient using | ||
finite difference. | ||
gtol: Convergence threshold for the norm of the projected gradient. | ||
optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in | ||
each iteration. Optimizing the hyperparameter incurs more function and | ||
energy evaluations in each iteration, but may speed up convergence, leading | ||
to fewer iterations overall. The optimization is performed using | ||
`scipy.optimize.minimize`_. | ||
optimize_hyperparameters_args: Arguments to use when calling | ||
`scipy.optimize.minimize`_ to optimize the hyperparameters. The call is | ||
constructed as | ||
.. code:: | ||
scipy.optimize.minimize(f, x0, **scipy_optimize_minimize_args) | ||
If not specified, the default value of `dict(method="L-BFGS-B")` will be | ||
used. | ||
callback: A callable called after each iteration. It is called with the | ||
signature | ||
.. code:: | ||
callback(intermediate_result: OptimizeResult) | ||
where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_ | ||
with attributes ``x`` and ``fun``, the present values of the parameter | ||
vector and objective function. For all iterations except for the last, | ||
it also contains the ``jac`` attribute holding the present value of the | ||
gradient, as well as ``regularization`` and ``variation`` attributes | ||
holding the present values of the `regularization` and `variation` | ||
hyperparameters. | ||
Returns: | ||
The optimization result represented as a `scipy.optimize.OptimizeResult`_ | ||
object. Note the following definitions of selected attributes: | ||
- ``x``: The final parameters of the optimization. | ||
- ``fun``: The energy associated with the final parameters. That is, the | ||
expectation value of the Hamiltonian with respect to the state vector | ||
corresponding to the parameters. | ||
- ``nfev``: The number of times the ``params_to_vec`` function was called. | ||
- ``nlinop``: The number of times the ``hamiltonian`` linear operator was | ||
applied to a vector. | ||
.. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512 | ||
.. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult | ||
.. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html | ||
.. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html | ||
""" # noqa: E501 | ||
    if variation <= 0:
        raise ValueError(f"variation must be positive. Got {variation}.")
    if maxiter < 1:
        raise ValueError(f"maxiter must be at least 1. Got {maxiter}.")

    if optimize_hyperparameters_args is None:
        optimize_hyperparameters_args = dict(method="L-BFGS-B")

    variation_param = math.sqrt(variation)
    params = x0.copy()
    converged = False
    intermediate_result = OptimizeResult(
        x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0
    )

    params_to_vec = WrappedCallable(params_to_vec, intermediate_result)
    hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result)

    for i in range(maxiter):
        # Evaluate the state vector and the Jacobian of the ansatz, with the
        # Jacobian columns orthogonalized against the state vector.
        vec = params_to_vec(params)
        jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon)
        jac = orthogonalize_columns(jac, vec)

        energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian)
        intermediate_result.njev += 1

        if i > 0 and callback is not None:
            intermediate_result.x = params
            intermediate_result.fun = energy
            intermediate_result.jac = grad
            intermediate_result.overlap_mat = overlap_mat
            intermediate_result.variation = variation
            callback(intermediate_result)

        if np.linalg.norm(grad) < gtol:
            converged = True
            break

        if optimize_hyperparameters:
            # Optimize the variation hyperparameter by minimizing the energy of
            # the updated parameters. The square-root reparametrization keeps
            # the variation positive.
            def f(x: np.ndarray) -> float:
                (variation_param,) = x
                variation = variation_param**2
                param_update = _get_param_update(
                    grad,
                    overlap_mat,
                    variation,
                    cond=cond,
                )
                vec = params_to_vec(params + param_update)
                return np.vdot(vec, hamiltonian @ vec).real

            result = minimize(
                f,
                x0=[variation_param],
                **optimize_hyperparameters_args,
            )
            (variation_param,) = result.x
            variation = variation_param**2

        param_update = _get_param_update(
            grad,
            overlap_mat,
            variation,
            cond=cond,
        )
        params = params + param_update
        intermediate_result.nit += 1

    vec = params_to_vec(params)
    energy = np.vdot(vec, hamiltonian @ vec).real

    intermediate_result.x = params
    intermediate_result.fun = energy
    del intermediate_result.jac
    if callback is not None:
        callback(intermediate_result)

    if converged:
        success = True
        message = "Convergence: Norm of projected gradient <= gtol."
    else:
        success = False
        message = "Stop: Total number of iterations reached limit."

    return OptimizeResult(
        x=params,
        success=success,
        message=message,
        fun=energy,
        jac=grad,
        nfev=intermediate_result.nfev,
        njev=intermediate_result.njev,
        nlinop=intermediate_result.nlinop,
        nit=intermediate_result.nit,
    )


def _sr_matrices(
    vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    # Compute the energy, the (real) energy gradient, and the real part of the
    # overlap matrix of the Jacobian columns, the quantities needed to form the
    # stochastic reconfiguration parameter update.
    energy = np.vdot(vec, hamiltonian @ vec)
    grad = vec.conj() @ (hamiltonian @ jac)
    overlap_mat = jac.T.conj() @ jac
    return energy.real, 2 * grad.real, overlap_mat.real


def _get_param_update(
    grad: np.ndarray, overlap_mat: np.ndarray, variation: float, cond: float
) -> np.ndarray:
    # Solve the (possibly ill-conditioned) linear system
    #     overlap_mat @ x = -0.5 * variation * grad
    # in the least-squares sense, truncating small singular values via `cond`.
    x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond)
    return x
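For orientation: the update computed above solves overlap_mat @ x = -(variation / 2) * grad in the least-squares sense, where overlap_mat = Re(J†J) and grad = 2 Re(⟨ψ|H J⟩), with J the Jacobian of the ansatz orthogonalized against the state. Below is a minimal usage sketch of the new entry point on a toy two-level problem; the 2x2 Hamiltonian and single-parameter rotation ansatz are illustrative assumptions, not part of this commit:

import numpy as np
from scipy.sparse.linalg import aslinearoperator

import ffsim

# Hypothetical toy problem: a 2x2 Hermitian matrix as the Hamiltonian and a
# normalized single-parameter rotation ansatz |psi(theta)> = (cos theta, sin theta).
hamiltonian = aslinearoperator(np.array([[1.0, 0.5], [0.5, -1.0]]))


def params_to_vec(x: np.ndarray) -> np.ndarray:
    (theta,) = x
    return np.array([np.cos(theta), np.sin(theta)], dtype=complex)


result = ffsim.optimize.minimize_stochastic_reconfiguration(
    params_to_vec, hamiltonian=hamiltonian, x0=np.array([0.1])
)
print(result.fun)  # should approach the lowest eigenvalue, about -1.118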
tests/python/optimize/stochastic_reconfiguration_test.py
118 changes: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Tests for stochastic reconfiguration optimization algorithm."""

from __future__ import annotations

from collections import defaultdict

import numpy as np
import pyscf
import pytest

import ffsim


def test_minimize_stochastic_reconfiguration():
    # Build an H2 molecule
    mol = pyscf.gto.Mole()
    mol.build(
        atom=[["H", (0, 0, 0)], ["H", (0, 0, 1.8)]],
        basis="sto-6g",
    )
    hartree_fock = pyscf.scf.RHF(mol)
    hartree_fock.kernel()

    # Initialize parameters
    n_reps = 2
    n_params = ffsim.UCJOperator.n_params(hartree_fock.mol.nao_nr(), n_reps)
    rng = np.random.default_rng(1804)
    x0 = rng.uniform(-10, 10, size=n_params)

    # Get molecular data and molecular Hamiltonian (one- and two-body tensors)
    mol_data = ffsim.MolecularData.from_scf(hartree_fock)
    norb = mol_data.norb
    nelec = mol_data.nelec
    mol_hamiltonian = mol_data.hamiltonian
    hamiltonian = ffsim.linear_operator(mol_hamiltonian, norb=norb, nelec=nelec)
    reference_state = ffsim.hartree_fock_state(norb, nelec)

    def params_to_vec(x: np.ndarray):
        operator = ffsim.UCJOperator.from_parameters(x, norb=norb, n_reps=n_reps)
        return ffsim.apply_unitary(reference_state, operator, norb=norb, nelec=nelec)

    def energy(x: np.ndarray):
        vec = params_to_vec(x)
        return np.real(np.vdot(vec, hamiltonian @ vec))

    info = defaultdict(list)

    def callback(intermediate_result):
        info["x"].append(intermediate_result.x)
        info["fun"].append(intermediate_result.fun)
        np.testing.assert_allclose(
            energy(intermediate_result.x), intermediate_result.fun
        )
        if hasattr(intermediate_result, "jac"):
            info["jac"].append(intermediate_result.jac)
        if hasattr(intermediate_result, "variation"):
            info["variation"].append(intermediate_result.variation)

    # default optimization
    result = ffsim.optimize.minimize_stochastic_reconfiguration(
        params_to_vec, x0=x0, hamiltonian=hamiltonian, callback=callback
    )
    np.testing.assert_allclose(energy(result.x), result.fun)
    np.testing.assert_allclose(result.fun, -0.970773)
    np.testing.assert_allclose(info["fun"][0], -0.853501, atol=1e-5)
    np.testing.assert_allclose(info["fun"][-1], -0.970773, atol=1e-5)
    for params, fun in zip(info["x"], info["fun"]):
        np.testing.assert_allclose(energy(params), fun)
    assert result.nit <= 20
    assert result.nit < result.nlinop < result.nfev

    # optimization without optimizing hyperparameters
    info = defaultdict(list)
    result = ffsim.optimize.minimize_stochastic_reconfiguration(
        params_to_vec,
        x0=x0,
        hamiltonian=hamiltonian,
        optimize_hyperparameters=False,
        callback=callback,
    )
    np.testing.assert_allclose(energy(result.x), result.fun)
    np.testing.assert_allclose(result.fun, -0.970773)
    for params, fun in zip(info["x"], info["fun"]):
        np.testing.assert_allclose(energy(params), fun)
    assert result.nit <= 30
    assert result.nit < result.nlinop < result.nfev
    assert set(info["variation"]) == {1.0}

    # optimization with maxiter
    info = defaultdict(list)
    result = ffsim.optimize.minimize_stochastic_reconfiguration(
        params_to_vec, hamiltonian=hamiltonian, x0=x0, maxiter=3, callback=callback
    )
    assert result.nit == 3
    assert len(info["x"]) == 3
    assert len(info["fun"]) == 3
    assert len(info["jac"]) == 2
    np.testing.assert_allclose(energy(result.x), result.fun)

    # test raising errors
    with pytest.raises(ValueError, match="variation"):
        result = ffsim.optimize.minimize_stochastic_reconfiguration(
            params_to_vec, x0=x0, hamiltonian=hamiltonian, variation=-0.1
        )
    with pytest.raises(ValueError, match="maxiter"):
        result = ffsim.optimize.minimize_stochastic_reconfiguration(
            params_to_vec, x0=x0, hamiltonian=hamiltonian, maxiter=0
        )
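As a usage note on the hyperparameter search exercised above: the inner call to scipy.optimize.minimize can be tuned through optimize_hyperparameters_args. A hedged sketch, reusing params_to_vec, hamiltonian, and x0 from this test; the option values are illustrative, not part of this commit:

# Hypothetical: cap the inner L-BFGS-B hyperparameter search at 5 iterations
# per outer stochastic reconfiguration step.
result = ffsim.optimize.minimize_stochastic_reconfiguration(
    params_to_vec,
    x0=x0,
    hamiltonian=hamiltonian,
    optimize_hyperparameters_args=dict(method="L-BFGS-B", options=dict(maxiter=5)),
)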