diff --git a/python/ffsim/optimize/__init__.py b/python/ffsim/optimize/__init__.py
index 594e4191a..8cb5b45c5 100644
--- a/python/ffsim/optimize/__init__.py
+++ b/python/ffsim/optimize/__init__.py
@@ -11,7 +11,11 @@
 """Optimization algorithms."""
 
 from ffsim.optimize.linear_method import minimize_linear_method
+from ffsim.optimize.stochastic_reconfiguration import (
+    minimize_stochastic_reconfiguration,
+)
 
 __all__ = [
     "minimize_linear_method",
+    "minimize_stochastic_reconfiguration",
 ]
diff --git a/python/ffsim/optimize/stochastic_reconfiguration.py b/python/ffsim/optimize/stochastic_reconfiguration.py
new file mode 100644
index 000000000..4253e19f0
--- /dev/null
+++ b/python/ffsim/optimize/stochastic_reconfiguration.py
@@ -0,0 +1,225 @@
+# (C) Copyright IBM 2024.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Stochastic reconfiguration for optimization of a variational ansatz."""
+
+from __future__ import annotations
+
+import math
+from typing import Any, Callable
+
+import numpy as np
+import scipy.linalg
+from scipy.optimize import OptimizeResult, minimize
+from scipy.sparse.linalg import LinearOperator
+
+from ffsim.optimize._util import (
+    WrappedCallable,
+    WrappedLinearOperator,
+    jacobian_finite_diff,
+    orthogonalize_columns,
+)
+
+
+def minimize_stochastic_reconfiguration(
+    params_to_vec: Callable[[np.ndarray], np.ndarray],
+    hamiltonian: LinearOperator,
+    x0: np.ndarray,
+    *,
+    maxiter: int = 1000,
+    variation: float = 1.0,
+    cond: float = 1e-8,
+    epsilon: float = 1e-8,
+    gtol: float = 1e-5,
+    optimize_hyperparameters: bool = True,
+    optimize_hyperparameters_args: dict | None = None,
+    callback: Callable[[OptimizeResult], Any] | None = None,
+) -> OptimizeResult:
+    """Minimize the energy of a variational ansatz using stochastic reconfiguration.
+
+    References:
+
+    - `Generalized Lanczos algorithm for variational quantum Monte Carlo`_
+
+    Args:
+        params_to_vec: Function representing the wavefunction ansatz. It takes as input
+            a vector of real-valued parameters and outputs the state vector represented
+            by those parameters.
+        hamiltonian: The Hamiltonian representing the energy to be minimized.
+        x0: Initial guess for the parameters.
+        maxiter: Maximum number of optimization iterations to perform.
+        variation: Hyperparameter controlling the size of parameter variations
+            used in the linear expansion of the wavefunction. Its value must be
+            positive. A larger value results in larger variations.
+        cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to
+            solve for the parameter update.
+        epsilon: Increment to use for approximating the gradient using
+            finite difference.
+        gtol: Convergence threshold for the norm of the projected gradient.
+        optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in
+            each iteration. Optimizing the hyperparameter incurs more function and
+            energy evaluations in each iteration, but may speed up convergence, leading
+            to fewer iterations overall. The optimization is performed using
+            `scipy.optimize.minimize`_.
+        optimize_hyperparameters_args: Arguments to use when calling
+            `scipy.optimize.minimize`_ to optimize the hyperparameters. The call is
+            constructed as
+
+            .. code::
+
+                scipy.optimize.minimize(f, x0, **optimize_hyperparameters_args)
+
+            If not specified, the default value of `dict(method="L-BFGS-B")` will be
+            used.
+
+        callback: A callable called after each iteration. It is called with the
+            signature
+
+            .. code::
+
+                callback(intermediate_result: OptimizeResult)
+
+            where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_
+            with attributes ``x`` and ``fun``, the present values of the parameter
+            vector and objective function. For all iterations except for the last,
+            it also contains the ``jac`` attribute holding the present value of the
+            gradient, as well as the ``variation`` attribute holding the present
+            value of the `variation` hyperparameter.
+
+    Returns:
+        The optimization result represented as a `scipy.optimize.OptimizeResult`_
+        object. Note the following definitions of selected attributes:
+
+        - ``x``: The final parameters of the optimization.
+        - ``fun``: The energy associated with the final parameters. That is, the
+          expectation value of the Hamiltonian with respect to the state vector
+          corresponding to the parameters.
+        - ``nfev``: The number of times the ``params_to_vec`` function was called.
+        - ``nlinop``: The number of times the ``hamiltonian`` linear operator was
+          applied to a vector.
+
+    .. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512
+    .. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult
+    .. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html
+    .. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
+    """  # noqa: E501
+    if variation <= 0:
+        raise ValueError(f"variation must be positive. Got {variation}.")
+    if maxiter < 1:
+        raise ValueError(f"maxiter must be at least 1. Got {maxiter}.")
Got {maxiter}.") + + if optimize_hyperparameters_args is None: + optimize_hyperparameters_args = dict(method="L-BFGS-B") + + variation_param = math.sqrt(variation) + params = x0.copy() + converged = False + intermediate_result = OptimizeResult( + x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0 + ) + + params_to_vec = WrappedCallable(params_to_vec, intermediate_result) + hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result) + + for i in range(maxiter): + vec = params_to_vec(params) + jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon) + jac = orthogonalize_columns(jac, vec) + + energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian) + intermediate_result.njev += 1 + + if i > 0 and callback is not None: + intermediate_result.x = params + intermediate_result.fun = energy + intermediate_result.jac = grad + intermediate_result.overlap_mat = overlap_mat + intermediate_result.variation = variation + callback(intermediate_result) + + if np.linalg.norm(grad) < gtol: + converged = True + break + + if optimize_hyperparameters: + + def f(x: np.ndarray) -> float: + (variation_param,) = x + variation = variation_param**2 + param_update = _get_param_update( + grad, + overlap_mat, + variation, + cond=cond, + ) + vec = params_to_vec(params + param_update) + return np.vdot(vec, hamiltonian @ vec).real + + result = minimize( + f, + x0=[variation_param], + **optimize_hyperparameters_args, + ) + (variation_param,) = result.x + variation = variation_param**2 + + param_update = _get_param_update( + grad, + overlap_mat, + variation, + cond=cond, + ) + params = params + param_update + intermediate_result.nit += 1 + + vec = params_to_vec(params) + energy = np.vdot(vec, hamiltonian @ vec).real + + intermediate_result.x = params + intermediate_result.fun = energy + del intermediate_result.jac + if callback is not None: + callback(intermediate_result) + + if converged: + success = True + message = "Convergence: Norm of projected gradient <= gtol." + else: + success = False + message = "Stop: Total number of iterations reached limit." + + return OptimizeResult( + x=params, + success=success, + message=message, + fun=energy, + jac=grad, + nfev=intermediate_result.nfev, + njev=intermediate_result.njev, + nlinop=intermediate_result.nlinop, + nit=intermediate_result.nit, + ) + + +def _sr_matrices( + vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + energy = np.vdot(vec, hamiltonian @ vec) + grad = vec.conj() @ (hamiltonian @ jac) + overlap_mat = jac.T.conj() @ jac + return energy.real, 2 * grad.real, overlap_mat.real + + +def _get_param_update( + grad: np.ndarray, overlap_mat: np.ndarray, variation: float, cond: float +) -> np.ndarray: + x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond) + return x diff --git a/tests/python/optimize/stochastic_reconfiguration_test.py b/tests/python/optimize/stochastic_reconfiguration_test.py new file mode 100644 index 000000000..f5b57c88f --- /dev/null +++ b/tests/python/optimize/stochastic_reconfiguration_test.py @@ -0,0 +1,118 @@ +# (C) Copyright IBM 2024. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. 
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Tests for stochastic reconfiguration optimization algorithm."""
+
+from __future__ import annotations
+
+from collections import defaultdict
+
+import numpy as np
+import pyscf
+import pytest
+
+import ffsim
+
+
+def test_minimize_stochastic_reconfiguration():
+    # Build an H2 molecule
+    mol = pyscf.gto.Mole()
+    mol.build(
+        atom=[["H", (0, 0, 0)], ["H", (0, 0, 1.8)]],
+        basis="sto-6g",
+    )
+    hartree_fock = pyscf.scf.RHF(mol)
+    hartree_fock.kernel()
+
+    # Initialize parameters
+    n_reps = 2
+    n_params = ffsim.UCJOperator.n_params(hartree_fock.mol.nao_nr(), n_reps)
+    rng = np.random.default_rng(1804)
+    x0 = rng.uniform(-10, 10, size=n_params)
+
+    # Get molecular data and molecular Hamiltonian (one- and two-body tensors)
+    mol_data = ffsim.MolecularData.from_scf(hartree_fock)
+    norb = mol_data.norb
+    nelec = mol_data.nelec
+    mol_hamiltonian = mol_data.hamiltonian
+    hamiltonian = ffsim.linear_operator(mol_hamiltonian, norb=norb, nelec=nelec)
+    reference_state = ffsim.hartree_fock_state(norb, nelec)
+
+    def params_to_vec(x: np.ndarray):
+        operator = ffsim.UCJOperator.from_parameters(x, norb=norb, n_reps=n_reps)
+        return ffsim.apply_unitary(reference_state, operator, norb=norb, nelec=nelec)
+
+    def energy(x: np.ndarray):
+        vec = params_to_vec(x)
+        return np.real(np.vdot(vec, hamiltonian @ vec))
+
+    info = defaultdict(list)
+
+    def callback(intermediate_result):
+        info["x"].append(intermediate_result.x)
+        info["fun"].append(intermediate_result.fun)
+        np.testing.assert_allclose(
+            energy(intermediate_result.x), intermediate_result.fun
+        )
+        if hasattr(intermediate_result, "jac"):
+            info["jac"].append(intermediate_result.jac)
+        if hasattr(intermediate_result, "variation"):
+            info["variation"].append(intermediate_result.variation)
+
+    # default optimization
+    result = ffsim.optimize.minimize_stochastic_reconfiguration(
+        params_to_vec, x0=x0, hamiltonian=hamiltonian, callback=callback
+    )
+    np.testing.assert_allclose(energy(result.x), result.fun)
+    np.testing.assert_allclose(result.fun, -0.970773)
+    np.testing.assert_allclose(info["fun"][0], -0.853501, atol=1e-5)
+    np.testing.assert_allclose(info["fun"][-1], -0.970773, atol=1e-5)
+    for params, fun in zip(info["x"], info["fun"]):
+        np.testing.assert_allclose(energy(params), fun)
+    assert result.nit <= 20
+    assert result.nit < result.nlinop < result.nfev
+
+    # optimization without optimizing hyperparameters
+    info = defaultdict(list)
+    result = ffsim.optimize.minimize_stochastic_reconfiguration(
+        params_to_vec,
+        x0=x0,
+        hamiltonian=hamiltonian,
+        optimize_hyperparameters=False,
+        callback=callback,
+    )
+    np.testing.assert_allclose(energy(result.x), result.fun)
+    np.testing.assert_allclose(result.fun, -0.970773)
+    for params, fun in zip(info["x"], info["fun"]):
+        np.testing.assert_allclose(energy(params), fun)
+    assert result.nit <= 30
+    assert result.nit < result.nlinop < result.nfev
+    assert set(info["variation"]) == {1.0}
+
+    # optimization with maxiter
+    info = defaultdict(list)
+    result = ffsim.optimize.minimize_stochastic_reconfiguration(
+        params_to_vec, hamiltonian=hamiltonian, x0=x0, maxiter=3, callback=callback
+    )
+    assert result.nit == 3
+    assert len(info["x"]) == 3
+    assert len(info["fun"]) == 3
+    assert len(info["jac"]) == 2
+    np.testing.assert_allclose(energy(result.x), result.fun)
+
+    # test raising errors
+    with pytest.raises(ValueError, match="variation"):
+        result = ffsim.optimize.minimize_stochastic_reconfiguration(
+            params_to_vec, x0=x0, hamiltonian=hamiltonian, variation=-0.1
+        )
+    with pytest.raises(ValueError, match="maxiter"):
+        result = ffsim.optimize.minimize_stochastic_reconfiguration(
+            params_to_vec, x0=x0, hamiltonian=hamiltonian, maxiter=0
+        )
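
Usage note for reviewers (not part of the diff): a minimal sketch of calling the new entry point, reusing the H2/UCJ setup from the test above. The molecule, random seed, n_reps, and maxiter values are illustrative choices, not requirements of the API.

# Minimal usage sketch (illustrative): minimize the H2 energy with the new
# stochastic reconfiguration optimizer, mirroring the setup in the test file.
import numpy as np
import pyscf

import ffsim

mol = pyscf.gto.Mole()
mol.build(atom=[["H", (0, 0, 0)], ["H", (0, 0, 1.8)]], basis="sto-6g")
hartree_fock = pyscf.scf.RHF(mol)
hartree_fock.kernel()

mol_data = ffsim.MolecularData.from_scf(hartree_fock)
norb, nelec = mol_data.norb, mol_data.nelec
hamiltonian = ffsim.linear_operator(mol_data.hamiltonian, norb=norb, nelec=nelec)
reference_state = ffsim.hartree_fock_state(norb, nelec)

n_reps = 2


def params_to_vec(x: np.ndarray) -> np.ndarray:
    # Map real parameters to the UCJ ansatz state applied to the reference state.
    operator = ffsim.UCJOperator.from_parameters(x, norb=norb, n_reps=n_reps)
    return ffsim.apply_unitary(reference_state, operator, norb=norb, nelec=nelec)


rng = np.random.default_rng(1804)
x0 = rng.uniform(-10, 10, size=ffsim.UCJOperator.n_params(norb, n_reps))

result = ffsim.optimize.minimize_stochastic_reconfiguration(
    params_to_vec, hamiltonian=hamiltonian, x0=x0, maxiter=20
)
print(result.fun, result.nit)  # final energy and number of iterations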
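
Implementation note (not part of the diff): `_get_param_update` performs the standard stochastic reconfiguration step. It solves overlap_mat @ x = -0.5 * variation * grad in the least-squares sense, with small singular values truncated by `cond`, where overlap_mat = Re(J^dagger J) is built from the orthogonalized Jacobian and grad = 2 Re(vec^dagger H J) is the energy gradient returned by `_sr_matrices`. A small self-contained sanity check of that relation, using a random stand-in Jacobian (illustrative only):

# Sanity sketch (illustrative): the update solves
#     overlap_mat @ x ~= -0.5 * variation * grad
# in the least-squares sense, with small singular values cut off by `cond`.
import numpy as np
import scipy.linalg

rng = np.random.default_rng(0)
jac = rng.standard_normal((50, 4)) + 1j * rng.standard_normal((50, 4))  # stand-in Jacobian
overlap_mat = (jac.T.conj() @ jac).real  # real overlap matrix, as in _sr_matrices
grad = rng.standard_normal(4)
variation = 1.0

x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=1e-8)
# For a well-conditioned overlap matrix this agrees with the direct solve.
np.testing.assert_allclose(overlap_mat @ x, -0.5 * variation * grad, atol=1e-10)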