From f9acb0fed76cb6458e0082e009d710aca686471c Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Wed, 10 Apr 2024 19:43:53 -0400 Subject: [PATCH 1/5] add pareto functionality --- src/blop/agent.py | 118 +++++++++---- src/blop/bayesian/acquisition/__init__.py | 43 ++--- src/blop/bayesian/acquisition/config.yml | 8 + src/blop/bayesian/models.py | 21 ++- src/blop/bayesian/transforms.py | 2 + src/blop/dofs.py | 8 +- src/blop/objectives.py | 60 ++++++- src/blop/{bayesian => }/plotting.py | 198 +++++++++++++++------- src/blop/tests/conftest.py | 2 +- src/blop/tests/test_acq_funcs.py | 12 +- src/blop/tests/test_pareto.py | 14 ++ src/blop/tests/test_plots.py | 12 +- src/blop/utils/functions.py | 7 + 13 files changed, 368 insertions(+), 137 deletions(-) rename src/blop/{bayesian => }/plotting.py (69%) create mode 100644 src/blop/tests/test_pareto.py diff --git a/src/blop/agent.py b/src/blop/agent.py index 3b9d4c8..f559614 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -17,15 +17,19 @@ import pandas as pd import scipy as sp import torch + +# from botorch.utils.transforms import normalize +from botorch.acquisition.objective import ScalarizedPosteriorTransform from botorch.models.deterministic import GenericDeterministicModel from botorch.models.model_list_gp_regression import ModelListGP from botorch.models.transforms.input import AffineInputTransform, ChainedInputTransform, Log10, Normalize from databroker import Broker from ophyd import Signal -from . import utils -from .bayesian import acquisition, models, plotting -from .bayesian.transforms import TargetingPosteriorTransform +from . import plotting, utils +from .bayesian import acquisition, models + +# from .bayesian.transforms import TargetingPosteriorTransform from .digestion import default_digestion_function from .dofs import DOF, DOFList from .objectives import Objective, ObjectiveList @@ -532,7 +536,7 @@ def benchmark( @property def model(self): - """A model encompassing all the objectives. 
A single GP in the single-objective case, or a model list.""" + """A model encompassing all the fitnesses and constraints.""" return ( ModelListGP(*[obj.model for obj in self.active_objs]) if len(self.active_objs) > 1 else self.active_objs[0].model ) @@ -542,32 +546,86 @@ def posterior(self, x): return self.model.posterior(torch.tensor(x)) @property - def targeting_transform(self): - return TargetingPosteriorTransform( - weights=torch.tensor(self.active_objs.weights, dtype=torch.double), targets=self.active_objs.targets + def fitness_model(self): + return ( + ModelListGP(*[obj.model for obj in self.active_objs if obj.type == "fitness"]) + if len(self.active_objs) > 1 + else self.active_objs[0].model ) @property - def scalarized_objectives(self): - """Returns a (n_obs,) array of scalarized objectives""" - return self.targeting_transform.evaluate(self.train_targets(active=True)).sum(axis=-1) + def fitness_weights(self): + return torch.tensor([obj.weight for obj in self.objectives if obj.type == "fitness"], dtype=torch.double) @property - def max_scalarized_objective(self): - """Returns the value of the best scalarized objective seen so far.""" - f = self.scalarized_objectives - return np.max(np.where(np.isnan(f), -np.inf, f)) + def fitness_scalarization(self): + return ScalarizedPosteriorTransform(weights=self.fitness_weights) @property - def argmax_scalarized_objective(self): - """Returns the index of the best scalarized objective seen so far.""" - f = self.scalarized_objectives - return np.argmax(np.where(np.isnan(f), -np.inf, f)) + def evaluated_fitnesses(self): + return self.train_targets()[:, self.objectives.type == "fitness"] + + @property + def scalarized_fitnesses(self): + return self.fitness_scalarization.evaluate(self.evaluated_fitnesses) + + @property + def evaluated_constraints(self): + if sum(self.objectives.type == "constraint"): + y = self.train_targets() + return torch.cat( + [ + ((y[:, i] >= obj.target[0]) & (y[:, i] <= obj.target[1])).unsqueeze(0) + for i, obj in enumerate(self.objectives) + if obj.type == "constraint" + ], + dim=0, + ).T + else: + return torch.ones(size=(len(self.table), 0), dtype=torch.bool) + + @property + def argmax_best_f(self): + f = self.scalarized_fitnesses + c = self.evaluated_constraints.all(axis=-1) + mask = (~f.isnan()) & c + + if not mask.sum(): + raise ValueError("There are no valid points that satisfy the constraints!") + + return torch.where(mask)[0][f[mask].argmax()] + + @property + def best_f(self): + return self.scalarized_fitnesses[self.argmax_best_f] + + @property + def pareto_front_mask(self): + # a point is on the Pareto front if it is Pareto dominant + # a point is Pareto dominant if it is there is no other point that is better at every objective + y = self.train_targets()[:, self.objectives.type == "fitness"] + in_pareto_front = ~(y.unsqueeze(1) > y.unsqueeze(0)).all(axis=-1).any(axis=0) + all_constraints_satisfied = self.evaluated_constraints.all(axis=-1) + return in_pareto_front & all_constraints_satisfied + + @property + def pareto_front(self): + return self.table.loc[self.pareto_front_mask.numpy()] + + @property + def min_ref_point(self): + y = self.train_targets()[:, self.objectives.type == "fitness"] + return y[y.argmax(axis=0)].min(axis=0).values + + @property + def random_ref_point(self): + pareto_front_index = torch.where(self.pareto_front_mask)[0] + return self.evaluated_fitnesses[np.random.choice(pareto_front_index)] @property def all_objectives_valid(self): """A mask of whether all objectives are valid for each data 
point.""" - return ~torch.isnan(self.scalarized_objectives) + return ~torch.isnan(self.scalarized_fitnesses) def _train_model(self, model, hypers=None, **kwargs): """Fit all of the agent's models. All kwargs are passed to `botorch.fit.fit_gpytorch_mll`.""" @@ -621,7 +679,7 @@ def _construct_model(self, obj, skew_dims=None): trusted.long(), learn_additional_noise=True ) - obj.validity_conjugate_model = models.LatentDirichletModel( + obj.validity_conjugate_model = models.LatentDirichletClassifier( train_inputs=train_inputs[inputs_are_trusted], train_targets=dirichlet_likelihood.transformed_targets.transpose(-1, -2)[inputs_are_trusted].double(), skew_dims=skew_dims, @@ -760,12 +818,12 @@ def constraint(self, x): p = torch.ones(x.shape[:-1]) for obj in self.active_objs: # if the targeting constraint is non-trivial - if obj.use_as_constraint: + if obj.type == "constraint": p *= obj.targeting_constraint(x) # if the validity constraint is non-trivial if obj.validity_conjugate_model is not None: p *= obj.validity_constraint(x) - return p + return p # + 1e-6 * normalize(x, self._sample_domain).square().sum(axis=-1) @property def hypers(self) -> dict: @@ -848,22 +906,20 @@ def train_targets(self, index=None, **subset_kwargs): return torch.cat([self.train_targets(obj.name) for obj in self.objectives], dim=-1) obj = self.objectives[index] - targets = self.table.loc[:, obj.name].values.copy() + values = self.table.loc[:, obj.name].values.copy() # check that the target values are within the acceptable domain - valid = (targets >= obj._trust_domain[0]) & (targets <= obj._trust_domain[1]) - targets = np.where(valid, targets, np.nan) + valid = (values >= obj._trust_domain[0]) & (values <= obj._trust_domain[1]) + values = np.where(valid, values, np.nan) - # transform if needed - if obj.log: - targets = np.where(targets > 0, np.log(targets), np.nan) + targets = obj.fitness_forward(values) return torch.tensor(targets, dtype=torch.double).unsqueeze(-1) @property def best(self): """Returns all data for the best point.""" - return self.table.loc[self.argmax_scalarized_objective] + return self.table.loc[self.argmax_best_f] @property def best_inputs(self): @@ -941,3 +997,7 @@ def plot_history(self, **kwargs): @property def latent_transforms(self): return {obj.name: obj.model.covar_module.latent_transform for obj in self.active_objs} + + def plot_pareto_front(self, **kwargs): + """Plot the set of all Pareto-efficient points seen by the agent.""" + plotting._plot_pareto_front(self, **kwargs) diff --git a/src/blop/bayesian/acquisition/__init__.py b/src/blop/bayesian/acquisition/__init__.py index 24bdacf..9d807d7 100644 --- a/src/blop/bayesian/acquisition/__init__.py +++ b/src/blop/bayesian/acquisition/__init__.py @@ -1,6 +1,7 @@ import os import yaml +from botorch.utils.transforms import normalize from .
import analytic, monte_carlo from .analytic import * # noqa F401 @@ -42,53 +43,53 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb if acq_func_name == "expected_improvement": acq_func = analytic.ConstrainedLogExpectedImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "monte_carlo_expected_improvement": acq_func = monte_carlo.qConstrainedExpectedImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "probability_of_improvement": acq_func = analytic.ConstrainedLogProbabilityOfImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "monte_carlo_probability_of_improvement": acq_func = monte_carlo.qConstrainedProbabilityOfImprovement( constraint=agent.constraint, - model=agent.model, - best_f=agent.max_scalarized_objective, - posterior_transform=agent.targeting_transform, + model=agent.fitness_model, + best_f=agent.best_f, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {}} elif acq_func_name == "lower_bound_max_value_entropy": acq_func = monte_carlo.qConstrainedLowerBoundMaxValueEntropy( constraint=agent.constraint, - model=agent.model, + model=agent.fitness_model, candidate_set=agent.test_inputs(n=1024).squeeze(1), ) acq_func_meta = {"name": acq_func_name, "args": {}} - elif acq_func_name == "noisy_expected_hypervolume_improvement": + elif acq_func_name == "monte_carlo_noisy_expected_hypervolume_improvement": acq_func = monte_carlo.qConstrainedNoisyExpectedHypervolumeImprovement( constraint=agent.constraint, - model=agent.model, - ref_point=agent.train_targets.min(dim=0).values, - X_baseline=agent.train_inputs, + model=agent.fitness_model, + ref_point=agent.random_ref_point, + X_baseline=normalize(agent.train_inputs(), agent._sample_domain), prune_baseline=True, ) acq_func_meta = {"name": acq_func_name, "args": {}} @@ -98,9 +99,9 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = analytic.ConstrainedUpperConfidenceBound( constraint=agent.constraint, - model=agent.model, + model=agent.fitness_model, beta=beta, - posterior_transform=agent.targeting_transform, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {"beta": beta}} @@ -109,9 +110,9 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = monte_carlo.qConstrainedUpperConfidenceBound( constraint=agent.constraint, - model=agent.model, + model=agent.fitness_model, beta=beta, - posterior_transform=agent.targeting_transform, + posterior_transform=agent.fitness_scalarization, ) acq_func_meta = {"name": acq_func_name, "args": {"beta": beta}} diff --git a/src/blop/bayesian/acquisition/config.yml 
b/src/blop/bayesian/acquisition/config.yml index 2fac116..512470a 100644 --- a/src/blop/bayesian/acquisition/config.yml +++ b/src/blop/bayesian/acquisition/config.yml @@ -48,6 +48,14 @@ noisy_expected_hypervolume_improvement: pretty_name: Noisy expected hypervolume improvement type: analytic +monte_carlo_noisy_expected_hypervolume_improvement: + description: The expected improvement in the hypervolume dominated by the Pareto front, estimated by Monte Carlo and robust to noisy observations. + identifiers: + - qnehvi + multitask_only: true + pretty_name: Noisy expected hypervolume improvement + type: monte_carlo + probability_of_improvement: description: The probability that this input improves on the current maximum. identifiers: diff --git a/src/blop/bayesian/models.py b/src/blop/bayesian/models.py index 05c7268..59ca6e9 100644 --- a/src/blop/bayesian/models.py +++ b/src/blop/bayesian/models.py @@ -1,11 +1,11 @@ -import botorch import gpytorch import torch +from botorch.models.gp_regression import SingleTaskGP from . import kernels -class LatentGP(botorch.models.gp_regression.SingleTaskGP): +class LatentGP(SingleTaskGP): def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, *args, **kwargs) @@ -23,7 +23,22 @@ def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs) self.trained = False -class LatentDirichletModel(LatentGP): +class LatentConstraintModel(LatentGP): + def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): + super().__init__(train_inputs, train_targets, skew_dims, *args, **kwargs) + + self.trained = False + + def fitness(self, x, n_samples=1024): + """ + Takes in a (..., n_dim) tensor and returns a (..., n_classes) tensor + """ + *input_shape, n_dim = x.shape + samples = self.posterior(x.reshape(-1, n_dim)).sample(torch.Size((n_samples,))).exp() + return (samples / samples.sum(-1, keepdim=True)).mean(0).reshape(*input_shape, -1) + + +class LatentDirichletClassifier(LatentGP): def __init__(self, train_inputs, train_targets, skew_dims=True, *args, **kwargs): super().__init__(train_inputs, train_targets, skew_dims, *args, **kwargs) diff --git a/src/blop/bayesian/transforms.py b/src/blop/bayesian/transforms.py index 82bdb9d..0999784 100644 --- a/src/blop/bayesian/transforms.py +++ b/src/blop/bayesian/transforms.py @@ -6,6 +6,8 @@ from botorch.posteriors.posterior_list import PosteriorList from torch import Tensor +# This file is deprecated for now, but we may need it later + def targeting_transform(y, target): if target == "max": diff --git a/src/blop/dofs.py b/src/blop/dofs.py index a78a8f0..af23fcc 100644 --- a/src/blop/dofs.py +++ b/src/blop/dofs.py @@ -105,14 +105,16 @@ def __post_init__(self): if not self.read_only: raise ValueError("You must specify search_domain if the device is not read-only.") else: - self.search_domain = tuple(self.search_domain) if len(self.search_domain) != 2: - raise ValueError("'search_domain' must be a 2-tuple of floats.") + raise ValueError("'search_domain' must have length 2.") + self.search_domain = tuple([float(self.search_domain[0]), float(self.search_domain[1])]) if self.search_domain[0] > self.search_domain[1]: raise ValueError("The lower search bound must be less than the upper search bound.") if self.trust_domain is not None: - self.trust_domain = tuple(self.trust_domain) + if len(self.trust_domain) != 2: + raise ValueError("'trust_domain' must have length 2.") + self.trust_domain = tuple([float(self.trust_domain[0]), float(self.trust_domain[1])]) if not self.read_only: if (self.search_domain[0] <
self.trust_domain[0]) or (self.search_domain[1] > self.trust_domain[1]): raise ValueError("Trust domain must be larger than search domain.") diff --git a/src/blop/objectives.py b/src/blop/objectives.py index 97aa1a4..71b7f38 100644 --- a/src/blop/objectives.py +++ b/src/blop/objectives.py @@ -5,7 +5,8 @@ import numpy as np import pandas as pd import torch -from torch.special import erf + +from .utils.functions import approximate_erf DEFAULT_MIN_NOISE_LEVEL = 1e-6 DEFAULT_MAX_NOISE_LEVEL = 1e0 @@ -76,7 +77,7 @@ class Objective: name: str description: str = "" - type: str = "continuous" + type: str = None target: Union[Tuple[float, float], float, str] = "max" log: bool = False weight: float = 1.0 @@ -96,7 +97,7 @@ def __post_init__(self): if self.log and not self.target > 0: return ValueError("'target' must be strictly positive if log=True.") - self.use_as_constraint = True if isinstance(self.target, tuple) else False + self.type = "fitness" if self.target in ["min", "max"] else "constraint" @property def _trust_domain(self): @@ -152,7 +153,45 @@ def targeting_constraint(self, x: torch.Tensor) -> torch.Tensor: m = p.mean s = p.variance.sqrt() - return 0.5 * (erf((b - m) / (np.sqrt(2) * s)) - erf((a - m) / (np.sqrt(2) * s)))[..., -1] + return 0.5 * (approximate_erf((b - m) / (np.sqrt(2) * s)) - approximate_erf((a - m) / (np.sqrt(2) * s)))[..., -1] + + def fitness_forward(self, y): + f = y + if self.log: + f = np.log(f) + if self.target == "min": + f = -f + return f + + def fitness_inverse(self, f): + y = f + if self.target == "min": + y = -y + if self.log: + y = np.exp(y) + return y + + @property + def is_fitness(self): + return self.target in ["min", "max"] + + def value_prediction(self, X): + p = self.model.posterior(X) + + if self.is_fitness: + return self.fitness_inverse(p.mean) + + if isinstance(self.target, tuple): + return p.mean + + def fitness_prediction(self, X): + p = self.model.posterior(X) + + if self.is_fitness: + return self.fitness_inverse(p.mean) + + if isinstance(self.target, tuple): + return self.targeting_constraint(X).log().clamp(min=-16) class ObjectiveList(Sequence): @@ -197,13 +236,13 @@ def summary(self) -> pd.DataFrame: for attr, dtype in OBJ_FIELD_TYPES.items(): table[attr] = table[attr].astype(dtype) - return table + return table.T def __repr__(self): return self.summary.__repr__() - def __repr_html__(self): - return self.summary.__repr_html__() + def _repr_html_(self): + return self.summary._repr_html_() @property def descriptions(self) -> list: @@ -233,6 +272,13 @@ def weights(self) -> np.array: """ return np.array([obj.weight for obj in self.objectives]) + @property + def is_fitness(self) -> np.array: + """ + Returns a boolean array of which objectives are fitnesses. + """ + return np.array([obj.target in ["min", "max"] for obj in self.objectives]) + @property def signed_weights(self) -> np.array: """ diff --git a/src/blop/bayesian/plotting.py b/src/blop/plotting.py similarity index 69% rename from src/blop/bayesian/plotting.py rename to src/blop/plotting.py index 568ad6e..80659aa 100644 --- a/src/blop/bayesian/plotting.py +++ b/src/blop/plotting.py @@ -3,10 +3,10 @@ from matplotlib import pyplot as plt from matplotlib.patches import Patch -from .
import acquisition +from .bayesian import acquisition DEFAULT_COLOR_LIST = ["dodgerblue", "tomato", "mediumseagreen", "goldenrod"] -DEFAULT_COLORMAP = "magma" +DEFAULT_COLORMAP = "turbo" DEFAULT_SCATTER_SIZE = 16 MAX_TEST_INPUTS = 2**11 @@ -89,77 +89,131 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL for obj_index, obj in enumerate(agent.objectives): targets = agent.train_targets(obj.name).squeeze(-1).numpy() - obj_vmin, obj_vmax = np.nanpercentile(targets, q=[1, 99]) + values = obj.fitness_inverse(targets) + + val_vmin, val_vmax = np.quantile(values, q=[0.01, 0.99]) + val_norm = mpl.colors.LogNorm(val_vmin, val_vmax) if obj.log else mpl.colors.Normalize(val_vmin, val_vmax) + + obj_vmin, obj_vmax = np.quantile(targets, q=[0.01, 0.99]) obj_norm = mpl.colors.Normalize(obj_vmin, obj_vmax) - data_ax = agent.obj_axes[obj_index, 0].scatter(x_values, y_values, c=targets, s=size, norm=obj_norm, cmap=cmap) + val_ax = agent.obj_axes[obj_index, 0].scatter(x_values, y_values, c=values, s=size, norm=val_norm, cmap=cmap) # mean and sigma will have shape (*input_shape,) test_posterior = obj.model.posterior(test_inputs) test_mean = test_posterior.mean[..., 0, 0].detach().squeeze().numpy() test_sigma = test_posterior.variance.sqrt()[..., 0, 0].detach().squeeze().numpy() + # test_values = obj.fitness_inverse(test_mean) if obj.is_fitness else test_mean + + test_constraint = None + if not obj.is_fitness: + test_constraint = obj.targeting_constraint(test_inputs).detach().squeeze().numpy() + if gridded: - _ = agent.obj_axes[obj_index, 1].pcolormesh( - test_x, - test_y, - test_mean, - shading=shading, - cmap=cmap, - norm=obj_norm, - ) - sigma_ax = agent.obj_axes[obj_index, 2].pcolormesh( - test_x, - test_y, - test_sigma, - shading=shading, - cmap=cmap, - norm=mpl.colors.LogNorm(), - ) + # _ = agent.obj_axes[obj_index, 1].pcolormesh( + # test_x, + # test_y, + # test_values, + # shading=shading, + # cmap=cmap, + # norm=val_norm, + # ) + if obj.is_fitness: + fitness_ax = agent.obj_axes[obj_index, 1].pcolormesh( + test_x, + test_y, + test_mean, + shading=shading, + cmap=cmap, + norm=obj_norm, + ) + fit_err_ax = agent.obj_axes[obj_index, 2].pcolormesh( + test_x, + test_y, + test_sigma, + shading=shading, + cmap=cmap, + norm=mpl.colors.LogNorm(), + ) + + if test_constraint is not None: + constraint_ax = agent.obj_axes[obj_index, 3].pcolormesh( + test_x, + test_y, + test_constraint, + shading=shading, + cmap=cmap, + # norm=mpl.colors.LogNorm(), + ) else: - _ = agent.obj_axes[obj_index, 1].scatter( - test_x, - test_y, - c=test_mean, - s=size, - norm=obj_norm, - cmap=cmap, - ) - sigma_ax = agent.obj_axes[obj_index, 2].scatter( - test_x, - test_y, - c=test_sigma, - s=size, - cmap=cmap, - norm=mpl.colors.LogNorm(), + # _ = agent.obj_axes[obj_index, 1].scatter( + # test_x, + # test_y, + # c=test_values, + # s=size, + # norm=val_norm, + # cmap=cmap, + # ) + if obj.is_fitness: + fitness_ax = agent.obj_axes[obj_index, 1].scatter( + test_x, + test_y, + c=test_mean, + s=size, + norm=obj_norm, + cmap=cmap, + ) + fit_err_ax = agent.obj_axes[obj_index, 2].scatter( + test_x, + test_y, + c=test_sigma, + s=size, + cmap=cmap, + norm=mpl.colors.LogNorm(), + ) + + if test_constraint is not None: + constraint_ax = agent.obj_axes[obj_index, 3].scatter( + test_x, + test_y, + c=test_constraint, + s=size, + cmap=cmap, + norm=mpl.colors.LogNorm(), + ) + + val_cbar = agent.obj_fig.colorbar(val_ax, ax=agent.obj_axes[obj_index, 0], location="bottom", aspect=32, shrink=0.8) + val_cbar.set_label(f"{obj.units if 
obj.units else ''}") + + if obj.is_fitness: + _ = agent.obj_fig.colorbar(fitness_ax, ax=agent.obj_axes[obj_index, 1], location="bottom", aspect=32, shrink=0.8) + _ = agent.obj_fig.colorbar(fit_err_ax, ax=agent.obj_axes[obj_index, 2], location="bottom", aspect=32, shrink=0.8) + + # obj_cbar.set_label(f"{obj.label}") + # err_cbar.set_label(f"{obj.label}") + + if test_constraint is not None: + constraint_cbar = agent.obj_fig.colorbar( + constraint_ax, ax=agent.obj_axes[obj_index, 3], location="bottom", aspect=32, shrink=0.8 ) - obj_cbar = agent.obj_fig.colorbar( - data_ax, ax=agent.obj_axes[obj_index, :2], location="bottom", aspect=32, shrink=0.8 - ) - err_cbar = agent.obj_fig.colorbar( - sigma_ax, ax=agent.obj_axes[obj_index, 2], location="bottom", aspect=32, shrink=0.8 - ) - obj_cbar.set_label(obj.label) - err_cbar.set_label(f"{obj.label} error") - - col_names = ["samples", "posterior mean", "posterior std. dev.", "fitness"] - - pad = 5 - - for column_index, ax in enumerate(agent.obj_axes[0]): - ax.annotate( - col_names[column_index], - xy=(0.5, 1), - xytext=(0, pad), - color="k", - xycoords="axes fraction", - textcoords="offset points", - size="large", - ha="center", - va="baseline", - ) + constraint_cbar.set_label(f"{obj.label} constraint") + + col_names = [ + f"{obj.description} samples", + f"pred. {obj.description} fitness", + f"pred. {obj.description} fitness error", + f"{obj.description} constraint", + ] + + pad = 5 + + for column_index, ax in enumerate(agent.obj_axes[obj_index]): + ax.set_title( + col_names[column_index], + ) if len(agent.objectives) > 1: for row_index, ax in enumerate(agent.obj_axes[:, 0]): @@ -370,7 +424,7 @@ def _plot_history(agent, x_key="index", show_all_objs=False): hist_axes[obj_index].plot(x, y, lw=5e-1, c="k") hist_axes[obj_index].set_ylabel(obj.key) - y = agent.scalarized_objectives + y = agent.scalarized_fitnesses cummax_y = np.array([np.nanmax(y[: i + 1]) for i in range(len(y))]) @@ -406,3 +460,25 @@ def inspect_beam(agent, index, border=None): if border is not None: plt.xlim(x_min - border * width_x, x_min + border * width_x) plt.ylim(y_min - border * width_y, y_min + border * width_y) + + +def _plot_pareto_front(agent, obj_indices=(0, 1)): + f_objs = agent.objectives[agent.objectives.type == "fitness"] + (i, j) = obj_indices + + if len(f_objs) < 2: + raise ValueError("Cannot plot Pareto front for agents with fewer than two fitness objectives") + + fig, ax = plt.subplots(1, 1, figsize=(6, 6)) + + y = agent.train_targets()[:, agent.objectives.type == "fitness"] + + front_mask = agent.pareto_front_mask + + ax.scatter(y[front_mask, i], y[front_mask, j], c="b", label="Pareto front") + ax.scatter(y[~front_mask, i], y[~front_mask, j], c="r") + + ax.set_xlabel(f"{f_objs[i].name} fitness") + ax.set_ylabel(f"{f_objs[j].name} fitness") + + ax.legend() diff --git a/src/blop/tests/conftest.py b/src/blop/tests/conftest.py index 99558ea..43196a9 100644 --- a/src/blop/tests/conftest.py +++ b/src/blop/tests/conftest.py @@ -70,7 +70,7 @@ def agent(db): @pytest.fixture(scope="function") -def agent_with_multiple_objs(db): +def mo_agent(db): """ A simple agent minimizing two Himmelblau's functions """ diff --git a/src/blop/tests/test_acq_funcs.py b/src/blop/tests/test_acq_funcs.py index be376b7..2a9d89d 100644 --- a/src/blop/tests/test_acq_funcs.py +++ b/src/blop/tests/test_acq_funcs.py @@ -14,12 +14,12 @@ def test_monte_carlo_acq_funcs_single_objective(agent, RE, acq_func): @pytest.mark.parametrize("acq_func", ["ei", "pi", "em", "ucb"]) -def 
test_analytic_acq_funcs_multi_objective(agent_with_multiple_objs, RE, acq_func): - RE(agent_with_multiple_objs.learn("qr", n=16)) - RE(agent_with_multiple_objs.learn(acq_func, n=1)) +def test_analytic_acq_funcs_multi_objective(mo_agent, RE, acq_func): + RE(mo_agent.learn("qr", n=16)) + RE(mo_agent.learn(acq_func, n=1)) @pytest.mark.parametrize("acq_func", ["qei", "qpi", "qem", "qucb"]) -def test_monte_carlo_acq_funcs_multi_objective(agent_with_multiple_objs, RE, acq_func): - RE(agent_with_multiple_objs.learn("qr", n=16)) - RE(agent_with_multiple_objs.learn(acq_func, n=4)) +def test_monte_carlo_acq_funcs_multi_objective(mo_agent, RE, acq_func): + RE(mo_agent.learn("qr", n=16)) + RE(mo_agent.learn(acq_func, n=4)) diff --git a/src/blop/tests/test_pareto.py b/src/blop/tests/test_pareto.py new file mode 100644 index 0000000..ddcc482 --- /dev/null +++ b/src/blop/tests/test_pareto.py @@ -0,0 +1,14 @@ +import pytest + + +@pytest.mark.test_func +def test_pareto(mo_agent, RE): + RE(mo_agent.learn("qr", n=16)) + + mo_agent.plot_pareto_front() + + +@pytest.mark.parametrize("acq_func", ["qnehvi"]) +def test_monte_carlo_pareto_acq_funcs(mo_agent, RE, acq_func): + RE(mo_agent.learn("qr", n=4)) + RE(mo_agent.learn(acq_func, n=2)) diff --git a/src/blop/tests/test_plots.py b/src/blop/tests/test_plots.py index f28758a..e67836c 100644 --- a/src/blop/tests/test_plots.py +++ b/src/blop/tests/test_plots.py @@ -10,13 +10,13 @@ def test_plots(RE, agent): agent.plot_history() -def test_plots_multiple_objs(RE, agent_with_multiple_objs): - RE(agent_with_multiple_objs.learn("qr", n=16)) +def test_plots_multiple_objs(RE, mo_agent): + RE(mo_agent.learn("qr", n=16)) - agent_with_multiple_objs.plot_objectives() - agent_with_multiple_objs.plot_acquisition() - agent_with_multiple_objs.plot_validity() - agent_with_multiple_objs.plot_history() + mo_agent.plot_objectives() + mo_agent.plot_acquisition() + mo_agent.plot_validity() + mo_agent.plot_history() def test_plots_passive_dofs(RE, agent_with_passive_dofs): diff --git a/src/blop/utils/functions.py b/src/blop/utils/functions.py index 0a2e949..745e316 100644 --- a/src/blop/utils/functions.py +++ b/src/blop/utils/functions.py @@ -1,4 +1,11 @@ import numpy as np +import torch + + +def approximate_erf(x): + # we want to compute erf(x) to compute the definite integral of the Gaussian PDF + # we instead use an approximation with tanh(x) which is faster and better-conditioned near +/- infinity + return torch.tanh(1.20278247 * x) def sigmoid(x): From 14f46d7a0e24ecf11b128fe0006b358ddcbce8c8 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Wed, 10 Apr 2024 19:58:10 -0400 Subject: [PATCH 2/5] fixed a broken method --- docs/source/tutorials/himmelblau.ipynb | 6 +++--- docs/source/tutorials/passive-dofs.ipynb | 2 +- src/blop/agent.py | 4 ++-- src/blop/tests/test_agent.py | 2 ++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/source/tutorials/himmelblau.ipynb b/docs/source/tutorials/himmelblau.ipynb index 7925a58..143fb6a 100644 --- a/docs/source/tutorials/himmelblau.ipynb +++ b/docs/source/tutorials/himmelblau.ipynb @@ -165,7 +165,7 @@ "metadata": {}, "outputs": [], "source": [ - "RE(agent.learn(\"quasi-random\", n=32))\n", + "RE(agent.learn(\"quasi-random\", n=36))\n", "agent.plot_objectives()" ] }, @@ -294,7 +294,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.11.5 64-bit", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -308,7 +308,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer":
"ipython3", - "version": "3.11.5" + "version": "3.10.0" }, "vscode": { "interpreter": { diff --git a/docs/source/tutorials/passive-dofs.ipynb b/docs/source/tutorials/passive-dofs.ipynb index 8e53358..a79ae5b 100644 --- a/docs/source/tutorials/passive-dofs.ipynb +++ b/docs/source/tutorials/passive-dofs.ipynb @@ -94,7 +94,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.10.0" }, "vscode": { "interpreter": { diff --git a/src/blop/agent.py b/src/blop/agent.py index f559614..d53590d 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -39,7 +39,7 @@ mpl.rc("image", cmap="coolwarm") -DEFAULT_MAX_SAMPLES = 2**11 +DEFAULT_MAX_SAMPLES = 2**12 def _validate_dofs_and_objs(dofs: DOFList, objs: ObjectiveList): @@ -919,7 +919,7 @@ def train_targets(self, index=None, **subset_kwargs): @property def best(self): """Returns all data for the best point.""" - return self.table.loc[self.argmax_best_f] + return self.table.loc[self.argmax_best_f.item()] @property def best_inputs(self): diff --git a/src/blop/tests/test_agent.py b/src/blop/tests/test_agent.py index 9789635..7b6f117 100644 --- a/src/blop/tests/test_agent.py +++ b/src/blop/tests/test_agent.py @@ -4,6 +4,8 @@ def test_agent(agent, RE): RE(agent.learn("qr", n=4)) + agent.best + def test_forget(agent, RE): RE(agent.learn("qr", n=4)) From a7578cb6f7995c6df5263870d5bb639a46d748a0 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Wed, 10 Apr 2024 20:54:36 -0400 Subject: [PATCH 3/5] tests pass now --- src/blop/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blop/agent.py b/src/blop/agent.py index d53590d..a8dd2d9 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -39,7 +39,7 @@ mpl.rc("image", cmap="coolwarm") -DEFAULT_MAX_SAMPLES = 2**12 +DEFAULT_MAX_SAMPLES = 3200 def _validate_dofs_and_objs(dofs: DOFList, objs: ObjectiveList): From b7b1df03ff3d4b2ca1ccb6f160dbcf4f781f4d55 Mon Sep 17 00:00:00 2001 From: Thomas Morris Date: Thu, 18 Apr 2024 12:43:41 -0700 Subject: [PATCH 4/5] added pareto example to the docs --- docs/source/tutorials.rst | 3 +- .../{himmelblau.ipynb => introduction.ipynb} | 0 docs/source/tutorials/pareto-fronts.ipynb | 159 ++++++++++++++++++ src/blop/agent.py | 11 +- src/blop/bayesian/transforms.py | 79 --------- src/blop/plotting.py | 2 +- src/blop/tests/test_agent.py | 3 +- src/blop/utils/functions.py | 6 +- 8 files changed, 177 insertions(+), 86 deletions(-) rename docs/source/tutorials/{himmelblau.ipynb => introduction.ipynb} (100%) create mode 100644 docs/source/tutorials/pareto-fronts.ipynb delete mode 100644 src/blop/bayesian/transforms.py diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst index 3506da7..d146376 100644 --- a/docs/source/tutorials.rst +++ b/docs/source/tutorials.rst @@ -4,6 +4,7 @@ Tutorials .. 
toctree:: :maxdepth: 2 - tutorials/himmelblau.ipynb + tutorials/introduction.ipynb tutorials/hyperparameters.ipynb + tutorials/pareto-fronts.ipynb tutorials/passive-dofs.ipynb diff --git a/docs/source/tutorials/himmelblau.ipynb b/docs/source/tutorials/introduction.ipynb similarity index 100% rename from docs/source/tutorials/himmelblau.ipynb rename to docs/source/tutorials/introduction.ipynb diff --git a/docs/source/tutorials/pareto-fronts.ipynb b/docs/source/tutorials/pareto-fronts.ipynb new file mode 100644 index 0000000..4b8a13c --- /dev/null +++ b/docs/source/tutorials/pareto-fronts.ipynb @@ -0,0 +1,159 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "e7b5e13a-c059-441d-8d4f-fff080d52054", + "metadata": {}, + "source": [ + "# Multiobjective optimization with Pareto front mapping\n", + "\n", + "One way to do multiobjective optimization is with Pareto optimization, which explores the set of Pareto-efficient points. A point is Pareto-efficient if there are no other valid points that are better at every objective: it shows the \"trade-off\" between several objectives. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cac0177b-576c-4f01-b306-2e9e8544a05c", + "metadata": {}, + "outputs": [], + "source": [ + "from blop.utils import prepare_re_env\n", + "\n", + "%run -i $prepare_re_env.__file__ --db-type=temp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "120812d8-5de2-4efa-8fc6-2d8e4cd0693e", + "metadata": {}, + "outputs": [], + "source": [ + "from blop import DOF, Objective, Agent\n", + "from blop.utils import functions\n", + "from blop.dofs import BrownianMotion\n", + "\n", + "import numpy as np\n", + "\n", + "\n", + "def digestion(db, uid):\n", + " products = db[uid].table()\n", + "\n", + " for index, entry in products.iterrows():\n", + " x1, x2 = entry.x1, entry.x2\n", + "\n", + " products.loc[index, \"f1\"] = (x1 - 2) ** 2 + (x2 - 1) + 2\n", + " products.loc[index, \"f2\"] = 9 * x1 - (x2 - 1) + 2\n", + " products.loc[index, \"c1\"] = x1**2 + x2**2\n", + " products.loc[index, \"c2\"] = x1 - 3 * x2 + 10\n", + "\n", + " return products\n", + "\n", + "\n", + "dofs = [\n", + " DOF(name=\"x1\", search_domain=(-20, 20)),\n", + " DOF(name=\"x2\", search_domain=(-20, 20)),\n", + "]\n", + "\n", + "\n", + "objectives = [\n", + " Objective(name=\"f1\", target=\"min\"),\n", + " Objective(name=\"f2\", target=\"min\"),\n", + " Objective(name=\"c1\", target=(-np.inf, 225)),\n", + " Objective(name=\"c2\", target=(-np.inf, 0)),\n", + "]\n", + "\n", + "agent = Agent(\n", + " dofs=dofs,\n", + " objectives=objectives,\n", + " digestion=digestion,\n", + " db=db,\n", + ")\n", + "\n", + "(uid,) = RE(agent.learn(\"qr\", n=64))" + ] + }, + { + "cell_type": "markdown", + "id": "d81c6af2-0a4c-4d31-9b7c-cc3065550b98", + "metadata": {}, + "source": [ + "We can plot our fitness and constraint objectives to see their models:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6c08f53-e96b-4987-ba3d-a93c84468b0b", + "metadata": {}, + "outputs": [], + "source": [ + "agent.plot_objectives()" + ] + }, + { + "cell_type": "markdown", + "id": "48b976e6-048a-4e16-9a1c-500203a2e195", + "metadata": {}, + "source": [ + "We can plot the Pareto front (the set of all Pareto-efficient points), which shows the trade-off between the two fitnesses. The points in blue comprise the Pareto front, while the points in red are either not Pareto efficient or are invalidated by one of the constraints." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "990a877e-f533-419c-bf5d-569ad7e72c6b", + "metadata": {}, + "outputs": [], + "source": [ + "agent.plot_pareto_front()" + ] + }, + { + "cell_type": "markdown", + "id": "29d7f4fb-8f25-4b57-982f-28737fad2a7c", + "metadata": {}, + "source": [ + "We can explore the Pareto front by choosing a random point on the Pareto front and computing the expected improvement in the hypervolume of all fitness objectives with respect to that point (called the \"reference point\"). All this is done automatically with the `qnehvi` acquisition function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b49e6233-b228-43a3-9d8a-722a82e93443", + "metadata": {}, + "outputs": [], + "source": [ + "# this is broken now but is fixed in the next PR\n", + "# RE(agent.learn(\"qnehvi\", n=4))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "vscode": { + "interpreter": { + "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/blop/agent.py b/src/blop/agent.py index a8dd2d9..7d4fe37 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -601,8 +601,12 @@ def best_f(self): @property def pareto_front_mask(self): - # a point is on the Pareto front if it is Pareto dominant - # a point is Pareto dominant if it is there is no other point that is better at every objective + """ + A mask for all data points that is true when the point is on the Pareto front. + A point is on the Pareto front if it is Pareto dominant + A point is Pareto dominant if it is there is no other point that is better at every objective + Points that violate any constraint are excluded. + """ y = self.train_targets()[:, self.objectives.type == "fitness"] in_pareto_front = ~(y.unsqueeze(1) > y.unsqueeze(0)).all(axis=-1).any(axis=0) all_constraints_satisfied = self.evaluated_constraints.all(axis=-1) @@ -610,6 +614,9 @@ def pareto_front_mask(self): @property def pareto_front(self): + """ + A subset of the data table containing only points on the Pareto front. 
+ """ return self.table.loc[self.pareto_front_mask.numpy()] @property diff --git a/src/blop/bayesian/transforms.py b/src/blop/bayesian/transforms.py deleted file mode 100644 index 0999784..0000000 --- a/src/blop/bayesian/transforms.py +++ /dev/null @@ -1,79 +0,0 @@ -from typing import Union - -import botorch -from botorch.acquisition.objective import PosteriorTransform -from botorch.posteriors.gpytorch import GPyTorchPosterior -from botorch.posteriors.posterior_list import PosteriorList -from torch import Tensor - -# This file is deprecated for now, but we may need it later - - -def targeting_transform(y, target): - if target == "max": - return y - if target == "min": - return -y - elif not isinstance(target, tuple): - return -(y - target).abs() - else: - return -((y - 0.5 * (target[1] + target[0])).abs() - 0.5 * (target[1] - target[0])).clamp(min=0) - - -class TargetingPosteriorTransform(PosteriorTransform): - r"""An affine posterior transform for scalarizing multi-output posteriors.""" - - scalarize: bool = True - - def __init__(self, weights: Tensor, targets: Tensor) -> None: - r""" - Args: - weights: A one-dimensional tensor with `m` elements representing the - linear weights on the outputs. - offset: An offset to be added to posterior mean. - """ - super().__init__() - self.targets = targets - self.register_buffer("weights", weights) - - def sample_transform(self, y): - for i, target in enumerate(self.targets): - y[..., i] = targeting_transform(y[..., i], target) - return y @ self.weights.unsqueeze(-1) - - def mean_transform(self, mean, var): - for i, target in enumerate(self.targets): - mean[..., i] = targeting_transform(mean[..., i], target) - return mean @ self.weights.unsqueeze(-1) - - def variance_transform(self, mean, var): - return var @ self.weights.unsqueeze(-1) - - def evaluate(self, Y: Tensor) -> Tensor: - r"""Evaluate the transform on a set of outcomes. - - Args: - Y: A `batch_shape x q x m`-dim tensor of outcomes. - - Returns: - A `batch_shape x q`-dim tensor of transformed outcomes. - """ - return self.sample_transform(Y) - - def forward(self, posterior: Union[GPyTorchPosterior, PosteriorList]) -> GPyTorchPosterior: - r"""Compute the posterior of the affine transformation. - - Args: - posterior: A posterior with the same number of outputs as the - elements in `self.weights`. - - Returns: - A single-output posterior. 
- """ - - return botorch.posteriors.transformed.TransformedPosterior( - posterior, - sample_transform=self.sample_transform, - mean_transform=self.mean_transform, - variance_transform=self.variance_transform, - ) diff --git a/src/blop/plotting.py b/src/blop/plotting.py index 80659aa..2c9e66b 100644 --- a/src/blop/plotting.py +++ b/src/blop/plotting.py @@ -185,7 +185,7 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL ) val_cbar = agent.obj_fig.colorbar(val_ax, ax=agent.obj_axes[obj_index, 0], location="bottom", aspect=32, shrink=0.8) - val_cbar.set_label(f"{obj.units if obj.units else ''}") + val_cbar.set_label(f"{obj.units or ''}") if obj.is_fitness: _ = agent.obj_fig.colorbar(fitness_ax, ax=agent.obj_axes[obj_index, 1], location="bottom", aspect=32, shrink=0.8) diff --git a/src/blop/tests/test_agent.py b/src/blop/tests/test_agent.py index 7b6f117..a04b432 100644 --- a/src/blop/tests/test_agent.py +++ b/src/blop/tests/test_agent.py @@ -4,7 +4,8 @@ def test_agent(agent, RE): RE(agent.learn("qr", n=4)) - agent.best + assert [dof.name in agent.best for dof in agent.dofs] + assert [obj.name in agent.best for obj in agent.objectives] def test_forget(agent, RE): diff --git a/src/blop/utils/functions.py b/src/blop/utils/functions.py index 745e316..74f6f42 100644 --- a/src/blop/utils/functions.py +++ b/src/blop/utils/functions.py @@ -3,8 +3,10 @@ def approximate_erf(x): - # we want to compute erf(x) to compute the definite integral of the Gaussian PDF - # we instead use an approximation with tanh(x) which is faster and better-conditioned near +/- infinity + """ + An approximation of erf(x), to compute the definite integral of the Gaussian PDF + This is faster and better-conditioned near +/- infinity + """ return torch.tanh(1.20278247 * x) From dfa413fc48f8bdf3a9391c4cd530cf2c609f9e55 Mon Sep 17 00:00:00 2001 From: Thomas Morris <41275226+thomaswmorris@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:30:34 -0700 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Max Rakitin --- src/blop/agent.py | 2 +- src/blop/plotting.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/blop/agent.py b/src/blop/agent.py index 7d4fe37..cf57290 100644 --- a/src/blop/agent.py +++ b/src/blop/agent.py @@ -604,7 +604,7 @@ def pareto_front_mask(self): """ A mask for all data points that is true when the point is on the Pareto front. A point is on the Pareto front if it is Pareto dominant - A point is Pareto dominant if it is there is no other point that is better at every objective + A point is Pareto dominant if there is no other point that is better at every objective Points that violate any constraint are excluded. """ y = self.train_targets()[:, self.objectives.type == "fitness"] diff --git a/src/blop/plotting.py b/src/blop/plotting.py index 2c9e66b..7535aa7 100644 --- a/src/blop/plotting.py +++ b/src/blop/plotting.py @@ -6,6 +6,7 @@ from .bayesian import acquisition DEFAULT_COLOR_LIST = ["dodgerblue", "tomato", "mediumseagreen", "goldenrod"] +# Note: the values near 1 are hard to see on a white background. Turbo goes from red to blue and isn't white in the middle. DEFAULT_COLORMAP = "turbo" DEFAULT_SCATTER_SIZE = 16
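The dominance test introduced in `pareto_front_mask` is worth unpacking: for a fitness tensor `y` of shape `(n_points, n_objectives)`, broadcasting `y.unsqueeze(1) > y.unsqueeze(0)` builds an `(n, n, m)` comparison whose entry `[i, j, k]` says whether point `i` beats point `j` in objective `k`. A minimal, self-contained sketch of the same logic on toy data (the helper name and values here are illustrative only, assuming the maximization convention and no constraints):

import torch

def pareto_mask(y: torch.Tensor) -> torch.Tensor:
    # dominated[j] is True when some point i beats point j in every objective
    dominated = (y.unsqueeze(1) > y.unsqueeze(0)).all(dim=-1).any(dim=0)
    # the Pareto front is the set of non-dominated points
    return ~dominated

y = torch.tensor([[1.0, 4.0], [2.0, 3.0], [3.0, 1.0], [1.5, 2.5]])
print(pareto_mask(y))  # tensor([ True,  True,  True, False]): only (1.5, 2.5) is dominated, by (2.0, 3.0)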
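Likewise, the `approximate_erf` helper that replaces `torch.special.erf` in `targeting_constraint` trades a small amount of accuracy for speed and better conditioning. A quick sketch of the size of that trade-off (illustrative only, not part of the patch):

import torch

def approximate_erf(x):
    # the tanh surrogate added in blop.utils.functions; it saturates cleanly at +/- 1
    return torch.tanh(1.20278247 * x)

x = torch.linspace(-5, 5, 10001)
max_err = (approximate_erf(x) - torch.erf(x)).abs().max().item()
print(f"max |approximation - erf| on [-5, 5]: {max_err:.3f}")  # on the order of 0.02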