diff --git a/docs/source/dofs.rst b/docs/source/dofs.rst
index a77981c..7506a72 100644
--- a/docs/source/dofs.rst
+++ b/docs/source/dofs.rst
@@ -1,6 +1,9 @@
 Degrees of freedom (DOFs)
 +++++++++++++++++++++++++
 
+Continuous degrees of freedom
+-----------------------------
+
 A degree of freedom is a variable that affects our optimization objective. We can define a simple DOF as
 
 .. code-block:: python
diff --git a/docs/source/objectives.rst b/docs/source/objectives.rst
index 69e489e..58e89f9 100644
--- a/docs/source/objectives.rst
+++ b/docs/source/objectives.rst
@@ -1,25 +1,40 @@
 Objectives
 ++++++++++
 
-We can describe an optimization problem as a list of objectives to. A simple objective is
+Objectives define what it means for the outputs of our experiment to be optimal, and are represented by ``Objective`` objects.
+
+``blop`` combines one or many ``Objective`` objects into an ``ObjectiveList``, which encapsulates how we model and optimize our outputs.
+
+Fitness
+-------
+
+A fitness objective is an ``Objective`` whose value we want to minimize or maximize, for example:
+
+* Maximize the flux of a beam of light.
+* Minimize the size of a beam.
+
+We can construct an objective to maximize some output with
 
 .. code-block:: python
 
     from blop import Objective
 
-    objective = Objective(name="y1", target="max")
+    objective = Objective(name="some_output", target="max")  # or "min"
 
-Given some data, the ``Objective`` object will try to model the quantity "y1" and find the corresponding inputs that maximize it.
-The objective will expect that this quantity will be spit out by the experimentation loop, so we will check later that it is set up correctly.
-There are many ways to specify an objective's behavior, which is done by changing the objective's target:
+Given some data, the ``Objective`` object will try to model the quantity ``some_output`` and find the corresponding inputs that maximize it.
+We can also apply a transform to the value to make it more Gaussian when we fit to it.
+This is especially useful when the quantity tends to be non-Gaussian, as with a beam flux.
 
 .. code-block:: python
 
     from blop import Objective
 
-    objective = Objective(name="y1", target="min")  # minimize the quantity "y1"
+    objective_with_log_transform = Objective(name="some_output", target="max", transform="log")
+
+    objective_with_arctanh_transform = Objective(name="some_output", target="max", transform="arctanh")
+
 
-    objective = Objective(name="y1", target=2)  # get the quantity "y1" as close to 2 as possible
+We can also ask for the quantity to stay within some range, by passing a tuple:
+
+.. code-block:: python
 
     objective = Objective(name="y1", target=(1, 3))  # find any input that puts "y1" between 1 and 3.
 
@@ -31,4 +46,53 @@ In this case, a smart thing to do is to set ``log=True``, which will model and s
 
     from blop import Objective
 
-    objective = Objective(name="some_strictly_positive_quantity", target="max", log=True)
+    objective = Objective(name="some_strictly_positive_output", target="max", transform="log")
+
+
+Constraints
+-----------
+
+A constraint objective doesn't try to minimize or maximize a value; instead, it tries to maximize the chance that the objective stays within some acceptable range.
+This is useful in optimization problems where we might:
+
+* Require a beam to be within some box.
+* Require the wavelength of light to be a certain color.
+* Require a beam to be focused enough to perform some experiment, but not necessarily optimal.
+
+.. 
code-block:: python
+
+    import numpy as np
+
+    # ensure the color is approximately green
+    color_objective = Objective(name="peak_wavelength", target=(520, 530), units="nm")
+
+    # ensure the beam is smaller than 10 microns
+    width_objective = Objective(name="beam_width", target=(-np.inf, 10), units="um", transform="log")
+
+    # ensure our flux is at least some value
+    flux_objective = Objective(name="beam_flux", target=(1.0, np.inf), transform="log")
+
+
+
+Validity
+--------
+
+A common problem in beamline optimization is the random or systematically invalid measurement of objectives. This arises in different ways, for example when:
+
+* The beam misses the detector, leading our beam parser to return some absurdly small or large value.
+* Some part of the experiment glitches, leading to an uncharacteristic data point.
+* Some part of the data postprocessing pipeline fails, giving no value for the output.
+
+We obviously want to exclude these points from our model fitting, but if we stopped there, inputs that always lead to invalid outputs would cause an infinite loop of sampling interesting but invalid points (as each such point is removed as soon as it is measured).
+The boundary between valid and invalid points is often highly nonlinear and unknown *a priori*.
+We solve this by implementing a validity model for each ``Objective``, which fits a probabilistic model of validity over all inputs.
+Using this model, we constrain acquisition functions to account for the possibility that an output is invalid, so that the agent eventually learns to ignore infeasible points.
+
+We can control the exclusion of data points in two ways. The first is to specify a ``trust_domain`` for the objective, so that the model only "trusts" points in that domain. For example:
+
+.. code-block:: python
+
+    # beams smaller than two pixels shouldn't be trusted, and beams larger
+    # than 100 pixels will mess up our model and aren't interesting anyway
+    objective = Objective(name="beam_size", trust_domain=(2, 100), units="pixels")
+
+This will set any value outside of the ``trust_domain`` to ``NaN``, which the model will learn to avoid.
+The second way is to ensure that any invalid values are converted to ``NaN`` in the diagnostics, before the agent ever sees them.
diff --git a/src/blop/agent.py b/src/blop/agent.py
index cf57290..0a170cd 100644
--- a/src/blop/agent.py
+++ b/src/blop/agent.py
@@ -22,7 +22,7 @@ from botorch.acquisition.objective import ScalarizedPosteriorTransform
 from botorch.models.deterministic import GenericDeterministicModel
 from botorch.models.model_list_gp_regression import ModelListGP
 
-from botorch.models.transforms.input import AffineInputTransform, ChainedInputTransform, Log10, Normalize
+from botorch.models.transforms.input import Normalize
 from databroker import Broker
 from ophyd import Signal
 
@@ -198,7 +198,7 @@ def sample(self, n: int = DEFAULT_MAX_SAMPLES, method: str = "quasi-random") ->
         else:
             raise ValueError("'method' argument must be one of ['quasi-random', 'random', 'grid'].")
 
-        return self._sample_input_transform.untransform(X)
+        return self.dofs.subset(active=True).untransform(X)
 
     def ask(self, acq_func_identifier="qei", n=1, route=True, sequential=True, upsample=1, **acq_func_kwargs):
        """Ask the agent for the best point to sample, given an acquisition function.
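
For context, ``ask`` returns a dict whose ``"points"`` entry is already mapped back to the untransformed experiment space. A minimal sketch of how the ask/learn loop is typically driven (hedged: ``my_digestion`` and ``db`` are hypothetical placeholders for a digestion function and a databroker instance, and the top-level ``DOF``/``Objective``/``Agent`` exports are assumed from their use in the tests below):

.. code-block:: python

    from bluesky.run_engine import RunEngine
    from blop import DOF, Objective, Agent

    RE = RunEngine({})

    dofs = [
        DOF(name="x1", search_domain=(-5.0, 5.0)),
        DOF(name="x2", search_domain=(-5.0, 5.0)),
    ]
    objectives = [Objective(name="f1", target="min")]

    agent = Agent(dofs=dofs, objectives=objectives, digestion=my_digestion, db=db)

    RE(agent.learn("qr", n=16))  # quasi-random initial sampling
    RE(agent.learn("qei", n=2))  # then optimize with q-expected improvement

    # ask for the next candidate point without acquiring it
    res = agent.ask(acq_func_identifier="qei", n=1)
    print(res["points"], res["acq_func"])
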
@@ -246,8 +246,8 @@ def ask(self, acq_func_identifier="qei", n=1, route=True, sequential=True, upsam identifier=acq_func_identifier, return_metadata=True, **acq_func_kwargs ) - NUM_RESTARTS = 8 - RAW_SAMPLES = 256 + NUM_RESTARTS = 16 + RAW_SAMPLES = 1024 candidates, acqf_obj = botorch.optim.optimize_acqf( acq_function=acq_func, @@ -258,8 +258,11 @@ def ask(self, acq_func_identifier="qei", n=1, route=True, sequential=True, upsam raw_samples=RAW_SAMPLES, # used for intialization heuristic ) - # this includes both RO and non-RO DOFs - candidates = candidates.numpy() + # this includes both RO and non-RO DOFs. + # and is in the transformed model space + candidates = self.dofs.subset(active=True).untransform(candidates).numpy() + + p = self.posterior(candidates) if hasattr(self, "model") else None acq_points = candidates[..., ~self.active_dofs.read_only] read_only_values = candidates[..., self.active_dofs.read_only] @@ -275,8 +278,6 @@ def ask(self, acq_func_identifier="qei", n=1, route=True, sequential=True, upsam upsampled_idx = np.linspace(0, len(idx) - 1, upsample * len(idx) - 1) acq_points = sp.interpolate.interp1d(idx, acq_points, axis=0)(upsampled_idx) - p = self.posterior(candidates) if hasattr(self, "model") else None - res = { "points": acq_points, "acq_func": acq_func_name, @@ -367,6 +368,7 @@ def learn( append: bool = True, hypers: str = None, route: bool = True, + **acq_func_kwargs, ): """This returns a Bluesky plan which iterates the learning algorithm, looping over ask -> acquire -> tell. @@ -400,9 +402,10 @@ def learn( new_table.loc[:, "acq_func"] = "sample_center_on_init" for i in range(iterations): - print(f"running iteration {i + 1} / {iterations}") + if self.verbose: + print(f"running iteration {i + 1} / {iterations}") for single_acq_func in np.atleast_1d(acq_func): - res = self.ask(n=n, acq_func_identifier=single_acq_func, upsample=upsample, route=route) + res = self.ask(n=n, acq_func_identifier=single_acq_func, upsample=upsample, route=route, **acq_func_kwargs) new_table = yield from self.acquire(res["points"]) new_table.loc[:, "acq_func"] = res["acq_func"] @@ -548,56 +551,45 @@ def posterior(self, x): @property def fitness_model(self): return ( - ModelListGP(*[obj.model for obj in self.active_objs if obj.type == "fitness"]) + ModelListGP(*[obj.model for obj in self.objectives.subset(active=True, kind="fitness")]) if len(self.active_objs) > 1 else self.active_objs[0].model ) - @property - def fitness_weights(self): - return torch.tensor([obj.weight for obj in self.objectives if obj.type == "fitness"], dtype=torch.double) - - @property - def fitness_scalarization(self): - return ScalarizedPosteriorTransform(weights=self.fitness_weights) - - @property - def evaluated_fitnesses(self): - return self.train_targets()[:, self.objectives.type == "fitness"] - - @property - def scalarized_fitnesses(self): - return self.fitness_scalarization.evaluate(self.evaluated_fitnesses) - @property def evaluated_constraints(self): - if sum(self.objectives.type == "constraint"): - y = self.train_targets() - return torch.cat( - [ - ((y[:, i] >= obj.target[0]) & (y[:, i] <= obj.target[1])).unsqueeze(0) - for i, obj in enumerate(self.objectives) - if obj.type == "constraint" - ], - dim=0, - ).T + constraint_objectives = self.objectives.subset(kind="constraint") + if len(constraint_objectives): + return torch.cat([obj.constrain(self.raw_targets(obj.name)) for obj in constraint_objectives], dim=-1) else: return torch.ones(size=(len(self.table), 0), dtype=torch.bool) - @property - def 
argmax_best_f(self): - f = self.scalarized_fitnesses - c = self.evaluated_constraints.all(axis=-1) - mask = (~f.isnan()) & c - - if not mask.sum(): - raise ValueError("There are no valid points that satisfy the constraints!") - - return torch.where(mask)[0][f[mask].argmax()] - - @property - def best_f(self): - return self.scalarized_fitnesses[self.argmax_best_f] + def fitness_scalarization(self, weights="default"): + fitness_objectives = self.objectives.subset(active=True, kind="fitness") + if weights == "default": + weights = torch.tensor([obj.weight for obj in fitness_objectives], dtype=torch.double) + elif weights == "equal": + weights = torch.ones(len(fitness_objectives), dtype=torch.double) + elif weights == "random": + weights = torch.rand(len(fitness_objectives), dtype=torch.double) + weights *= len(fitness_objectives) / weights.sum() + elif not isinstance(weights, torch.Tensor): + raise ValueError(f"'weights' must be a Tensor or one of ['default', 'equal', 'random'], and not {weights}.") + return ScalarizedPosteriorTransform(weights=weights) + + def scalarized_fitnesses(self, weights="default", constrained=True): + f = self.fitness_scalarization(weights=weights).evaluate(self.train_targets(active=True, kind="fitness")) + if constrained: + c = self.evaluated_constraints.all(axis=-1) + if not c.sum(): + raise ValueError("There are no valid points that satisfy the constraints!") + return torch.where(c, f, -np.inf) + + def argmax_best_f(self, weights="default"): + return int(self.scalarized_fitnesses(weights=weights, constrained=True).argmax()) + + def best_f(self, weights="default"): + return float(self.scalarized_fitnesses(weights=weights, constrained=True).max()) @property def pareto_front_mask(self): @@ -626,8 +618,7 @@ def min_ref_point(self): @property def random_ref_point(self): - pareto_front_index = torch.where(self.pareto_front_mask)[0] - return self.evaluated_fitnesses[np.random.choice(pareto_front_index)] + return self.train_targets(active=True, kind="fitness")[self.argmax_best_f(weights="random")] @property def all_objectives_valid(self): @@ -745,20 +736,12 @@ def _latent_dim_tuples(self, obj_index=None): @property def _sample_domain(self): - return torch.tensor(self.active_dofs.search_domain, dtype=torch.double).T - - @property - def _sample_input_transform(self): - tf1 = Log10(indices=list(np.where(self.active_dofs.log)[0])) - - transformed_sample_domain = tf1.transform(self._sample_domain) - - offset = transformed_sample_domain.min(dim=0).values - coefficient = (transformed_sample_domain.max(dim=0).values - offset).clamp(min=1e-16) - - tf2 = AffineInputTransform(d=len(offset), coefficient=coefficient, offset=offset) - - return ChainedInputTransform(tf1=tf1, tf2=tf2) + """ + Returns a (2, n_active_dof) array of lower and upper bounds for dofs. + Read-only DOFs are set to exactly their last known value. + Discrete DOFs are relaxed to some continuous domain. 
+        """
+        return self.dofs.subset(active=True).transform(self.dofs.subset(active=True).search_domain.T)
 
     @property
     def _model_input_transform(self):
@@ -775,10 +758,7 @@ def _model_input_transform(self):
         Read-only: constrain to the readback value
         """
 
-        tf1 = Log10(indices=list(np.where(self.active_dofs.log)[0]))
-        tf2 = Normalize(d=len(self.active_dofs))
-
-        return ChainedInputTransform(tf1=tf1, tf2=tf2)
+        return Normalize(d=len(self.active_dofs))
 
     def save_data(self, path="./data.h5"):
         """
@@ -822,11 +802,13 @@ def _set_hypers(self, hypers):
         self.validity_constraint.load_state_dict(hypers["validity_constraint"])
 
     def constraint(self, x):
+        x = self.dofs.subset(active=True).transform(x)
+        p = torch.ones(x.shape[:-1])
         for obj in self.active_objs:
             # if the targeting constraint is non-trivial
-            if obj.type == "constraint":
-                p *= obj.targeting_constraint(x)
+            # if obj.kind == "constraint":
+            #     p *= obj.targeting_constraint(x)
             # if the validity constaint is non-trivial
             if obj.validity_conjugate_model is not None:
                 p *= obj.validity_constraint(x)
@@ -891,47 +873,61 @@ def all_acq_funcs(self):
         print("\n\n".join(entries))
 
+    def raw_inputs(self, index=None, **subset_kwargs):
+        """
+        Get the raw, untransformed inputs for a DOF (or for a subset).
+        """
+        if index is None:
+            return torch.cat([self.raw_inputs(dof.name) for dof in self.dofs.subset(**subset_kwargs)], dim=-1)
+        return torch.tensor(self.table.loc[:, self.dofs[index].name].values, dtype=torch.double).unsqueeze(-1)
+
     def train_inputs(self, index=None, **subset_kwargs):
         """A two-dimensional tensor of all DOF values."""
 
         if index is None:
-            return torch.cat([self.train_inputs(dof.name) for dof in self.dofs.subset(**subset_kwargs)], dim=-1)
+            return torch.cat([self.train_inputs(index=dof.name) for dof in self.dofs.subset(**subset_kwargs)], dim=-1)
 
         dof = self.dofs[index]
-        inputs = self.table.loc[:, dof.name].values.copy()
+        raw_inputs = self.raw_inputs(index=index, **subset_kwargs)
 
         # check that inputs values are inside acceptable values
-        valid = (inputs >= dof._trust_domain[0]) & (inputs <= dof._trust_domain[1])
-        inputs = np.where(valid, inputs, np.nan)
+        valid = (raw_inputs >= dof._trust_domain[0]) & (raw_inputs <= dof._trust_domain[1])
+        raw_inputs = torch.where(valid, raw_inputs, np.nan)
 
-        return torch.tensor(inputs, dtype=torch.double).unsqueeze(-1)
+        return dof._transform(raw_inputs)
+
+    def raw_targets(self, index=None, **subset_kwargs):
+        """
+        Get the raw, untransformed targets for an objective (or for a subset). 
+ """ + if index is None: + return torch.cat([self.raw_targets(index=obj.name) for obj in self.objectives.subset(**subset_kwargs)], dim=-1) + return torch.tensor(self.table.loc[:, self.objectives[index].name].values, dtype=torch.double).unsqueeze(-1) def train_targets(self, index=None, **subset_kwargs): """Returns the values associated with an objective name.""" if index is None: - return torch.cat([self.train_targets(obj.name) for obj in self.objectives], dim=-1) + return torch.cat([self.train_targets(obj.name) for obj in self.objectives.subset(**subset_kwargs)], dim=-1) obj = self.objectives[index] - values = self.table.loc[:, obj.name].values.copy() + raw_targets = self.raw_targets(index=index, **subset_kwargs) # check that targets values are inside acceptable values - valid = (values >= obj._trust_domain[0]) & (values <= obj._trust_domain[1]) - values = np.where(valid, values, np.nan) - - targets = obj.fitness_forward(values) + valid = (raw_targets >= obj._trust_domain[0]) & (raw_targets <= obj._trust_domain[1]) + raw_targets = torch.where(valid, raw_targets, np.nan) - return torch.tensor(targets, dtype=torch.double).unsqueeze(-1) + return obj._transform(raw_targets) @property def best(self): """Returns all data for the best point.""" - return self.table.loc[self.argmax_best_f.item()] + return self.table.loc[self.argmax_best_f()] @property def best_inputs(self): """Returns the value of each DOF at the best point.""" - return self.table.loc[self.argmax_scalarized_objective, self.dofs.names].to_dict() + return self.table.loc[self.argmax_best_f(), self.dofs.names].to_dict() def go_to(self, **positions): """Set all settable DOFs to a given position. DOF/value pairs should be supplied as kwargs, e.g. as diff --git a/src/blop/bayesian/acquisition/__init__.py b/src/blop/bayesian/acquisition/__init__.py index 9d807d7..329c933 100644 --- a/src/blop/bayesian/acquisition/__init__.py +++ b/src/blop/bayesian/acquisition/__init__.py @@ -44,8 +44,8 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = analytic.ConstrainedLogExpectedImprovement( constraint=agent.constraint, model=agent.fitness_model, - best_f=agent.best_f, - posterior_transform=agent.fitness_scalarization, + best_f=agent.best_f(), + posterior_transform=agent.fitness_scalarization(), ) acq_func_meta = {"name": acq_func_name, "args": {}} @@ -53,8 +53,8 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = monte_carlo.qConstrainedExpectedImprovement( constraint=agent.constraint, model=agent.fitness_model, - best_f=agent.best_f, - posterior_transform=agent.fitness_scalarization, + best_f=agent.best_f(), + posterior_transform=agent.fitness_scalarization(), ) acq_func_meta = {"name": acq_func_name, "args": {}} @@ -62,8 +62,8 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = analytic.ConstrainedLogProbabilityOfImprovement( constraint=agent.constraint, model=agent.fitness_model, - best_f=agent.best_f, - posterior_transform=agent.fitness_scalarization, + best_f=agent.best_f(), + posterior_transform=agent.fitness_scalarization(), ) acq_func_meta = {"name": acq_func_name, "args": {}} @@ -71,8 +71,8 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb acq_func = monte_carlo.qConstrainedProbabilityOfImprovement( constraint=agent.constraint, model=agent.fitness_model, - best_f=agent.best_f, - posterior_transform=agent.fitness_scalarization, + best_f=agent.best_f(), + 
posterior_transform=agent.fitness_scalarization(), ) acq_func_meta = {"name": acq_func_name, "args": {}} @@ -101,7 +101,7 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb constraint=agent.constraint, model=agent.fitness_model, beta=beta, - posterior_transform=agent.fitness_scalarization, + posterior_transform=agent.fitness_scalarization(), ) acq_func_meta = {"name": acq_func_name, "args": {"beta": beta}} @@ -112,7 +112,7 @@ def get_acquisition_function(agent, identifier="qei", return_metadata=True, verb constraint=agent.constraint, model=agent.fitness_model, beta=beta, - posterior_transform=agent.fitness_scalarization, + posterior_transform=agent.fitness_scalarization(), ) acq_func_meta = {"name": acq_func_name, "args": {"beta": beta}} diff --git a/src/blop/dofs.py b/src/blop/dofs.py index af23fcc..5d3b90c 100644 --- a/src/blop/dofs.py +++ b/src/blop/dofs.py @@ -1,27 +1,32 @@ import time as ttime import uuid +import warnings from collections.abc import Iterable, Sequence -from dataclasses import dataclass, field -from typing import Tuple +from dataclasses import dataclass, field, fields +from operator import attrgetter +from typing import Tuple, Union import numpy as np import pandas as pd +import torch from ophyd import Signal, SignalRO DOF_FIELD_TYPES = { "description": "str", "readback": "object", "type": "str", + "transform": "str", "search_domain": "object", "trust_domain": "object", - "units": "str", + "domain": "object", "active": "bool", "read_only": "bool", - "log": "bool", + "units": "str", "tags": "object", } -SUPPORTED_DOF_TYPES = ["continuous", "binary", "ordinal", "categorical"] +DOF_TYPES = ["continuous", "binary", "ordinal", "categorical"] +TRANSFORM_DOMAINS = {"log": (0.0, np.inf), "logit": (0.0, 1.0), "arctanh": (-1.0, 1.0)} class ReadOnlyError(Exception): @@ -40,6 +45,48 @@ def _validate_dofs(dofs): return list(dofs) +def _validate_continuous_dof_domains(search_domain, trust_domain, domain, read_only): + """ + A DOF MUST have a search domain, and it MIGHT have a trust domain or a transform domain. + + Check that all the domains are kosher by enforcing that: + search_domain \\subseteq trust_domain \\subseteq domain + """ + if not read_only: + if len(search_domain) != 2: + raise ValueError(f"Bad search domain {search_domain}. 
The search domain must have length 2.") + try: + search_domain = tuple((float(search_domain[0]), float(search_domain[1]))) + except TypeError: + raise ValueError("If type='continuous', then 'search_domain' must be a tuple of two numbers.") + + if search_domain[0] >= search_domain[1]: + raise ValueError("The lower search bound must be strictly less than the upper search bound.") + + if domain is not None: + if (search_domain[0] <= domain[0]) or (search_domain[1] >= domain[1]): + raise ValueError(f"The search domain {search_domain} must be a strict subset of the domain {domain}.") + + if trust_domain is not None: + if (search_domain[0] < trust_domain[0]) or (search_domain[1] > trust_domain[1]): + raise ValueError(f"The search domain {search_domain} must be a subset of the trust domain {trust_domain}.") + + if (trust_domain is not None) and (domain is not None): + if (trust_domain[0] < domain[0]) or (trust_domain[1] > domain[1]): + raise ValueError(f"The trust domain {trust_domain} must be a subset of the domain {domain}.") + + +def _validate_discrete_dof_domains(search_domain, trust_domain): + """ + A DOF MUST have a search domain, and it MIGHT have a trust domain or a transform domain + + Check that all the domains are kosher by enforcing that: + search_domain \\subseteq trust_domain \\subseteq domain + """ + if not trust_domain.issuperset(search_domain): + raise ValueError(f"The trust domain {trust_domain} not a superset of the search domain {search_domain}.") + + @dataclass class DOF: """A degree of freedom (DOF), to be used by an agent. @@ -68,8 +115,8 @@ class DOF: active: bool If True, the agent will try to use the DOF in its optimization. If False, the agent will still read the DOF but not include it any model or acquisition function. - log: bool - Whether to apply a log to the objective, i.e. to make the process outputs more Gaussian. + transform: Callable + A transform to apply to the objective, to make the process outputs more Gaussian. tags: list A list of tags. These make it easier to subset large groups of dofs. device: Signal, optional @@ -78,57 +125,81 @@ class DOF: name: str = None description: str = "" - type: str = "continuous" - search_domain: Tuple[float, float] = None - trust_domain: Tuple[float, float] = None - units: str = "" + type: str = None + search_domain: Union[Tuple[float, float], Sequence] = None + trust_domain: Union[Tuple[float, float], Sequence] = None + units: str = None read_only: bool = False active: bool = True - log: bool = False + transform: str = None tags: list = field(default_factory=list) device: Signal = None + def __repr__(self): + nodef_f_vals = ((f.name, attrgetter(f.name)(self)) for f in fields(self)) + + nodef_f_repr = [] + for name, value in nodef_f_vals: + if (name == "search_domain") and (self.type == "continuous"): + search_min, search_max = self.search_domain + nodef_f_repr.append(f"search_domain=({search_min:.03e}, {search_max:.03e})") + else: + nodef_f_repr.append(f"{name}={value}") + + return f"{self.__class__.__name__}({', '.join(nodef_f_repr)})" + # Some post-processing. 
This is specific to dataclasses def __post_init__(self): - if self.type not in SUPPORTED_DOF_TYPES: - raise ValueError(f"'type' must be one of {SUPPORTED_DOF_TYPES}") - if (self.name is None) ^ (self.device is None): if self.name is None: self.name = self.device.name else: - raise ValueError("DOF() accepts exactly one of either a name or an ophyd device.") - - # if our input is continuous - if self.type == "continuous": + raise ValueError("You must specify exactly one of 'name' or 'device'.") + if self.read_only: + if self.type is None: + if isinstance(self.readback, float): + self.type = "continuous" + else: + self.type = "categorical" + warnings.warn(f"No type was specified for DOF {self.name}. Assuming type={self.type}.") + else: if self.search_domain is None: - if not self.read_only: - raise ValueError("You must specify search_domain if the device is not read-only.") - else: - if len(self.search_domain) != 2: - raise ValueError("'search_domain' must have length 2.") - self.search_domain = tuple([float(self.search_domain[0]), float(self.search_domain[1])]) - if self.search_domain[0] > self.search_domain[1]: - raise ValueError("The lower search bound must be less than the upper search bound.") - - if self.trust_domain is not None: - if len(self.trust_domain) != 2: - raise ValueError("'trust_domain' must have length 2.") - self.trust_domain = tuple([float(self.trust_domain[0]), float(self.trust_domain[1])]) - if not self.read_only: - if (self.search_domain[0] < self.trust_domain[0]) or (self.search_domain[1] > self.trust_domain[1]): - raise ValueError("Trust domain must be larger than search domain.") - - if self.log: - if not self.search_domain[0] > 0: - raise ValueError("Search domain must be strictly positive if log=True.") - - if self.device is None: - center_value = np.mean(np.log(self.search_domain)) if self.log else np.mean(self.search_domain) - self.device = Signal(name=self.name, value=center_value) + raise ValueError("You must specify the search domain if read_only=False.") + # if there is no type, infer it from the search_domain + if self.type is None: + if isinstance(self.search_domain, tuple): + self.type = "continuous" + elif isinstance(self.search_domain, set): + if len(self.search_domain) == 2: + self.type = "binary" + else: + self.type = "categorical" + else: + raise TypeError("'search_domain' must be either a 2-tuple of numbers or a set.") + + if self.type not in DOF_TYPES: + raise ValueError(f"Invalid DOF type '{self.type}'. 'type' must be one of {DOF_TYPES}.") + + # our input is usually continuous + if self.type == "continuous": + if not self.read_only: + _validate_continuous_dof_domains( + search_domain=self._search_domain, + trust_domain=self._trust_domain, + domain=self.domain, + read_only=self.read_only, + ) + + self.search_domain = tuple((float(self.search_domain[0]), float(self.search_domain[1]))) + + if self.device is None: + center = float(self._untransform(np.mean([self._transform(np.array(self.search_domain))]))) + self.device = Signal(name=self.name, value=center) # otherwise it must be discrete else: + _validate_discrete_dof_domains(search_domain=self._search_domain, trust_domain=self._trust_domain) + if self.type == "binary": if self.search_domain is None: self.search_domain = [False, True] @@ -152,16 +223,82 @@ def __post_init__(self): @property def _search_domain(self): + """ + Compute the search domain of the DOF. 
+ """ if self.read_only: - _readback = self.readback - return (_readback, _readback) - return self.search_domain + value = self.readback + if self.type == "continuous": + return tuple((value, value)) + else: + return {value} + else: + return self.search_domain @property def _trust_domain(self): - if self.trust_domain is None: - return (0, np.inf) if self.log else (-np.inf, np.inf) - return self.trust_domain + """ + If trust_domain is None, then we return the total domain. + """ + return self.trust_domain or self.domain + + @property + def domain(self): + """ + The total domain; the user can't control this. This is what we fall back on as the trust_domain if none is supplied. + If the DOF is continuous: + If there is a transform, return the domain of the transform + Else, return (-inf, inf) + If the DOF is discrete: + If there is a trust domain, return the trust domain + Else, return the search domain + """ + if self.type == "continuous": + if self.transform is None: + return (-np.inf, np.inf) + else: + return TRANSFORM_DOMAINS[self.transform] + else: + return self.trust_domain or self.search_domain + + def _trust(self, x): + return (self.trust_domain[0] <= x) & (x <= self.trust_domain[1]) + + def _transform(self, x, normalize=True): + if not isinstance(x, torch.Tensor): + x = torch.tensor(x, dtype=torch.double) + + x = torch.where((x > self.domain[0]) & (x < self.domain[1]), x, torch.nan) + + if self.transform == "log": + x = torch.log(x) + if self.transform == "logit": + x = (x / (1 - x)).log() + if self.transform == "arctanh": + x = torch.arctanh(x) + + if normalize and not self.read_only: + min, max = self._transform(self._search_domain, normalize=False) + x = (x - min) / (max - min) + + return x + + def _untransform(self, x): + if not isinstance(x, torch.Tensor): + x = torch.tensor(x, dtype=torch.double) + + if not self.read_only: + min, max = self._transform(self._search_domain, normalize=False) + x = x * (max - min) + min + + if self.transform is None: + return x + if self.transform == "log": + return torch.exp(x) + if self.transform == "logit": + return 1 / (1 + torch.exp(-x)) + if self.transform == "arctanh": + return torch.tanh(x) @property def readback(self): @@ -172,17 +309,30 @@ def readback(self): def summary(self) -> pd.Series: series = pd.Series(index=list(DOF_FIELD_TYPES.keys()), dtype="object") for attr in series.index: - series[attr] = getattr(self, attr) + value = getattr(self, attr) + if attr in ["search_domain", "trust_domain", "domain"]: + if (self.type == "continuous") and not self.read_only and value is not None: + if attr in ["search_domain", "trust_domain"]: + value = f"[{value[0]:.02e}, {value[1]:.02e}]" + else: + value = f"({value[0]:.02e}, {value[1]:.02e})" + series[attr] = value if value else "" return series @property - def label(self) -> str: - return f"{self.description}{f' [{self.units}]' if len(self.units) > 0 else ''}" + def label_with_units(self) -> str: + return f"{self.description}{f' [{self.units}]' if self.units else ''}" @property def has_model(self): return hasattr(self, "model") + def activate(self): + self.active = True + + def deactivate(self): + self.active = False + class DOFList(Sequence): def __init__(self, dofs: list = []): @@ -221,6 +371,32 @@ def __repr__(self): def _repr_html_(self): return self.summary.T._repr_html_() + def transform(self, X): + """ + Transform X to the transformed unit hypercube. 
+ """ + if X.shape[-1] != len(self): + raise ValueError(f"Cannot transform points with shape {X.shape} using DOFs with dimension {len(self)}.") + + if not isinstance(X, torch.Tensor): + X = torch.tensor(X, dtype=torch.double) + + return torch.cat([dof._transform(X[..., i]).unsqueeze(-1) for i, dof in enumerate(self.dofs)], dim=-1) + + def untransform(self, X): + """ + Transform the transformed unit hypercube to the search domain. + """ + if X.shape[-1] != len(self): + raise ValueError(f"Cannot untransform points with shape {X.shape} using DOFs with dimension {len(self)}.") + + if not isinstance(X, torch.Tensor): + X = torch.tensor(X, dtype=torch.double) + + return torch.cat( + [dof._untransform(X[..., i]).unsqueeze(-1) for i, dof in enumerate(self.subset(active=True))], dim=-1 + ) + @property def readback(self): """ @@ -268,7 +444,10 @@ def add(self, dof): self.dofs.append(dof) @staticmethod - def _test_dof(dof, active=None, read_only=None, tag=None): + def _test_dof(dof, type=None, active=None, read_only=None, tag=None): + if type is not None: + if dof.type != type: + return False if active is not None: if dof.active != active: return False @@ -280,19 +459,35 @@ def _test_dof(dof, active=None, read_only=None, tag=None): return False return True - def subset(self, active=None, read_only=None, tag=None): - return DOFList([dof for dof in self.dofs if self._test_dof(dof, active=active, read_only=read_only, tag=tag)]) + def subset(self, type=None, active=None, read_only=None, tag=None): + return DOFList( + [dof for dof in self.dofs if self._test_dof(dof, type=type, active=active, read_only=read_only, tag=tag)] + ) - def activate(self, active=None, read_only=None, tag=None): + def activate(self, **subset_kwargs): for dof in self.dofs: - if self._test_dof(dof, active=active, read_only=read_only, tag=tag): + if self._test_dof(dof, **subset_kwargs): dof.active = True - def deactivate(self, active=None, read_only=None, tag=None): + def deactivate(self, **subset_kwargs): for dof in self.dofs: - if self._test_dof(dof, active=active, read_only=read_only, tag=tag): + if self._test_dof(dof, **subset_kwargs): dof.active = False + def activate_only(self, **subset_kwargs): + for dof in self.dofs: + if self._test_dof(dof, **subset_kwargs): + dof.active = True + else: + dof.active = False + + def deactivate_only(self, **subset_kwargs): + for dof in self.dofs: + if self._test_dof(dof, **subset_kwargs): + dof.active = False + else: + dof.active = True + class BrownianMotion(SignalRO): """ diff --git a/src/blop/objectives.py b/src/blop/objectives.py index 71b7f38..f99e22b 100644 --- a/src/blop/objectives.py +++ b/src/blop/objectives.py @@ -13,36 +13,61 @@ OBJ_FIELD_TYPES = { "description": "object", + # "kind": "str", "type": "str", "target": "object", - "active": "bool", + "transform": "str", + "domain": "str", "trust_domain": "object", - "active": "bool", - "weight": "bool", + "weight": "float", "units": "object", - "log": "bool", - "min_noise": "float", - "max_noise": "float", + "noise_bounds": "object", "noise": "float", - "n": "int", + "n_valid": "int", "latent_groups": "object", + "active": "bool", } -ALLOWED_OBJ_TYPES = ["continuous", "binary", "ordinal", "categorical"] +SUPPORTED_OBJ_TYPES = ["continuous", "binary", "ordinal", "categorical"] +TRANSFORM_DOMAINS = {"log": (0.0, np.inf), "logit": (0.0, 1.0), "arctanh": (-1.0, 1.0)} class DuplicateNameError(ValueError): ... 
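
The ``DOFList.transform``/``untransform`` pair above replaces the old ``Log10``/``Normalize`` chain: each DOF is optionally transformed (``log``, ``logit``, or ``arctanh``, per ``TRANSFORM_DOMAINS``) and then normalized to the unit interval over its search domain; the same ``TRANSFORM_DOMAINS`` convention is reused for objectives below. A minimal round-trip sketch, assuming only the API shown in this diff (the import path ``blop.dofs`` mirrors the file being patched, ``src/blop/dofs.py``):

.. code-block:: python

    import torch

    from blop.dofs import DOF, DOFList

    dofs = DOFList(
        [
            DOF(name="x1", search_domain=(1e-3, 1e1), transform="log"),
            DOF(name="x2", search_domain=(-5.0, 5.0)),
        ]
    )

    X = torch.tensor([[1e-2, 0.0], [1e0, 2.5]], dtype=torch.double)

    X_model = dofs.transform(X)  # log-transform x1, then scale both DOFs to the unit hypercube
    X_back = dofs.untransform(X_model)  # map back to the experiment space

    assert torch.allclose(X, X_back)
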
-def _validate_objectives(objectives):
-    names = [obj.name for obj in objectives]
+def _validate_objs(objs):
+    names = [obj.name for obj in objs]
     unique_names, counts = np.unique(names, return_counts=True)
     duplicate_names = unique_names[counts > 1]
     if len(duplicate_names) > 0:
         raise DuplicateNameError(f"Duplicate name(s) in supplied objectives: {duplicate_names}")
 
 
+def _validate_obj_transform(transform):
+    if transform is None:
+        return
+
+    if transform not in TRANSFORM_DOMAINS:
+        raise ValueError(f"'transform' must be None or one of {list(TRANSFORM_DOMAINS.keys())}.")
+
+
+def _validate_continuous_domains(trust_domain, domain):
+    """
+    An objective MIGHT have a trust domain and a transform domain.
+
+    Check that the domains are kosher by enforcing that:
+    trust_domain \\subseteq domain
+    """
+
+    if (trust_domain is not None) and (domain is not None):
+        if (trust_domain[0] < domain[0]) or (trust_domain[1] > domain[1]):
+            raise ValueError(f"The trust domain {trust_domain} is outside the transform domain {domain}.")
+
+
 @dataclass
 class Objective:
     """An objective to be used by an agent.
@@ -77,9 +102,9 @@ class Objective:
 
     name: str
     description: str = ""
-    type: str = None
+    type: str = "continuous"
    target: Union[Tuple[float, float], float, str] = "max"
-    log: bool = False
+    transform: str = None
     weight: float = 1.0
     active: bool = True
     trust_domain: Tuple[float, float] or None = None
@@ -89,52 +114,107 @@ class Objective:
     latent_groups: List[Tuple[str, ...]] = field(default_factory=list)
 
     def __post_init__(self):
+        if self.transform is not None:
+            _validate_obj_transform(self.transform)
+
         if isinstance(self.target, str):
+            # eventually we will be able to target other strings, as outputs of a discrete objective
             if self.target not in ["min", "max"]:
                 raise ValueError("'target' must be either 'min', 'max', a number, or a tuple of numbers.")
 
-        if isinstance(self.target, float):
-            if self.log and not self.target > 0:
-                return ValueError("'target' must strictly positive if log=True.")
+        self.use_as_constraint = True if isinstance(self.target, tuple) else False
+
+    @property
+    def kind(self):
+        return "fitness" if self.target in ["min", "max"] else "constraint"
 
-        self.type = "fitness" if self.target in ["min", "max"] else "constraint"
+    @property
+    def domain(self):
+        """
+        The total domain of the objective.
+        """
+        if self.transform is None:
+            if self.type == "continuous":
+                return (-np.inf, np.inf)
+        return TRANSFORM_DOMAINS[self.transform]
+
+    def constrain(self, y):
+        """
+        Returns a boolean mask of which values of y satisfy the constraint. 
+ """ + if self.kind != "constraint": + raise RuntimeError("Cannot call 'constrain' with a non-constraint objective.") + return (y > self.target[0]) & (y < self.target[1]) @property def _trust_domain(self): if self.trust_domain is None: - return (0, np.inf) if self.log else (-np.inf, np.inf) + return self.domain return self.trust_domain + def _transform(self, y): + if not isinstance(y, torch.Tensor): + y = torch.tensor(y, dtype=torch.double) + + y = torch.where((y > self.domain[0]) & (y < self.domain[1]), y, np.nan) + + if self.transform == "log": + y = y.log() + if self.transform == "logit": + y = (y / (1 - y)).log() + if self.transform == "arctanh": + y = torch.arctanh(y) + + if self.target == "min": + y = -y + + return y + + def _untransform(self, y): + if not isinstance(y, torch.Tensor): + y = torch.tensor(y, dtype=torch.double) + + if self.target == "min": + y = -y + + if self.transform == "log": + y = y.exp() + if self.transform == "logit": + y = 1 / (1 + torch.exp(-y)) + if self.transform == "arctanh": + y = torch.tanh(y) + + return y + @property - def label(self) -> str: - return f"{'log ' if self.log else ''}{self.description}" + def label_with_units(self) -> str: + return f"{self.description}{f' [{self.units}]' if self.units else ''}" + + @property + def noise_bounds(self) -> tuple: + return (self.min_noise, self.max_noise) @property def summary(self) -> pd.Series: series = pd.Series(index=list(OBJ_FIELD_TYPES.keys()), dtype="object") for attr in series.index: value = getattr(self, attr) - if attr == "trust_domain": - if value is None: - value = (0, np.inf) if self.log else (-np.inf, np.inf) - series[attr] = value - return series - @property - def trust_lower_bound(self): - if self.trust_domain is None: - return 0 if self.log else -np.inf - return float(self.trust_domain[0]) + if attr in ["search_domain", "trust_domain"]: + if self.type == "continuous": + if value is not None: + value = f"({value[0]:.02e}, {value[1]:.02e})" - @property - def trust_upper_bound(self): - if self.trust_domain is None: - return np.inf - return float(self.trust_domain[1]) + if attr in ["noise_bounds"]: + if value is not None: + value = f"({value[0]:.01e}, {value[1]:.01e})" + + series[attr] = value if value is not None else "" + return series @property def noise(self) -> float: - return self.model.likelihood.noise.item() if hasattr(self, "model") else None + return self.model.likelihood.noise.item() if hasattr(self, "model") else np.nan @property def snr(self) -> float: @@ -142,7 +222,7 @@ def snr(self) -> float: @property def n(self) -> int: - return self.model.train_targets.shape[0] if hasattr(self, "model") else 0 + return int((~self.model.train_targets.isnan()).sum()) if hasattr(self, "model") else 0 def targeting_constraint(self, x: torch.Tensor) -> torch.Tensor: if not isinstance(self.target, tuple): @@ -153,55 +233,63 @@ def targeting_constraint(self, x: torch.Tensor) -> torch.Tensor: m = p.mean s = p.variance.sqrt() - return 0.5 * (approximate_erf((b - m) / (np.sqrt(2) * s)) - approximate_erf((a - m) / (np.sqrt(2) * s)))[..., -1] + sish = s + 0.1 * m.std() # for numerical stability - def fitness_forward(self, y): - f = y - if self.log: - f = np.log(f) - if self.target == "min": - f = -f - return f + return ( + 0.5 * (approximate_erf((b - m) / (np.sqrt(2) * sish)) - approximate_erf((a - m) / (np.sqrt(2) * sish)))[..., -1] + ) - def fitness_inverse(self, f): - y = f - if self.target == "min": - y = -y - if self.log: - y = np.exp(y) - return y + # def fitness_forward(self, y): + # f = y + # if 
self.log: + # f = np.log(f) + # if self.target == "min": + # f = -f + # return f - @property - def is_fitness(self): - return self.target in ["min", "max"] + # def fitness_inverse(self, f): + # y = f + # if self.target == "min": + # y = -y + # if self.log: + # y = np.exp(y) + # return y + + # @property + # def is_fitness(self): + # return self.target in ["min", "max"] - def value_prediction(self, X): - p = self.model.posterior(X) + # def value_prediction(self, X): + # p = self.model.posterior(X) - if self.is_fitness: - return self.fitness_inverse(p.mean) + # if self.is_fitness: + # return self.fitness_inverse(p.mean) - if isinstance(self.target, tuple): - return p.mean + # if isinstance(self.target, tuple): + # return p.mean - def fitness_prediction(self, X): - p = self.model.posterior(X) + # def fitness_prediction(self, X): + # p = self.model.posterior(X) - if self.is_fitness: - return self.fitness_inverse(p.mean) + # if self.is_fitness: + # return self.fitness_inverse(p.mean) - if isinstance(self.target, tuple): - return self.targeting_constraint(X).log().clamp(min=-16) + # if isinstance(self.target, tuple): + # return self.targeting_constraint(X).log().clamp(min=-16) class ObjectiveList(Sequence): def __init__(self, objectives: list = []): - _validate_objectives(objectives) + _validate_objs(objectives) self.objectives = objectives + @property + def names(self): + return [obj.name for obj in self.objectives] + def __getattr__(self, attr): # This is called if we can't find the attribute in the normal way. - if attr in OBJ_FIELD_TYPES.keys(): + if attr in [*OBJ_FIELD_TYPES.keys(), "kind"]: return np.array([getattr(obj, attr) for obj in self.objectives]) if attr in self.names: return self.__getitem__(attr) @@ -236,66 +324,86 @@ def summary(self) -> pd.DataFrame: for attr, dtype in OBJ_FIELD_TYPES.items(): table[attr] = table[attr].astype(dtype) - return table.T + return table def __repr__(self): return self.summary.__repr__() def _repr_html_(self): - return self.summary._repr_html_() + return self.summary.T._repr_html_() + + # @property + # def descriptions(self) -> list: + # """ + # Returns an array of the objective names. + # """ + # return [obj.description for obj in self.objectives] + + # @property + # def names(self) -> list: + # """ + # Returns an array of the objective names. + # """ + # return [obj.name for obj in self.objectives] + + # @property + # def targets(self) -> list: + # """ + # Returns an array of the objective targets. + # """ + # return [obj.target for obj in self.objectives] + + # @property + # def weights(self) -> np.array: + # """ + # Returns an array of the objective weights. + # """ + # return np.array([obj.weight for obj in self.objectives]) + + # @property + # def signed_weights(self) -> np.array: + # """ + # Returns a signed array of the objective weights. + # """ + # return np.array([(1 if obj.target == "max" else -1) * obj.weight for obj in self.objectives]) - @property - def descriptions(self) -> list: - """ - Returns an array of the objective names. - """ - return [obj.description for obj in self.objectives] + def add(self, objective): + _validate_objs([*self.objectives, objective]) + self.objectives.append(objective) - @property - def names(self) -> list: - """ - Returns an array of the objective names. 
- """ - return [obj.name for obj in self.objectives] + @staticmethod + def _test_obj(obj, active=None, kind=None): + if active is not None: + if obj.active != active: + return False + if kind is not None: + if obj.kind != kind: + return False + return True - @property - def targets(self) -> list: - """ - Returns an array of the objective targets. - """ - return [obj.target for obj in self.objectives] + def subset(self, active=None, kind=None): + return ObjectiveList([obj for obj in self.objectives if self._test_obj(obj, active=active, kind=kind)]) - @property - def weights(self) -> np.array: + def transform(self, Y): """ - Returns an array of the objective weights. + Transform the experiment space to the model space. """ - return np.array([obj.weight for obj in self.objectives]) + if Y.shape[-1] != len(self): + raise ValueError(f"Cannot transform points with shape {Y.shape} using DOFs with dimension {len(self)}.") - @property - def is_fitness(self) -> np.array: - """ - Returns an array of the objective weights. - """ - return np.array([obj.target in ["min", "max"] for obj in self.objectives]) + if not isinstance(Y, torch.Tensor): + Y = torch.tensor(Y, dtype=torch.double) - @property - def signed_weights(self) -> np.array: + return torch.cat([obj._transform(Y[..., i]).unsqueeze(-1) for i, obj in enumerate(self.objectives)], dim=-1) + + def untransform(self, Y): """ - Returns a signed array of the objective weights. + Transform the model space to the experiment space. """ - return np.array([(1 if obj.target == "max" else -1) * obj.weight for obj in self.objectives]) - - def add(self, objective): - _validate_objectives([*self.objectives, objective]) - self.objectives.append(objective) + if Y.shape[-1] != len(self): + raise ValueError(f"Cannot untransform points with shape {Y.shape} using DOFs with dimension {len(self)}.") - @staticmethod - def _test_obj(obj, active=None): - if active is not None: - if obj.active != active: - return False - return True + if not isinstance(Y, torch.Tensor): + Y = torch.tensor(Y, dtype=torch.double) - def subset(self, active=None): - return ObjectiveList([obj for obj in self.objectives if self._test_obj(obj, active=active)]) + return torch.cat([obj._untransform(Y[..., i]).unsqueeze(-1) for i, obj in enumerate(self.objectives)], dim=-1) diff --git a/src/blop/plotting.py b/src/blop/plotting.py index 7535aa7..2f69d9d 100644 --- a/src/blop/plotting.py +++ b/src/blop/plotting.py @@ -13,11 +13,13 @@ MAX_TEST_INPUTS = 2**11 -def _plot_objs_one_dof(agent, size=16, lw=1e0): +def _plot_fitness_objs_one_dof(agent, size=16, lw=1e0): + fitness_objs = agent.objectives.subset(kind="fitness") + agent.obj_fig, agent.obj_axes = plt.subplots( - len(agent.objectives), + len(fitness_objs), 1, - figsize=(6, 4 * len(agent.objectives)), + figsize=(6, 4 * len(fitness_objs)), sharex=True, constrained_layout=True, ) @@ -27,7 +29,10 @@ def _plot_objs_one_dof(agent, size=16, lw=1e0): x_dof = agent.dofs.subset(active=True)[0] x_values = agent.table.loc[:, x_dof.device.name].values - for obj_index, obj in enumerate(agent.objectives): + test_inputs = agent.sample(method="grid") + test_model_inputs = agent.dofs.transform(test_inputs) + + for obj_index, obj in enumerate(fitness_objs): obj_values = agent.train_targets(obj.name).squeeze(-1).numpy() color = DEFAULT_COLOR_LIST[obj_index] @@ -35,7 +40,7 @@ def _plot_objs_one_dof(agent, size=16, lw=1e0): test_inputs = agent.sample(method="grid") test_x = test_inputs[..., 0].detach().numpy() - test_posterior = obj.model.posterior(test_inputs) + 
test_posterior = obj.model.posterior(test_model_inputs) test_mean = test_posterior.mean[..., 0].detach().numpy() test_sigma = test_posterior.variance.sqrt()[..., 0].detach().numpy() @@ -52,8 +57,71 @@ def _plot_objs_one_dof(agent, size=16, lw=1e0): ) agent.obj_axes[obj_index].set_xlim(*x_dof.search_domain) - agent.obj_axes[obj_index].set_xlabel(x_dof.label) - agent.obj_axes[obj_index].set_ylabel(obj.label) + agent.obj_axes[obj_index].set_xlabel(x_dof.label_with_units) + agent.obj_axes[obj_index].set_ylabel(obj.label_with_units) + + +def _plot_constraint_objs_one_dof(agent, size=16, lw=1e0): + constraint_objs = agent.objectives.subset(kind="constraint") + + agent.obj_fig, agent.obj_axes = plt.subplots( + len(constraint_objs), + 2, + figsize=(8, 4 * len(constraint_objs)), + sharex=True, + constrained_layout=True, + ) + + agent.obj_axes = np.atleast_2d(agent.obj_axes) + + x_dof = agent.dofs.subset(active=True)[0] + x_values = agent.table.loc[:, x_dof.device.name].values + + test_inputs = agent.sample(method="grid") + test_model_inputs = agent.dofs.transform(test_inputs) + + for obj_index, obj in enumerate(constraint_objs): + val_ax = agent.obj_axes[obj_index, 0] + con_ax = agent.obj_axes[obj_index, 1] + + obj_values = agent.train_targets(obj.name).squeeze(-1).numpy() + + color = DEFAULT_COLOR_LIST[obj_index] + + test_inputs = agent.sample(method="grid") + test_x = test_inputs[..., 0].detach().numpy() + + test_posterior = obj.model.posterior(test_model_inputs) + test_mean = test_posterior.mean[..., 0].detach().numpy() + test_sigma = test_posterior.variance.sqrt()[..., 0].detach().numpy() + + val_ax.scatter(x_values, obj_values, s=size, color=color) + + con_ax.plot(test_x, obj.targeting_constraint(test_model_inputs).detach()) + + for z in [0, 1, 2]: + val_ax.fill_between( + test_x.ravel(), + (test_mean - z * test_sigma).ravel(), + (test_mean + z * test_sigma).ravel(), + lw=lw, + color=color, + alpha=0.5**z, + ) + + ymin, ymax = val_ax.get_ylim() + + val_ax.fill_between( + test_x.ravel(), y1=np.maximum(obj.target[0], ymin), y2=np.minimum(obj.target[1], ymax), alpha=0.2 + ) + val_ax.set_ylim(ymin, ymax) + + con_ax.set_ylabel(r"P(constraint)") + val_ax.set_ylabel(obj.label_with_units) + + for ax in [val_ax, con_ax]: + ax.set_xlim(*x_dof.search_domain) + ax.set_xlabel(x_dof.label_with_units) def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COLORMAP, gridded=None, size=32, grid_zoom=1): @@ -87,29 +155,33 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL test_x = test_inputs[..., 0, axes[0]].detach().squeeze().numpy() test_y = test_inputs[..., 0, axes[1]].detach().squeeze().numpy() + model_inputs = agent.dofs.subset(active=True).transform(test_inputs) + for obj_index, obj in enumerate(agent.objectives): targets = agent.train_targets(obj.name).squeeze(-1).numpy() - values = obj.fitness_inverse(targets) + values = obj._untransform(targets) - val_vmin, val_vmax = np.quantile(values, q=[0.01, 0.99]) - val_norm = mpl.colors.LogNorm(val_vmin, val_vmax) if obj.log else mpl.colors.Normalize(val_vmin, val_vmax) + val_vmin, val_vmax = np.nanquantile(values, q=[0.01, 0.99]) + val_norm = ( + mpl.colors.LogNorm(val_vmin, val_vmax) if obj.transform == "log" else mpl.colors.Normalize(val_vmin, val_vmax) + ) - obj_vmin, obj_vmax = np.quantile(targets, q=[0.01, 0.99]) + obj_vmin, obj_vmax = np.nanquantile(targets, q=[0.01, 0.99]) obj_norm = mpl.colors.Normalize(obj_vmin, obj_vmax) val_ax = agent.obj_axes[obj_index, 0].scatter(x_values, y_values, c=values, 
s=size, norm=val_norm, cmap=cmap) # mean and sigma will have shape (*input_shape,) - test_posterior = obj.model.posterior(test_inputs) + test_posterior = obj.model.posterior(model_inputs) test_mean = test_posterior.mean[..., 0, 0].detach().squeeze().numpy() test_sigma = test_posterior.variance.sqrt()[..., 0, 0].detach().squeeze().numpy() - # test_values = obj.fitness_inverse(test_mean) if obj.is_fitness else test_mean + # test_values = obj.fitness_inverse(test_mean) if obj.kind == "fitness" else test_mean test_constraint = None - if not obj.is_fitness: - test_constraint = obj.targeting_constraint(test_inputs).detach().squeeze().numpy() + if not obj.kind == "fitness": + test_constraint = obj.targeting_constraint(model_inputs).detach().squeeze().numpy() if gridded: # _ = agent.obj_axes[obj_index, 1].pcolormesh( @@ -120,7 +192,7 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL # cmap=cmap, # norm=val_norm, # ) - if obj.is_fitness: + if obj.kind == "fitness": fitness_ax = agent.obj_axes[obj_index, 1].pcolormesh( test_x, test_y, @@ -157,7 +229,7 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL # norm=val_norm, # cmap=cmap, # ) - if obj.is_fitness: + if obj.kind == "fitness": fitness_ax = agent.obj_axes[obj_index, 1].scatter( test_x, test_y, @@ -188,7 +260,7 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL val_cbar = agent.obj_fig.colorbar(val_ax, ax=agent.obj_axes[obj_index, 0], location="bottom", aspect=32, shrink=0.8) val_cbar.set_label(f"{obj.units or ''}") - if obj.is_fitness: + if obj.kind == "fitness": _ = agent.obj_fig.colorbar(fitness_ax, ax=agent.obj_axes[obj_index, 1], location="bottom", aspect=32, shrink=0.8) _ = agent.obj_fig.colorbar(fit_err_ax, ax=agent.obj_axes[obj_index, 2], location="bottom", aspect=32, shrink=0.8) @@ -200,7 +272,7 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL constraint_ax, ax=agent.obj_axes[obj_index, 3], location="bottom", aspect=32, shrink=0.8 ) - constraint_cbar.set_label(f"{obj.label} constraint") + constraint_cbar.set_label(f"{obj.label_with_units} constraint") col_names = [ f"{obj.description} samples", @@ -232,13 +304,13 @@ def _plot_objs_many_dofs(agent, axes=(0, 1), shading="nearest", cmap=DEFAULT_COL ) for ax in agent.obj_axes.ravel(): - ax.set_xlabel(x_dof.label) - ax.set_ylabel(y_dof.label) + ax.set_xlabel(x_dof.label_with_units) + ax.set_ylabel(y_dof.label_with_units) ax.set_xlim(*x_dof.search_domain) ax.set_ylim(*y_dof.search_domain) - if x_dof.log: + if x_dof.transform == "log": ax.set_xscale("log") - if y_dof.log: + if y_dof.transform == "log": ax.set_yscale("log") @@ -265,7 +337,7 @@ def _plot_acqf_one_dof(agent, acq_funcs, lw=1e0, **kwargs): agent.acq_axes[iacq_func].plot(test_inputs.squeeze(-2), test_acqf, lw=lw, color=color) agent.acq_axes[iacq_func].set_xlim(*x_dof.search_domain) - agent.acq_axes[iacq_func].set_xlabel(x_dof.label) + agent.acq_axes[iacq_func].set_xlabel(x_dof.label_with_units) agent.acq_axes[iacq_func].set_ylabel(acq_func_meta["name"]) @@ -324,13 +396,13 @@ def _plot_acqf_many_dofs( agent.acq_fig.colorbar(obj_ax, ax=agent.acq_axes[iacq_func], location="bottom", aspect=32, shrink=0.8) for ax in agent.acq_axes.ravel(): - ax.set_xlabel(x_dof.label) - ax.set_ylabel(y_dof.label) + ax.set_xlabel(x_dof.label_with_units) + ax.set_ylabel(y_dof.label_with_units) ax.set_xlim(*x_dof.search_domain) ax.set_ylim(*y_dof.search_domain) - if x_dof.log: + if x_dof.transform == "log": 
ax.set_xscale("log") - if y_dof.log: + if y_dof.transform == "log": ax.set_yscale("log") @@ -388,13 +460,13 @@ def _plot_valid_many_dofs(agent, axes=[0, 1], shading="nearest", cmap=DEFAULT_CO ) for ax in agent.valid_axes.ravel(): - ax.set_xlabel(x_dof.label) - ax.set_ylabel(y_dof.label) + ax.set_xlabel(x_dof.label_with_units) + ax.set_ylabel(y_dof.label_with_units) ax.set_xlim(*x_dof.search_domain) ax.set_ylim(*y_dof.search_domain) - if x_dof.log: + if x_dof.transform == "log": ax.set_xscale("log") - if y_dof.log: + if y_dof.transform == "log": ax.set_yscale("log") @@ -425,7 +497,7 @@ def _plot_history(agent, x_key="index", show_all_objs=False): hist_axes[obj_index].plot(x, y, lw=5e-1, c="k") hist_axes[obj_index].set_ylabel(obj.key) - y = agent.scalarized_fitnesses + y = agent.scalarized_fitnesses() cummax_y = np.array([np.nanmax(y[: i + 1]) for i in range(len(y))]) @@ -464,7 +536,7 @@ def inspect_beam(agent, index, border=None): def _plot_pareto_front(agent, obj_indices=(0, 1)): - f_objs = agent.objectives[agent.objectives.type == "fitness"] + f_objs = agent.objectives[agent.objectives.kind == "fitness"] (i, j) = obj_indices if len(f_objs) < 2: @@ -472,12 +544,14 @@ def _plot_pareto_front(agent, obj_indices=(0, 1)): fig, ax = plt.subplots(1, 1, figsize=(6, 6)) - y = agent.train_targets()[:, agent.objectives.type == "fitness"] + y = agent.train_targets()[:, agent.objectives.kind == "fitness"] - front_mask = agent.pareto_front_mask + pareto_front_mask = agent.pareto_front_mask + constraint = agent.evaluated_constraints.all(axis=-1) - ax.scatter(y[front_mask, i], y[front_mask, j], c="b", label="Pareto front") - ax.scatter(y[~front_mask, i], y[~front_mask, j], c="r") + ax.scatter(*y[(~pareto_front_mask) & constraint].T[[i, j]], c="k") + ax.scatter(*y[~constraint].T[[i, j]], c="r", marker="x", label="invalid") + ax.scatter(*y[pareto_front_mask].T[[i, j]], c="b", label="Pareto front") ax.set_xlabel(f"{f_objs[i].name} fitness") ax.set_ylabel(f"{f_objs[j].name} fitness") diff --git a/src/blop/tests/conftest.py b/src/blop/tests/conftest.py index 43196a9..9c81da8 100644 --- a/src/blop/tests/conftest.py +++ b/src/blop/tests/conftest.py @@ -2,6 +2,7 @@ import datetime import databroker +import numpy as np import pytest from bluesky.callbacks import best_effort from bluesky.run_engine import RunEngine @@ -51,8 +52,8 @@ def agent(db): """ dofs = [ - DOF(name="x1", search_domain=(-8.0, 8.0)), - DOF(name="x2", search_domain=(-8.0, 8.0)), + DOF(name="x1", search_domain=(-5.0, 5.0)), + DOF(name="x2", search_domain=(-5.0, 5.0)), ] objectives = [Objective(name="himmelblau", target="min")] @@ -60,7 +61,7 @@ def agent(db): agent = Agent( dofs=dofs, objectives=objectives, - digestion=functions.constrained_himmelblau_digestion, + digestion=functions.himmelblau_digestion, db=db, verbose=True, tolerate_acquisition_errors=False, @@ -72,15 +73,15 @@ def agent(db): @pytest.fixture(scope="function") def mo_agent(db): """ - A simple agent minimizing two Himmelblau's functions + An agent minimizing two Himmelblau's functions """ def digestion(db, uid): products = db[uid].table() for index, entry in products.iterrows(): - products.loc[index, "obj1"] = functions.himmelblau(entry.x1, entry.x2) - products.loc[index, "obj2"] = functions.himmelblau(entry.x2, entry.x1) + products.loc[index, "f1"] = functions.himmelblau(entry.x1, entry.x2) + products.loc[index, "f2"] = functions.himmelblau(entry.x2, entry.x1) return products @@ -89,7 +90,48 @@ def digestion(db, uid): DOF(name="x2", search_domain=(-5.0, 5.0)), ] - objectives = 
[Objective(name="obj1", target="min"), Objective(name="obj2", target="min")]
+    objectives = [Objective(name="f1", target="min"), Objective(name="f2", target="min")]
+
+    agent = Agent(
+        dofs=dofs,
+        objectives=objectives,
+        digestion=digestion,
+        db=db,
+        verbose=True,
+        tolerate_acquisition_errors=False,
+    )
+
+    return agent
+
+
+@pytest.fixture(scope="function")
+def constrained_agent(db):
+    """
+    Chankong and Haimes function from https://en.wikipedia.org/wiki/Test_functions_for_optimization
+    """
+
+    def digestion(db, uid):
+        products = db[uid].table()
+
+        for index, entry in products.iterrows():
+            products.loc[index, "f1"] = (entry.x1 - 2) ** 2 + (entry.x2 - 1) ** 2 + 2
+            products.loc[index, "f2"] = 9 * entry.x1 - (entry.x2 - 1) ** 2
+            products.loc[index, "c1"] = entry.x1**2 + entry.x2**2
+            products.loc[index, "c2"] = entry.x1 - 3 * entry.x2 + 10
+
+        return products
+
+    dofs = [
+        DOF(description="The first DOF", name="x1", search_domain=(-20, 20)),
+        DOF(description="The second DOF", name="x2", search_domain=(-20, 20)),
+    ]
+
+    objectives = [
+        Objective(description="f1", name="f1", target="min"),
+        Objective(description="f2", name="f2", target="min"),
+        Objective(description="c1", name="c1", target=(-np.inf, 225)),
+        Objective(description="c2", name="c2", target=(-np.inf, 0)),
+    ]
 
     agent = Agent(
         dofs=dofs,
@@ -104,7 +146,7 @@ def digestion(db, uid):
 
 
 @pytest.fixture(scope="function")
-def agent_with_passive_dofs(db):
+def agent_with_read_only_dofs(db):
     """
     A simple agent minimizing two Himmelblau's functions
     """
@@ -124,7 +166,7 @@ def agent_with_passive_dofs(db):
     agent = Agent(
         dofs=dofs,
         objectives=objectives,
-        digestion=functions.constrained_himmelblau_digestion,
+        digestion=functions.himmelblau_digestion,
         db=db,
         verbose=True,
         tolerate_acquisition_errors=False,
diff --git a/src/blop/tests/test_dofs.py b/src/blop/tests/test_dofs.py
index 5ff3664..4c83613 100644
--- a/src/blop/tests/test_dofs.py
+++ b/src/blop/tests/test_dofs.py
@@ -9,23 +9,21 @@ def test_dof_types():
         description="A binary DOF",
         type="binary",
         name="x2",
-        search_domain=["in", "out"],
-        trust_domain=["in"],
+        search_domain={"in", "out"},
         units="is it in or out?",
     )
     dof3 = DOF(
         description="An ordinal DOF",
         type="ordinal",
         name="x3",
-        search_domain=["low", "medium", "high"],
-        trust_domain=["low", "medium"],
+        search_domain={"low", "medium", "high"},
         units="noise level",
     )
     dof4 = DOF(
         description="A categorical DOF",
         type="categorical",
         name="x4",
-        search_domain=["mango", "orange", "banana", "papaya"],
+        search_domain={"mango", "orange", "banana", "papaya"},
         units="fruit",
     )
diff --git a/src/blop/tests/test_pareto.py b/src/blop/tests/test_pareto.py
index ddcc482..75f1fb6 100644
--- a/src/blop/tests/test_pareto.py
+++ b/src/blop/tests/test_pareto.py
@@ -4,11 +4,22 @@
 @pytest.mark.test_func
 def test_pareto(mo_agent, RE):
     RE(mo_agent.learn("qr", n=16))
-
     mo_agent.plot_pareto_front()
 
 
 @pytest.mark.parametrize("acq_func", ["qnehvi"])
-def test_analytic_pareto_acq_funcs(mo_agent, RE, acq_func):
-    RE(mo_agent.learn("qr", n=4))
+def test_monte_carlo_pareto_acq_funcs(mo_agent, RE, acq_func):
+    RE(mo_agent.learn("qr", n=16))
     RE(mo_agent.learn(acq_func, n=2))
+
+
+@pytest.mark.test_func
+def test_constrained_pareto(constrained_agent, RE):
+    RE(constrained_agent.learn("qr", n=16))
+    constrained_agent.plot_pareto_front()
+
+
+@pytest.mark.parametrize("acq_func", ["qnehvi"])
+def test_constrained_monte_carlo_pareto_acq_funcs(constrained_agent, RE, acq_func):
+    RE(constrained_agent.learn("qr", n=16))
+    
RE(constrained_agent.learn(acq_func, n=2)) diff --git a/src/blop/tests/test_passive_dofs.py b/src/blop/tests/test_passive_dofs.py index b2034a9..fb8624a 100644 --- a/src/blop/tests/test_passive_dofs.py +++ b/src/blop/tests/test_passive_dofs.py @@ -2,5 +2,6 @@ @pytest.mark.test_func -def test_passive_dofs(agent_with_passive_dofs, RE): - RE(agent_with_passive_dofs.learn("qr", n=32)) +def test_read_only_dofs(agent_with_read_only_dofs, RE): + RE(agent_with_read_only_dofs.learn("qr", n=32)) + RE(agent_with_read_only_dofs.learn("qei", n=2)) diff --git a/src/blop/tests/test_plots.py b/src/blop/tests/test_plots.py index e67836c..4f2c535 100644 --- a/src/blop/tests/test_plots.py +++ b/src/blop/tests/test_plots.py @@ -19,10 +19,10 @@ def test_plots_multiple_objs(RE, mo_agent): mo_agent.plot_history() -def test_plots_passive_dofs(RE, agent_with_passive_dofs): - RE(agent_with_passive_dofs.learn("qr", n=16)) +def test_plots_read_only_dofs(RE, agent_with_read_only_dofs): + RE(agent_with_read_only_dofs.learn("qr", n=16)) - agent_with_passive_dofs.plot_objectives() - agent_with_passive_dofs.plot_acquisition() - agent_with_passive_dofs.plot_validity() - agent_with_passive_dofs.plot_history() + agent_with_read_only_dofs.plot_objectives() + agent_with_read_only_dofs.plot_acquisition() + agent_with_read_only_dofs.plot_validity() + agent_with_read_only_dofs.plot_history() diff --git a/src/blop/utils/functions.py b/src/blop/utils/functions.py index 74f6f42..553d79d 100644 --- a/src/blop/utils/functions.py +++ b/src/blop/utils/functions.py @@ -14,6 +14,10 @@ def sigmoid(x): return 1 / (1 + np.exp(-x)) +def inverse_sigmoid(x): + return np.log(x / (1 - x)) + + def booth(x1, x2): """ The Booth function (https://en.wikipedia.org/wiki/Test_functions_for_optimization)
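
As a closing sanity check on the new helper in ``src/blop/utils/functions.py``: ``inverse_sigmoid`` is the logit function, so it should undo ``sigmoid`` exactly. A sketch using only the two functions defined above (the import path ``blop.utils.functions`` mirrors the file being patched):

.. code-block:: python

    import numpy as np

    from blop.utils.functions import inverse_sigmoid, sigmoid

    x = np.linspace(-5.0, 5.0, 11)

    # logit(sigmoid(x)) == x, up to floating-point error
    assert np.allclose(inverse_sigmoid(sigmoid(x)), x)
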