% LEARN_GP_HYPERPARAMETERS actively learn GP hyperparameters.
%
% This is an implementation of the method for actively learning GP
% hyperparameters described in
%
%   Garnett, R., Osborne, M., and Hennig, P. Active Learning of Linear
%   Embeddings for Gaussian Processes. (2014). 30th Conference on
%   Uncertainty in Artificial Intelligence (UAI 2014).
%
% Given a GP model on a function f:
%
%   p(f | \theta) = GP(f; mu(x; \theta), K(x, x'; \theta)),
%
% this routine sequentially chooses observation locations X = {x_i}
% with the goal of learning the GP hyperparameters \theta as quickly
% as possible. This is done by
% maintaining a probabilistic belief p(\theta | D) and selecting
% each observation location by maximizing the Bayesian active
% learning by disagreement (BALD) criterion described in
%
%   N. Houlsby, F. Huszar, Z. Ghahramani, and M. Lengyel. Bayesian
%   Active Learning for Classification and Preference
%   Learning. (2011). arXiv preprint arXiv:1112.5745 [stat.ML].
%
% This implementation uses the approximation to BALD described in the
% Garnett et al. paper above, which relies on the "marginal GP" (MGP)
% method for approximate GP hyperparameter marginalization.
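%
% For reference, BALD selects the location x maximizing the mutual
% information between the observation y and the hyperparameters,
%
%   alpha(x) = H[y | x, D] - E_{\theta | D} H[y | x, \theta, D],
%
% i.e., the difference between the marginal predictive entropy and the
% expected predictive entropy under the hyperparameter posterior; see
% select_next_point for the MGP-based approximation used here.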
%
% See demo/demo.m for a simple example usage.
%
% Dependencies
% ------------
%
% This code is written to be interoperable with the GPML MATLAB
% toolbox, available here:
%
%   http://www.gaussianprocess.org/gpml/code/matlab/doc/
%
% The GPML toolbox must be in your MATLAB path for this function to
% work. This function also depends on the gpml_extensions repository,
% available here:
%
%   https://github.com/rmgarnett/gpml_extensions/
%
% as well as the marginal GP (MGP) implementation available here:
%
%   https://github.com/rmgarnett/mgp/
%
% Both must be in your MATLAB path. Finally, the optimization of
% the GP log posterior requires Mark Schmidt's minFunc function:
%
%   http://www.di.ens.fr/~mschmidt/Software/minFunc.html
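%
% A typical path setup might look like the following (the directory
% names are illustrative):
%
%   run('path/to/gpml/startup.m');                % GPML's own path setup
%   addpath(genpath('path/to/gpml_extensions'));
%   addpath(genpath('path/to/mgp'));
%   addpath(genpath('path/to/minFunc'));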
%
% Usage
% -----
%
%   results = learn_gp_hyperparameters(problem, model, varargin)
%
% Required inputs:
%
%   problem: a struct describing the active learning problem,
%            containing fields:
%
%        num_evaluations: the number of observations to select
%       candidate_x_star: the set of observation locations available
%                         for selection
%                      f: a function handle that returns the
%                         function observation at a given point,
%                         which will be called as
%
%                           y_star = problem.f(x_star)
%
%   model: a struct describing the GP model, containing fields:
%
%        inference_method: a GPML inference method
%                          (optional, default: @exact_inference)
%           mean_function: a GPML mean function
%                          (optional, default: @zero_mean)
%     covariance_function: a GPML covariance function
%              likelihood: a GPML likelihood
%                          (optional, default: @likGauss)
%                   prior: a function handle to a hyperparameter
%                          prior p(\theta) (see priors.m in
%                          gpml_extensions)
%
% Optional inputs (specified as name/value pairs):
%
%   'minFunc_options': a struct containing options to pass to
%                      minFunc when optimizing the log posterior,
%                      default:
%
%                        .Display     = 'off'
%                        .MaxFunEvals = 300
%
%      'num_restarts': the number of random restarts to use when
%                      optimizing the log posterior, default: 1
%
% Outputs:
%
%   results: a struct describing the active learning process,
%            containing fields:
%
%                .chosen_x: the chosen observation locations
%                           (problem.num_evaluations x D)
%                .chosen_y: the associated observation values
%                           (problem.num_evaluations x 1)
%     .map_hyperparameters: an array of GPML hyperparameter structs
%                           containing the MAP hyperparameters
%                           learned after every evaluation
%          .map_posteriors: an array of GPML posterior structs
%                           corresponding to
%
%                             (D_i, \hat{\theta}_i),
%
%                           where D_i comprises the first i
%                           observations,
%
%                             D_i = {(x_k, y_k)} (k = 1..i),
%
%                           and \hat{\theta}_i is the MAP
%                           hyperparameters learned from D_i
%                           (contained in results.map_hyperparameters)
%
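% Example
% -------
%
% A minimal sketch of a call (the one-dimensional objective and
% covariance choice below are illustrative; the prior is assumed to
% have been constructed with the tools in priors.m from
% gpml_extensions; see demo/demo.m for a complete working example):
%
%   problem.num_evaluations  = 10;
%   problem.candidate_x_star = linspace(0, 1, 500)';
%   problem.f = @(x) (sin(10 * x) + 0.1 * randn(size(x, 1), 1));
%
%   model.covariance_function = {@covSEiso};
%   model.prior               = prior;  % p(\theta) built as in priors.m
%
%   results = learn_gp_hyperparameters(problem, model);
%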
% See also GP, MGP, MINFUNC.

% Copyright (c) 2014 Roman Garnett.
function results = learn_gp_hyperparameters(problem, model, varargin)

  % parse optional inputs
  parser = inputParser;

  addParamValue(parser, 'num_restarts',    1);
  addParamValue(parser, 'minFunc_options', ...
                struct('Display',     'off', ...
                       'MaxFunEvals', 300));

  parse(parser, varargin{:});
  options = parser.Results;
  % check model specification
  if (~isfield(model, 'inference_method') || isempty(model.inference_method))
    model.inference_method = @exact_inference;
  end

  if (~isfield(model, 'mean_function') || isempty(model.mean_function))
    model.mean_function = {@zero_mean};
  end

  if (~isfield(model, 'likelihood') || isempty(model.likelihood))
    model.likelihood = @likGauss;
  end
  % add prior to inference method if not already incorporated
  if (isempty(strfind(func2str(model.inference_method), 'inference_with_prior')))
    model.inference_method = ...
        add_prior_to_inference_method(model.inference_method, model.prior);
  end
  % choose first point closest to mean of available points
  residuals = bsxfun(@minus, problem.candidate_x_star, ...
                     mean(problem.candidate_x_star));
  [~, ind] = min(sum(residuals.^2, 2));

  x = problem.candidate_x_star(ind, :);
  y = problem.f(x);

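  % remove the chosen point from the candidate pool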
  problem.candidate_x_star = ...
      problem.candidate_x_star([1:(ind - 1), (ind + 1):end], :);

  % allocate output
  results.map_hyperparameters = ...
      repmat(model.prior(), [problem.num_evaluations, 1]);

  for i = 1:problem.num_evaluations

    % always start optimization at previous MAP hyperparameters
    if (i > 1)
      initial_hyperparameters = results.map_hyperparameters(i - 1);
    else
      initial_hyperparameters = [];
    end

    % find MAP hyperparameters
    results.map_hyperparameters(i) = ...
        minimize_minFunc(model, x, y, ...
                         'initial_hyperparameters', initial_hyperparameters, ...
                         'num_restarts',            options.num_restarts, ...
                         'minFunc_options',         options.minFunc_options);

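    % compute the GPML posterior struct for the current data and the
    % MAP hyperparameters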
    [~, ~, results.map_posteriors(i)] = gp(results.map_hyperparameters(i), ...
            model.inference_method, model.mean_function, ...
            model.covariance_function, model.likelihood, x, y);

    % allocate output
    if (i == 1)
      results.map_posteriors = ...
          repmat(results.map_posteriors(1), [problem.num_evaluations, 1]);
    end
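
    % nothing left to select after the final evaluation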
    if (i == problem.num_evaluations)
      break;
    end
    ind = select_next_point(results.map_hyperparameters(i), ...
                            model, x, results.map_posteriors(i), ...
                            problem.candidate_x_star);

    x_star = problem.candidate_x_star(ind, :);

    x = [x; x_star];
    y = [y; problem.f(x_star)];

    problem.candidate_x_star = ...
        problem.candidate_x_star([1:(ind - 1), (ind + 1):end], :);

  end

  results.chosen_x = x;
  results.chosen_y = y;

end