Kstars

gaussian_process.cpp
Go to the documentation of this file.
1/*
2 SPDX-FileCopyrightText: 2014-2017 Max Planck Society.
3 All rights reserved.
4
5 SPDX-License-Identifier: BSD-3-Clause
6*/
7
8/**
9 * @file
10 * @date 2014-2017
11 * @copyright Max Planck Society
12 *
13 * @author Edgar D. Klenske <edgar.klenske@tuebingen.mpg.de>
14 * @author Stephan Wenninger <stephan.wenninger@tuebingen.mpg.de>
15 * @author Raffi Enficiaud <raffi.enficiaud@tuebingen.mpg.de>
16 *
17 * @brief The GP class implements the Gaussian Process functionality.
18 */
19
20#include <cstdint>
21
22#include "gaussian_process.h"
23#include "math_tools.h"
25
26// A functor for special orderings
27struct covariance_ordering
28{
29 covariance_ordering(Eigen::VectorXd const &cov) : covariance_(cov) {}
30 bool operator()(int a, int b) const
31 {
32 return (covariance_[a] > covariance_[b]);
33 }
34 Eigen::VectorXd const &covariance_;
35};
36
37GP::GP() : covFunc_(nullptr), // initialize pointer to null
38 covFuncProj_(nullptr), // initialize pointer to null
39 data_loc_(Eigen::VectorXd()),
40 data_out_(Eigen::VectorXd()),
41 data_var_(Eigen::VectorXd()),
42 gram_matrix_(Eigen::MatrixXd()),
43 alpha_(Eigen::VectorXd()),
44 chol_gram_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
45 log_noise_sd_(-1E20),
46 use_explicit_trend_(false),
47 feature_vectors_(Eigen::MatrixXd()),
48 feature_matrix_(Eigen::MatrixXd()),
49 chol_feature_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
50 beta_(Eigen::VectorXd())
51{ }
52
53GP::GP(const covariance_functions::CovFunc &covFunc) :
54 covFunc_(covFunc.clone()),
55 covFuncProj_(nullptr),
56 data_loc_(Eigen::VectorXd()),
57 data_out_(Eigen::VectorXd()),
58 data_var_(Eigen::VectorXd()),
59 gram_matrix_(Eigen::MatrixXd()),
60 alpha_(Eigen::VectorXd()),
61 chol_gram_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
62 log_noise_sd_(-1E20),
63 use_explicit_trend_(false),
64 feature_vectors_(Eigen::MatrixXd()),
65 feature_matrix_(Eigen::MatrixXd()),
66 chol_feature_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
67 beta_(Eigen::VectorXd())
68{ }
69
70GP::GP(const double noise_variance,
71 const covariance_functions::CovFunc &covFunc) :
72 covFunc_(covFunc.clone()),
73 covFuncProj_(nullptr),
74 data_loc_(Eigen::VectorXd()),
75 data_out_(Eigen::VectorXd()),
76 data_var_(Eigen::VectorXd()),
77 gram_matrix_(Eigen::MatrixXd()),
78 alpha_(Eigen::VectorXd()),
79 chol_gram_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
80 log_noise_sd_(std::log(noise_variance)),
81 use_explicit_trend_(false),
82 feature_vectors_(Eigen::MatrixXd()),
83 feature_matrix_(Eigen::MatrixXd()),
84 chol_feature_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
85 beta_(Eigen::VectorXd())
86{ }
87
88GP::~GP()
89{
90 delete this->covFunc_; // tidy up since we are responsible for the covFunc.
91 delete this->covFuncProj_; // tidy up since we are responsible for the covFuncProj.
92}
93
94GP::GP(const GP &that) :
95 covFunc_(nullptr), // initialize to nullptr, clone later
96 covFuncProj_(nullptr), // initialize to nullptr, clone later
97 data_loc_(that.data_loc_),
98 data_out_(that.data_out_),
99 data_var_(that.data_var_),
100 gram_matrix_(that.gram_matrix_),
101 alpha_(that.alpha_),
102 chol_gram_matrix_(that.chol_gram_matrix_),
103 log_noise_sd_(that.log_noise_sd_),
104 use_explicit_trend_(that.use_explicit_trend_),
105 feature_vectors_(that.feature_vectors_),
106 feature_matrix_(that.feature_matrix_),
107 chol_feature_matrix_(that.chol_feature_matrix_),
108 beta_(that.beta_)
109{
110 covFunc_ = that.covFunc_->clone();
111 covFuncProj_ = that.covFuncProj_->clone();
112}
113
114bool GP::setCovarianceFunction(const covariance_functions::CovFunc &covFunc)
115{
116 // can only set the covariance function if training dataset is empty
117 if (data_loc_.size() != 0 || data_out_.size() != 0)
118 return false;
119 delete covFunc_; // initialized to zero, so delete is safe
120 covFunc_ = covFunc.clone();
121
122 return true;
123}
124
125void GP::enableOutputProjection(const covariance_functions::CovFunc &covFuncProj)
126{
127 delete covFuncProj_;// initialized to zero, so delete is safe
128 covFuncProj_ = covFuncProj.clone();
129}
130
131void GP::disableOutputProjection()
132{
133 delete covFuncProj_; // initialized to zero, so delete is safe
134 covFuncProj_ = nullptr;
135}
136
137GP &GP::operator=(const GP &that)
138{
139 if (this != &that)
140 {
141 covariance_functions::CovFunc* temp = covFunc_; // store old pointer...
142 covFunc_ = that.covFunc_->clone(); // ... first clone ...
143 delete temp; // ... and then delete.
144
145 // copy the rest
146 data_loc_ = that.data_loc_;
147 data_out_ = that.data_out_;
148 data_var_ = that.data_var_;
149 gram_matrix_ = that.gram_matrix_;
150 alpha_ = that.alpha_;
151 chol_gram_matrix_ = that.chol_gram_matrix_;
152 log_noise_sd_ = that.log_noise_sd_;
153 }
154 return *this;
155}
156
157Eigen::VectorXd GP::drawSample(const Eigen::VectorXd &locations) const
158{
159 return drawSample(locations,
160 math_tools::generate_normal_random_matrix(locations.rows(), locations.cols()));
161}
162
163Eigen::VectorXd GP::drawSample(const Eigen::VectorXd &locations,
164 const Eigen::VectorXd &random_vector) const
165{
166 Eigen::MatrixXd prior_covariance;
167 Eigen::MatrixXd kernel_matrix;
168 Eigen::LLT<Eigen::MatrixXd> chol_kernel_matrix;
169 Eigen::MatrixXd samples;
170
171 // we need the prior covariance for both, prior and posterior samples.
172 prior_covariance = covFunc_->evaluate(locations, locations);
173 kernel_matrix = prior_covariance;
174
175 if (gram_matrix_.cols() == 0) // no data, i.e. only a prior
176 {
177 kernel_matrix = prior_covariance + JITTER * Eigen::MatrixXd::Identity(
178 prior_covariance.rows(), prior_covariance.cols());
179 }
180 else // we have some data
181 {
182 Eigen::MatrixXd mixed_covariance;
183 mixed_covariance = covFunc_->evaluate(locations, data_loc_);
184 Eigen::MatrixXd posterior_covariance;
185 posterior_covariance = prior_covariance - mixed_covariance *
186 (chol_gram_matrix_.solve(mixed_covariance.transpose()));
187 kernel_matrix = posterior_covariance + JITTER * Eigen::MatrixXd::Identity(
188 posterior_covariance.rows(), posterior_covariance.cols());
189 }
190 chol_kernel_matrix = kernel_matrix.llt();
191
192 // Draw sample: s = chol(K)*x, where x is a random vector
193 samples = chol_kernel_matrix.matrixL() * random_vector;
194
195 // add the measurement noise on return
196 return samples + std::exp(log_noise_sd_) *
197 math_tools::generate_normal_random_matrix(samples.rows(), samples.cols());
198}
199
200void GP::infer()
201{
202 assert(data_loc_.rows() > 0 && "Error: the GP is not yet initialized!");
203
204 // The data covariance matrix
205 Eigen::MatrixXd data_cov = covFunc_->evaluate(data_loc_, data_loc_);
206
207 // swapping in the Gram matrix is faster than directly assigning it
208 gram_matrix_.swap(data_cov); // store the new data_cov as gram matrix
209 if (data_var_.rows() == 0) // homoscedastic
210 {
211 gram_matrix_ += (std::exp(2 * log_noise_sd_) + JITTER) *
212 Eigen::MatrixXd::Identity(gram_matrix_.rows(), gram_matrix_.cols());
213 }
214 else // heteroscedastic
215 {
216 gram_matrix_ += data_var_.asDiagonal();
217 }
218
219 // compute the Cholesky decomposition of the Gram matrix
220 chol_gram_matrix_ = gram_matrix_.ldlt();
221
222 // pre-compute the alpha, which is the solution of the chol to the data
223 alpha_ = chol_gram_matrix_.solve(data_out_);
224
225 if (use_explicit_trend_)
226 {
227 feature_vectors_ = Eigen::MatrixXd(2, data_loc_.rows());
228 // precompute necessary matrices for the explicit trend function
229 feature_vectors_.row(0) = Eigen::MatrixXd::Ones(1, data_loc_.rows()); // instead of pow(0)
230 feature_vectors_.row(1) = data_loc_.array(); // instead of pow(1)
231
232 feature_matrix_ = feature_vectors_ * chol_gram_matrix_.solve(feature_vectors_.transpose());
233 chol_feature_matrix_ = feature_matrix_.ldlt();
234
235 beta_ = chol_feature_matrix_.solve(feature_vectors_) * alpha_;
236 }
237}
238
239void GP::infer(const Eigen::VectorXd &data_loc,
240 const Eigen::VectorXd &data_out,
241 const Eigen::VectorXd &data_var /* = EigenVectorXd() */)
242{
243 data_loc_ = data_loc;
244 data_out_ = data_out;
245 if (data_var.rows() > 0)
246 {
247 data_var_ = data_var;
248 }
249 infer(); // updates the Gram matrix and its Cholesky decomposition
250}
251
252void GP::inferSD(const Eigen::VectorXd &data_loc,
253 const Eigen::VectorXd &data_out,
254 const int n, const Eigen::VectorXd &data_var /* = EigenVectorXd() */,
255 const double prediction_point /*= std::numeric_limits<double>::quiet_NaN()*/)
256{
257 Eigen::VectorXd covariance;
258
259 Eigen::VectorXd prediction_loc(1);
260 if ( math_tools::isNaN(prediction_point) )
261 {
262 // if none given, use the last datapoint as prediction reference
263 prediction_loc = data_loc.tail(1);
264 }
265 else
266 {
267 prediction_loc << prediction_point;
268 }
269
270 // calculate covariance between data and prediction point for point selection
271 covariance = covFunc_->evaluate(data_loc, prediction_loc);
272
273 // generate index vector
274 std::vector<int> index(covariance.size(), 0);
275 for (size_t i = 0 ; i != index.size() ; i++)
276 {
277 index[i] = i;
278 }
279
280 // sort indices with respect to covariance value
281 std::sort(index.begin(), index.end(),
282 covariance_ordering(covariance)
283 );
284
285 bool use_var = data_var.rows() > 0; // true means heteroscedastic noise
286
287 if (n < data_loc.rows())
288 {
289 std::vector<double> loc_arr(n);
290 std::vector<double> out_arr(n);
291 std::vector<double> var_arr(n);
292
293 for (int i = 0; i < n; ++i)
294 {
295 loc_arr[i] = data_loc[index[i]];
296 out_arr[i] = data_out[index[i]];
297 if (use_var)
298 {
299 var_arr[i] = data_var[index[i]];
300 }
301 }
302
303 data_loc_ = Eigen::Map<Eigen::VectorXd>(loc_arr.data(), n, 1);
304 data_out_ = Eigen::Map<Eigen::VectorXd>(out_arr.data(), n, 1);
305 if (use_var)
306 {
307 data_var_ = Eigen::Map<Eigen::VectorXd>(var_arr.data(), n, 1);
308 }
309 }
310 else // we can use all points and don't neet to select
311 {
312 data_loc_ = data_loc;
313 data_out_ = data_out;
314 if (use_var)
315 {
316 data_var_ = data_var;
317 }
318 }
319 infer();
320}
321
322void GP::clearData()
323{
324 gram_matrix_ = Eigen::MatrixXd();
325 chol_gram_matrix_ = Eigen::LDLT<Eigen::MatrixXd>();
326 data_loc_ = Eigen::VectorXd();
327 data_out_ = Eigen::VectorXd();
328}
329
330Eigen::VectorXd GP::predict(const Eigen::VectorXd &locations, Eigen::VectorXd* variances /*=nullptr*/) const
331{
332 // The prior covariance matrix (evaluated on test points)
333 Eigen::MatrixXd prior_cov = covFunc_->evaluate(locations, locations);
334
335 if (data_loc_.rows() == 0) // check if the data is empty
336 {
337 if (variances != nullptr)
338 {
339 (*variances) = prior_cov.diagonal();
340 }
341 return Eigen::VectorXd::Zero(locations.size());
342 }
343 else
344 {
345 // Calculate mixed covariance matrix (test and data points)
346 Eigen::MatrixXd mixed_cov = covFunc_->evaluate(locations, data_loc_);
347
348 // Calculate feature matrix for linear feature
349 Eigen::MatrixXd phi(2, locations.rows());
350 if (use_explicit_trend_)
351 {
352 phi.row(0) = Eigen::MatrixXd::Ones(1, locations.rows()); // instead of pow(0)
353 phi.row(1) = locations.array(); // instead of pow(1)
354
355 return predict(prior_cov, mixed_cov, phi, variances);
356 }
357 return predict(prior_cov, mixed_cov, Eigen::MatrixXd(), variances);
358 }
359}
360
361Eigen::VectorXd GP::predictProjected(const Eigen::VectorXd &locations, Eigen::VectorXd* variances /*=nullptr*/) const
362{
363 // use the suitable covariance function, depending on whether an
364 // output projection is used or not.
365 covariance_functions::CovFunc* covFunc = nullptr;
366 if (covFuncProj_ == nullptr)
367 {
368 covFunc = covFunc_;
369 }
370 else
371 {
372 covFunc = covFuncProj_;
373 }
374
375 assert(covFunc != nullptr);
376
377 // The prior covariance matrix (evaluated on test points)
378 Eigen::MatrixXd prior_cov = covFunc->evaluate(locations, locations);
379
380 if (data_loc_.rows() == 0) // check if the data is empty
381 {
382 if (variances != nullptr)
383 {
384 (*variances) = prior_cov.diagonal();
385 }
386 return Eigen::VectorXd::Zero(locations.size());
387 }
388 else
389 {
390 // The mixed covariance matrix (test and data points)
391 Eigen::MatrixXd mixed_cov = covFunc->evaluate(locations, data_loc_);
392
393 Eigen::MatrixXd phi(2, locations.rows());
394 if (use_explicit_trend_)
395 {
396 // calculate the feature vectors for linear regression
397 phi.row(0) = Eigen::MatrixXd::Ones(1, locations.rows()); // instead of pow(0)
398 phi.row(1) = locations.array(); // instead of pow(1)
399
400 return predict(prior_cov, mixed_cov, phi, variances);
401 }
402 return predict(prior_cov, mixed_cov, Eigen::MatrixXd(), variances);
403 }
404}
405
406Eigen::VectorXd GP::predict(const Eigen::MatrixXd &prior_cov, const Eigen::MatrixXd &mixed_cov,
407 const Eigen::MatrixXd &phi /*=Eigen::MatrixXd()*/, Eigen::VectorXd* variances /*=nullptr*/)
408const
409{
410
411 // calculate GP mean from precomputed alpha vector
412 Eigen::VectorXd m = mixed_cov * alpha_;
413
414 // precompute K^{-1} * mixed_cov
415 Eigen::MatrixXd gamma = chol_gram_matrix_.solve(mixed_cov.transpose());
416
417 Eigen::MatrixXd R;
418
419 // include fixed-features in the calculations
420 if (use_explicit_trend_)
421 {
422 R = phi - feature_vectors_ * gamma;
423
424 m += R.transpose() * beta_;
425 }
426
427 if (variances != nullptr)
428 {
429 // calculate GP variance
430 Eigen::MatrixXd v = prior_cov - mixed_cov * gamma;
431
432 // include fixed-features in the calculations
433 if (use_explicit_trend_)
434 {
435 assert(R.size() > 0);
436 v += R.transpose() * chol_feature_matrix_.solve(R);
437 }
438
439 (*variances) = v.diagonal();
440 }
441 return m;
442}
443
444void GP::setHyperParameters(const Eigen::VectorXd &hyperParameters)
445{
446 assert(hyperParameters.rows() == covFunc_->getParameterCount() + covFunc_->getExtraParameterCount() + 1 &&
447 "Wrong number of hyperparameters supplied to setHyperParameters()!");
448 log_noise_sd_ = hyperParameters[0];
449 covFunc_->setParameters(hyperParameters.segment(1, covFunc_->getParameterCount()));
450 covFunc_->setExtraParameters(hyperParameters.tail(covFunc_->getExtraParameterCount()));
451 if (data_loc_.rows() > 0)
452 {
453 infer();
454 }
455
456 // if the projection kernel is set, set the parameters there as well.
457 if (covFuncProj_ != nullptr)
458 {
459 covFuncProj_->setParameters(hyperParameters.segment(1, covFunc_->getParameterCount()));
460 covFuncProj_->setExtraParameters(hyperParameters.tail(covFunc_->getExtraParameterCount()));
461 }
462}
463
464Eigen::VectorXd GP::getHyperParameters() const
465{
466 Eigen::VectorXd hyperParameters(covFunc_->getParameterCount() + covFunc_->getExtraParameterCount() + 1);
467 hyperParameters << log_noise_sd_, covFunc_->getParameters(), covFunc_->getExtraParameters();
468 return hyperParameters;
469}
470
471void GP::enableExplicitTrend()
472{
473 use_explicit_trend_ = true;
474}
475
476void GP::disableExplicitTrend()
477{
478 use_explicit_trend_ = false;
479}
Base class definition for covariance functions.
virtual Eigen::MatrixXd evaluate(const Eigen::VectorXd &x1, const Eigen::VectorXd &x2)=0
virtual void setParameters(const Eigen::VectorXd &params)=0
Method to set the hyper-parameters.
virtual int getParameterCount() const =0
Returns the number of hyper-parameters.
virtual const Eigen::VectorXd & getParameters() const =0
Returns the hyper-parameters.
virtual CovFunc * clone() const =0
Produces a clone to be able to copy the object.
The file holds the covariance functions that can be used with the GP class.
The GP class implements the Gaussian Process functionality.
Eigen::MatrixXd generate_normal_random_matrix(const size_t n, const size_t m)
Provides mathematical tools needed for the Gaussian process toolbox.
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:47:15 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.