gaussian_process.cpp
/*
 SPDX-FileCopyrightText: 2014-2017 Max Planck Society.
 All rights reserved.

 SPDX-License-Identifier: BSD-3-Clause
*/

/**
 * @file
 * @date 2014-2017
 * @copyright Max Planck Society
 *
 * @author Edgar D. Klenske <[email protected]>
 * @author Stephan Wenninger <[email protected]>
 * @author Raffi Enficiaud <[email protected]>
 *
 * @brief The GP class implements the Gaussian Process functionality.
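 *
 * A minimal usage sketch (the kernel type and the numeric values below are
 * illustrative assumptions, not recommended settings):
 * @code
 * covariance_functions::PeriodicSquareExponential cov; // assumed kernel type
 * GP gp(0.01, cov);                       // noise parameter plus kernel
 * gp.infer(data_locations, data_outputs); // fit to training data
 * Eigen::VectorXd variances;
 * Eigen::VectorXd means = gp.predict(test_locations, &variances);
 * @endcode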
 */

#include <algorithm> // std::sort
#include <cassert>   // assert
#include <cmath>     // std::exp, std::log
#include <cstdint>
#include <vector>    // std::vector

#include "gaussian_process.h"
#include "math_tools.h"
#include "covariance_functions.h"

// A comparison functor that orders indices by descending covariance value;
// used by inferSD() to select the data points most correlated with the
// prediction location.
struct covariance_ordering
{
    covariance_ordering(Eigen::VectorXd const& cov) : covariance_(cov) {}
    bool operator()(int a, int b) const
    {
        return covariance_[a] > covariance_[b];
    }
    Eigen::VectorXd const& covariance_;
};

GP::GP() : covFunc_(nullptr), // initialize pointer to null
    covFuncProj_(nullptr), // initialize pointer to null
    data_loc_(Eigen::VectorXd()),
    data_out_(Eigen::VectorXd()),
    data_var_(Eigen::VectorXd()),
    gram_matrix_(Eigen::MatrixXd()),
    alpha_(Eigen::VectorXd()),
    chol_gram_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
    log_noise_sd_(-1E20),
    use_explicit_trend_(false),
    feature_vectors_(Eigen::MatrixXd()),
    feature_matrix_(Eigen::MatrixXd()),
    chol_feature_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
    beta_(Eigen::VectorXd())
{ }

GP::GP(const covariance_functions::CovFunc& covFunc) :
    covFunc_(covFunc.clone()),
    covFuncProj_(nullptr),
    data_loc_(Eigen::VectorXd()),
    data_out_(Eigen::VectorXd()),
    data_var_(Eigen::VectorXd()),
    gram_matrix_(Eigen::MatrixXd()),
    alpha_(Eigen::VectorXd()),
    chol_gram_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
    log_noise_sd_(-1E20),
    use_explicit_trend_(false),
    feature_vectors_(Eigen::MatrixXd()),
    feature_matrix_(Eigen::MatrixXd()),
    chol_feature_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
    beta_(Eigen::VectorXd())
{ }

GP::GP(const double noise_variance,
       const covariance_functions::CovFunc& covFunc) :
    covFunc_(covFunc.clone()),
    covFuncProj_(nullptr),
    data_loc_(Eigen::VectorXd()),
    data_out_(Eigen::VectorXd()),
    data_var_(Eigen::VectorXd()),
    gram_matrix_(Eigen::MatrixXd()),
    alpha_(Eigen::VectorXd()),
    chol_gram_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
    log_noise_sd_(std::log(noise_variance)),
    use_explicit_trend_(false),
    feature_vectors_(Eigen::MatrixXd()),
    feature_matrix_(Eigen::MatrixXd()),
    chol_feature_matrix_(Eigen::LDLT<Eigen::MatrixXd>()),
    beta_(Eigen::VectorXd())
{ }

GP::~GP()
{
    delete this->covFunc_; // tidy up since we are responsible for the covFunc.
    delete this->covFuncProj_; // tidy up since we are responsible for the covFuncProj.
}

GP::GP(const GP& that) :
    covFunc_(nullptr), // initialize to nullptr, clone later
    covFuncProj_(nullptr), // initialize to nullptr, clone later
    data_loc_(that.data_loc_),
    data_out_(that.data_out_),
    data_var_(that.data_var_),
    gram_matrix_(that.gram_matrix_),
    alpha_(that.alpha_),
    chol_gram_matrix_(that.chol_gram_matrix_),
    log_noise_sd_(that.log_noise_sd_),
    use_explicit_trend_(that.use_explicit_trend_),
    feature_vectors_(that.feature_vectors_),
    feature_matrix_(that.feature_matrix_),
    chol_feature_matrix_(that.chol_feature_matrix_),
    beta_(that.beta_)
{
    // guard the clones: either kernel pointer may legitimately be null
    // (covFuncProj_ in particular is null unless output projection is enabled)
    if (that.covFunc_ != nullptr)
    {
        covFunc_ = that.covFunc_->clone();
    }
    if (that.covFuncProj_ != nullptr)
    {
        covFuncProj_ = that.covFuncProj_->clone();
    }
}

bool GP::setCovarianceFunction(const covariance_functions::CovFunc& covFunc)
{
    // the covariance function can only be replaced while the training set is empty
    if (data_loc_.size() != 0 || data_out_.size() != 0)
        return false;
    delete covFunc_; // initialized to nullptr, so delete is safe
    covFunc_ = covFunc.clone();

    return true;
}
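
// The projection kernel, if set, is used by predictProjected() in place of
// the main kernel when evaluating covariances for the test locations; the
// training data and the Gram matrix stay the same.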
void GP::enableOutputProjection(const covariance_functions::CovFunc& covFuncProj)
{
    delete covFuncProj_; // initialized to nullptr, so delete is safe
    covFuncProj_ = covFuncProj.clone();
}

void GP::disableOutputProjection()
{
    delete covFuncProj_; // initialized to nullptr, so delete is safe
    covFuncProj_ = nullptr;
}

GP& GP::operator=(const GP& that)
{
    if (this != &that)
    {
        // clone first and delete afterwards, so the old state is never left dangling
        covariance_functions::CovFunc* temp = covFunc_;
        covFunc_ = (that.covFunc_ != nullptr) ? that.covFunc_->clone() : nullptr;
        delete temp;

        temp = covFuncProj_;
        covFuncProj_ = (that.covFuncProj_ != nullptr) ? that.covFuncProj_->clone() : nullptr;
        delete temp;

        // copy the rest of the state
        data_loc_ = that.data_loc_;
        data_out_ = that.data_out_;
        data_var_ = that.data_var_;
        gram_matrix_ = that.gram_matrix_;
        alpha_ = that.alpha_;
        chol_gram_matrix_ = that.chol_gram_matrix_;
        log_noise_sd_ = that.log_noise_sd_;
        use_explicit_trend_ = that.use_explicit_trend_;
        feature_vectors_ = that.feature_vectors_;
        feature_matrix_ = that.feature_matrix_;
        chol_feature_matrix_ = that.chol_feature_matrix_;
        beta_ = that.beta_;
    }
    return *this;
}
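
// Sampling uses the Cholesky factor of the kernel matrix: if K = L * L^T,
// then s = L * x is a draw from N(0, K) for a standard normal vector x.
// Without data the prior covariance is used, otherwise the posterior one;
// measurement noise is added on top of the returned sample.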
Eigen::VectorXd GP::drawSample(const Eigen::VectorXd& locations) const
{
    return drawSample(locations,
                      math_tools::generate_normal_random_matrix(locations.rows(), locations.cols()));
}

Eigen::VectorXd GP::drawSample(const Eigen::VectorXd& locations,
                               const Eigen::VectorXd& random_vector) const
{
    Eigen::MatrixXd prior_covariance;
    Eigen::MatrixXd kernel_matrix;
    Eigen::LLT<Eigen::MatrixXd> chol_kernel_matrix;
    Eigen::MatrixXd samples;

    // we need the prior covariance for both prior and posterior samples
    prior_covariance = covFunc_->evaluate(locations, locations);
    kernel_matrix = prior_covariance;

    if (gram_matrix_.cols() == 0) // no data, i.e. only a prior
    {
        kernel_matrix = prior_covariance + JITTER * Eigen::MatrixXd::Identity(
                            prior_covariance.rows(), prior_covariance.cols());
    }
    else // we have some data
    {
        Eigen::MatrixXd mixed_covariance;
        mixed_covariance = covFunc_->evaluate(locations, data_loc_);
        Eigen::MatrixXd posterior_covariance;
        posterior_covariance = prior_covariance - mixed_covariance *
                               (chol_gram_matrix_.solve(mixed_covariance.transpose()));
        kernel_matrix = posterior_covariance + JITTER * Eigen::MatrixXd::Identity(
                            posterior_covariance.rows(), posterior_covariance.cols());
    }
    chol_kernel_matrix = kernel_matrix.llt();

    // draw the sample: s = chol(K) * x, where x is a random vector
    samples = chol_kernel_matrix.matrixL() * random_vector;

    // add the measurement noise on return
    return samples + std::exp(log_noise_sd_) *
           math_tools::generate_normal_random_matrix(samples.rows(), samples.cols());
}
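
// infer() precomputes everything prediction needs: the Gram matrix
// K = k(X, X) plus noise on the diagonal, its LDLT factorization, and the
// weight vector alpha = K^-1 * y. With the explicit trend enabled it also
// computes the weights beta of a linear trend via generalized least squares
// (explicit basis functions, cf. Rasmussen & Williams, sec. 2.7).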
void GP::infer()
{
    assert(data_loc_.rows() > 0 && "Error: the GP is not yet initialized!");

    // the data covariance matrix
    Eigen::MatrixXd data_cov = covFunc_->evaluate(data_loc_, data_loc_);

    // swapping in the Gram matrix is faster than directly assigning it
    gram_matrix_.swap(data_cov); // store the new data_cov as gram matrix
    if (data_var_.rows() == 0) // homoscedastic
    {
        gram_matrix_ += (std::exp(2 * log_noise_sd_) + JITTER) *
                        Eigen::MatrixXd::Identity(gram_matrix_.rows(), gram_matrix_.cols());
    }
    else // heteroscedastic
    {
        gram_matrix_ += data_var_.asDiagonal();
    }

    // compute the Cholesky decomposition of the Gram matrix
    chol_gram_matrix_ = gram_matrix_.ldlt();

    // precompute alpha, the solution of the Cholesky system for the data
    alpha_ = chol_gram_matrix_.solve(data_out_);

    if (use_explicit_trend_)
    {
        // precompute the matrices needed for the explicit trend function
        feature_vectors_ = Eigen::MatrixXd(2, data_loc_.rows());
        feature_vectors_.row(0) = Eigen::MatrixXd::Ones(1, data_loc_.rows()); // instead of pow(0)
        feature_vectors_.row(1) = data_loc_.array(); // instead of pow(1)

        feature_matrix_ = feature_vectors_ * chol_gram_matrix_.solve(feature_vectors_.transpose());
        chol_feature_matrix_ = feature_matrix_.ldlt();

        beta_ = chol_feature_matrix_.solve(feature_vectors_) * alpha_;
    }
}

void GP::infer(const Eigen::VectorXd& data_loc,
               const Eigen::VectorXd& data_out,
               const Eigen::VectorXd& data_var /* = Eigen::VectorXd() */)
{
    data_loc_ = data_loc;
    data_out_ = data_out;
    if (data_var.rows() > 0)
    {
        data_var_ = data_var;
    }
    infer(); // updates the Gram matrix and its Cholesky decomposition
}
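
// inferSD() implements a subset-of-data approximation: it keeps only the n
// data points whose prior covariance with the prediction location is
// largest, i.e. the points the kernel deems most informative for that
// location, and runs infer() on this subset.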
void GP::inferSD(const Eigen::VectorXd& data_loc,
                 const Eigen::VectorXd& data_out,
                 const int n, const Eigen::VectorXd& data_var /* = Eigen::VectorXd() */,
                 const double prediction_point /* = std::numeric_limits<double>::quiet_NaN() */)
{
    Eigen::VectorXd covariance;

    Eigen::VectorXd prediction_loc(1);
    if (math_tools::isNaN(prediction_point))
    {
        // if none is given, use the last data point as the prediction reference
        prediction_loc = data_loc.tail(1);
    }
    else
    {
        prediction_loc << prediction_point;
    }

    // calculate the covariance between the data and the prediction point for point selection
    covariance = covFunc_->evaluate(data_loc, prediction_loc);

    // generate the index vector
    std::vector<int> index(covariance.size(), 0);
    for (size_t i = 0; i != index.size(); i++)
    {
        index[i] = static_cast<int>(i);
    }

    // sort the indices with respect to the covariance value
    std::sort(index.begin(), index.end(), covariance_ordering(covariance));

    bool use_var = data_var.rows() > 0; // true means heteroscedastic noise

    if (n < data_loc.rows())
    {
        std::vector<double> loc_arr(n);
        std::vector<double> out_arr(n);
        std::vector<double> var_arr(n);

        for (int i = 0; i < n; ++i)
        {
            loc_arr[i] = data_loc[index[i]];
            out_arr[i] = data_out[index[i]];
            if (use_var)
            {
                var_arr[i] = data_var[index[i]];
            }
        }

        data_loc_ = Eigen::Map<Eigen::VectorXd>(loc_arr.data(), n, 1);
        data_out_ = Eigen::Map<Eigen::VectorXd>(out_arr.data(), n, 1);
        if (use_var)
        {
            data_var_ = Eigen::Map<Eigen::VectorXd>(var_arr.data(), n, 1);
        }
    }
    else // we can use all points and don't need to select
    {
        data_loc_ = data_loc;
        data_out_ = data_out;
        if (use_var)
        {
            data_var_ = data_var;
        }
    }
    infer();
}

void GP::clearData()
{
    gram_matrix_ = Eigen::MatrixXd();
    chol_gram_matrix_ = Eigen::LDLT<Eigen::MatrixXd>();
    data_loc_ = Eigen::VectorXd();
    data_out_ = Eigen::VectorXd();
    data_var_ = Eigen::VectorXd(); // also drop the heteroscedastic variances, so stale values cannot leak into the next inference
}

Eigen::VectorXd GP::predict(const Eigen::VectorXd& locations, Eigen::VectorXd* variances /*=nullptr*/) const
{
    // The prior covariance matrix (evaluated on test points)
    Eigen::MatrixXd prior_cov = covFunc_->evaluate(locations, locations);

    if (data_loc_.rows() == 0) // check if the data is empty
    {
        if (variances != nullptr)
        {
            (*variances) = prior_cov.diagonal();
        }
        return Eigen::VectorXd::Zero(locations.size());
    }
    else
    {
        // Calculate mixed covariance matrix (test and data points)
        Eigen::MatrixXd mixed_cov = covFunc_->evaluate(locations, data_loc_);

        // Calculate feature matrix for linear features
        Eigen::MatrixXd phi(2, locations.rows());
        if (use_explicit_trend_)
        {
            phi.row(0) = Eigen::MatrixXd::Ones(1, locations.rows()); // instead of pow(0)
            phi.row(1) = locations.array(); // instead of pow(1)

            return predict(prior_cov, mixed_cov, phi, variances);
        }
        return predict(prior_cov, mixed_cov, Eigen::MatrixXd(), variances);
    }
}

Eigen::VectorXd GP::predictProjected(const Eigen::VectorXd& locations, Eigen::VectorXd* variances /*=nullptr*/) const
{
    // use the suitable covariance function, depending on whether an
    // output projection is used or not
    covariance_functions::CovFunc* covFunc = nullptr;
    if (covFuncProj_ == nullptr)
    {
        covFunc = covFunc_;
    }
    else
    {
        covFunc = covFuncProj_;
    }

    assert(covFunc != nullptr);

    // The prior covariance matrix (evaluated on test points)
    Eigen::MatrixXd prior_cov = covFunc->evaluate(locations, locations);

    if (data_loc_.rows() == 0) // check if the data is empty
    {
        if (variances != nullptr)
        {
            (*variances) = prior_cov.diagonal();
        }
        return Eigen::VectorXd::Zero(locations.size());
    }
    else
    {
        // The mixed covariance matrix (test and data points)
        Eigen::MatrixXd mixed_cov = covFunc->evaluate(locations, data_loc_);

        Eigen::MatrixXd phi(2, locations.rows());
        if (use_explicit_trend_)
        {
            // calculate the feature vectors for linear regression
            phi.row(0) = Eigen::MatrixXd::Ones(1, locations.rows()); // instead of pow(0)
            phi.row(1) = locations.array(); // instead of pow(1)

            return predict(prior_cov, mixed_cov, phi, variances);
        }
        return predict(prior_cov, mixed_cov, Eigen::MatrixXd(), variances);
    }
}
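
// Standard GP prediction equations: the posterior mean is m = K_* * alpha
// with alpha = K^-1 * y, and the posterior covariance is
// V = K_** - K_* * K^-1 * K_*^T. With the explicit trend enabled, mean and
// covariance receive the basis-function corrections R^T * beta and
// R^T * (H K^-1 H^T)^-1 * R, where H holds the feature vectors and
// R = phi - H * K^-1 * K_*^T.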
Eigen::VectorXd GP::predict(const Eigen::MatrixXd& prior_cov, const Eigen::MatrixXd& mixed_cov,
                            const Eigen::MatrixXd& phi /*=Eigen::MatrixXd()*/, Eigen::VectorXd* variances /*=nullptr*/)
    const
{
    // calculate the GP mean from the precomputed alpha vector
    Eigen::VectorXd m = mixed_cov * alpha_;

    // precompute K^{-1} * mixed_cov
    Eigen::MatrixXd gamma = chol_gram_matrix_.solve(mixed_cov.transpose());

    Eigen::MatrixXd R;

    // include fixed-features in the calculations
    if (use_explicit_trend_)
    {
        R = phi - feature_vectors_ * gamma;

        m += R.transpose() * beta_;
    }

    if (variances != nullptr)
    {
        // calculate the GP variance
        Eigen::MatrixXd v = prior_cov - mixed_cov * gamma;

        // include fixed-features in the calculations
        if (use_explicit_trend_)
        {
            assert(R.size() > 0);
            v += R.transpose() * chol_feature_matrix_.solve(R);
        }

        (*variances) = v.diagonal();
    }
    return m;
}
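
// The hyperparameter vector is laid out as
// [ log noise SD | kernel parameters | kernel extra parameters ].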
void GP::setHyperParameters(const Eigen::VectorXd& hyperParameters)
{
    assert(hyperParameters.rows() == covFunc_->getParameterCount() + covFunc_->getExtraParameterCount() + 1 &&
           "Wrong number of hyperparameters supplied to setHyperParameters()!");
    log_noise_sd_ = hyperParameters[0];
    covFunc_->setParameters(hyperParameters.segment(1, covFunc_->getParameterCount()));
    covFunc_->setExtraParameters(hyperParameters.tail(covFunc_->getExtraParameterCount()));
    if (data_loc_.rows() > 0)
    {
        infer();
    }

    // if the projection kernel is set, set its parameters as well
    if (covFuncProj_ != nullptr)
    {
        covFuncProj_->setParameters(hyperParameters.segment(1, covFunc_->getParameterCount()));
        covFuncProj_->setExtraParameters(hyperParameters.tail(covFunc_->getExtraParameterCount()));
    }
}

Eigen::VectorXd GP::getHyperParameters() const
{
    Eigen::VectorXd hyperParameters(covFunc_->getParameterCount() + covFunc_->getExtraParameterCount() + 1);
    hyperParameters << log_noise_sd_, covFunc_->getParameters(), covFunc_->getExtraParameters();
    return hyperParameters;
}

void GP::enableExplicitTrend()
{
    use_explicit_trend_ = true;
}

void GP::disableExplicitTrend()
{
    use_explicit_trend_ = false;
}