mlpack
gmm_impl.hpp
Go to the documentation of this file.
1 
14 #ifndef MLPACK_METHODS_GMM_GMM_IMPL_HPP
15 #define MLPACK_METHODS_GMM_GMM_IMPL_HPP
16 
17 // In case it hasn't already been included.
18 #include "gmm.hpp"
19 
20 namespace mlpack {
21 namespace gmm {
22 
26 template<typename FittingType>
27 double GMM::Train(const arma::mat& observations,
28  const size_t trials,
29  const bool useExistingModel,
30  FittingType fitter)
31 {
32  double bestLikelihood; // This will be reported later.
33 
34  // We don't need to store temporary models if we are only doing one trial.
35  if (trials == 1)
36  {
37  // Train the model. The user will have been warned earlier if the GMM was
38  // initialized with no parameters (0 gaussians, dimensionality of 0).
39  fitter.Estimate(observations, dists, weights, useExistingModel);
40  bestLikelihood = LogLikelihood(observations, dists, weights);
41  }
42  else
43  {
44  if (trials == 0)
45  return -DBL_MAX; // It's what they asked for...
46 
47  // If each trial must start from the same initial location, we must save it.
48  std::vector<distribution::GaussianDistribution> distsOrig;
49  arma::vec weightsOrig;
50  if (useExistingModel)
51  {
52  distsOrig = dists;
53  weightsOrig = weights;
54  }
55 
56  // We need to keep temporary copies. We'll do the first training into the
57  // actual model position, so that if it's the best we don't need to copy it.
58  fitter.Estimate(observations, dists, weights, useExistingModel);
59 
60  bestLikelihood = LogLikelihood(observations, dists, weights);
61 
62  Log::Info << "GMM::Train(): Log-likelihood of trial 0 is "
63  << bestLikelihood << "." << std::endl;
64 
65  // Now the temporary model.
66  std::vector<distribution::GaussianDistribution> distsTrial(gaussians,
67  distribution::GaussianDistribution(dimensionality));
68  arma::vec weightsTrial(gaussians);
69 
70  for (size_t trial = 1; trial < trials; ++trial)
71  {
72  if (useExistingModel)
73  {
74  distsTrial = distsOrig;
75  weightsTrial = weightsOrig;
76  }
77 
78  fitter.Estimate(observations, distsTrial, weightsTrial, useExistingModel);
79 
80  // Check to see if the log-likelihood of this one is better.
81  double newLikelihood = LogLikelihood(observations, distsTrial,
82  weightsTrial);
83 
84  Log::Info << "GMM::Train(): Log-likelihood of trial " << trial << " is "
85  << newLikelihood << "." << std::endl;
86 
87  if (newLikelihood > bestLikelihood)
88  {
89  // Save new likelihood and copy new model.
90  bestLikelihood = newLikelihood;
91 
92  dists = distsTrial;
93  weights = weightsTrial;
94  }
95  }
96  }
97 
98  // Report final log-likelihood and return it.
99  Log::Info << "GMM::Train(): log-likelihood of trained GMM is "
100  << bestLikelihood << "." << std::endl;
101  return bestLikelihood;
102 }
103 
108 template<typename FittingType>
109 double GMM::Train(const arma::mat& observations,
110  const arma::vec& probabilities,
111  const size_t trials,
112  const bool useExistingModel,
113  FittingType fitter)
114 {
115  double bestLikelihood; // This will be reported later.
116 
117  // We don't need to store temporary models if we are only doing one trial.
118  if (trials == 1)
119  {
120  // Train the model. The user will have been warned earlier if the GMM was
121  // initialized with no parameters (0 gaussians, dimensionality of 0).
122  fitter.Estimate(observations, probabilities, dists, weights,
123  useExistingModel);
124  bestLikelihood = LogLikelihood(observations, dists, weights);
125  }
126  else
127  {
128  if (trials == 0)
129  return -DBL_MAX; // It's what they asked for...
130 
131  // If each trial must start from the same initial location, we must save it.
132  std::vector<distribution::GaussianDistribution> distsOrig;
133  arma::vec weightsOrig;
134  if (useExistingModel)
135  {
136  distsOrig = dists;
137  weightsOrig = weights;
138  }
139 
140  // We need to keep temporary copies. We'll do the first training into the
141  // actual model position, so that if it's the best we don't need to copy it.
142  fitter.Estimate(observations, probabilities, dists, weights,
143  useExistingModel);
144 
145  bestLikelihood = LogLikelihood(observations, dists, weights);
146 
147  Log::Debug << "GMM::Train(): Log-likelihood of trial 0 is "
148  << bestLikelihood << "." << std::endl;
149 
150  // Now the temporary model.
151  std::vector<distribution::GaussianDistribution> distsTrial(gaussians,
152  distribution::GaussianDistribution(dimensionality));
153  arma::vec weightsTrial(gaussians);
154 
155  for (size_t trial = 1; trial < trials; ++trial)
156  {
157  if (useExistingModel)
158  {
159  distsTrial = distsOrig;
160  weightsTrial = weightsOrig;
161  }
162 
163  fitter.Estimate(observations, probabilities, distsTrial, weightsTrial,
164  useExistingModel);
165 
166  // Check to see if the log-likelihood of this one is better.
167  double newLikelihood = LogLikelihood(observations, distsTrial,
168  weightsTrial);
169 
170  Log::Debug << "GMM::Train(): Log-likelihood of trial " << trial << " is "
171  << newLikelihood << "." << std::endl;
172 
173  if (newLikelihood > bestLikelihood)
174  {
175  // Save new likelihood and copy new model.
176  bestLikelihood = newLikelihood;
177 
178  dists = distsTrial;
179  weights = weightsTrial;
180  }
181  }
182  }
183 
184  // Report final log-likelihood and return it.
185  Log::Info << "GMM::Train(): log-likelihood of trained GMM is "
186  << bestLikelihood << "." << std::endl;
187  return bestLikelihood;
188 }
189 
193 template<typename Archive>
194 void GMM::serialize(Archive& ar, const uint32_t /* version */)
195 {
196  ar(CEREAL_NVP(gaussians));
197  ar(CEREAL_NVP(dimensionality));
198 
199  // Load (or save) the gaussians. Not going to use the default std::vector
200  // serialize here because it won't call out correctly to serialize() for each
201  // Gaussian distribution.
202  if (cereal::is_loading<Archive>())
203  dists.resize(gaussians);
204 
205  ar(CEREAL_NVP(dists));
206 
207  ar(CEREAL_NVP(weights));
208 }
209 
210 } // namespace gmm
211 } // namespace mlpack
212 
213 #endif
214 
A single multivariate Gaussian distribution.
Definition: gaussian_distribution.hpp:24
static MLPACK_EXPORT util::NullOutStream Debug
MLPACK_EXPORT is required for global variables, so that they are properly exported by the Windows com...
Definition: log.hpp:79
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
double Train(const arma::mat &observations, const size_t trials=1, const bool useExistingModel=false, FittingType fitter=FittingType())
Estimate the probability distribution directly from the given observations, using the given algorithm...
Definition: gmm_impl.hpp:27
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
void serialize(Archive &ar, const uint32_t)
Serialize the GMM.
Definition: gmm_impl.hpp:194