Fleet  0.0.9
Inference in the LOT
ThunkGrammarHypothesis.h
Go to the documentation of this file.
1 #pragma once
2 
4 
5 
15 template<typename this_t,
16  typename _HYP,
17  typename datum_t=HumanDatum<_HYP>,
18  typename data_t=std::vector<datum_t>,
20 class ThunkGrammarHypothesis : public BaseGrammarHypothesis<this_t, _HYP, datum_t, data_t, _Predict_t> {
21 public:
22  using HYP = _HYP;
24  using Super::Super;
25  using LL_t = Super::LL_t;
27 
	// NOTE: We need to define this constructor or else we call the wrong recompute_LL
	// in the constructor, because C++ is a little bit insane (a base-class constructor
	// cannot dispatch to this derived class's override).
	ThunkGrammarHypothesis(std::vector<HYP>& hypotheses, const data_t* human_data) {
		// Delegates all setup (storing hypotheses, counts, and the human data pointer,
		// then recomputing cached quantities) to the base-class setter.
		this->set_hypotheses_and_data(hypotheses, *human_data);
	}
33 
34  // We are going to override this function because it normally would call the likelihood
35  // on EVERY data point, which would be a catastrophe, because compute_likelihood will re-run
36  // each program. We want to run each program once, since programs here are stochastic
37  // and thunks
38  virtual void recompute_LL(std::vector<HYP>& hypotheses, const data_t& human_data) override {
39  assert(this->which_data == std::addressof(human_data));
40 
41  // For each HumanDatum::data, figure out the max amount of data it contains
42  std::unordered_map<typename datum_t::data_t*, size_t> max_sizes;
43  for(auto& d : human_data) {
44  if( (not max_sizes.contains(d.data)) or max_sizes[d.data] < d.ndata) {
45  max_sizes[d.data] = d.ndata;
46  }
47  }
48 
49  this->LL.reset(new LL_t());
50  this->LL->reserve(max_sizes.size()); // reserve for the same number of elements
51 
52  // now go through and compute the likelihood of each hypothesis on each data set
53  for(const auto& [dptr, sz] : max_sizes) {
54  if(CTRL_C) break;
55 
56  this->LL->emplace(dptr, this->nhypotheses()); // in this place, make something of size nhypotheses
57 
58  #pragma omp parallel for
59  for(size_t h=0;h<this->nhypotheses();h++) {
60 
61  // set up all the likelihoods here
62  Vector data_lls = Vector::Zero(sz);
63 
64  // We can use this because it was stored in P
65  const auto& M = this->P->at(h,0);
66 
67  // read the max size from above and compute all the likelihoods
68  for(size_t i=0;i<max_sizes[dptr];i++) {
69  typename HYP::data_t d;
70  d.push_back(dptr->at(i));
71 
72  data_lls(i) = MyHypothesis::string_likelihood(M, d);
73 
74  assert(not std::isnan(data_lls(i))); // NaNs will really mess everything up
75  }
76 
77  #pragma omp critical
78  this->LL->at(dptr)[h] = std::move(data_lls);
79  }
80  }
81 
82  }
83 
84 
91  virtual void recompute_P(std::vector<HYP>& hypotheses, const data_t& human_data) override {
92  assert(this->which_data == std::addressof(human_data));
93 
94  this->P.reset(new Predict_t(hypotheses.size(), 1));
95 
96  #pragma omp parallel for
97  for(size_t h=0;h<hypotheses.size();h++) {
98 
99  // call this with no arguments
100  auto ret = hypotheses[h].call();
101 
102  #pragma omp critical
103  this->P->at(h,0) = std::move(ret);
104  }
105  }
106 
112  virtual std::map<typename HYP::output_t, double> compute_model_predictions(const data_t& human_data, const size_t i, const Matrix& hposterior) const override {
113 
114  std::map<typename HYP::output_t, double> model_predictions;
115 
116  for(int h=0;h<hposterior.rows();h++) {
117  if(hposterior(h,i) < 1e-6) continue; // skip very low probability for speed
118 
119  for(const auto& [outcome,outcomelp] : this->P->at(h,0)) {
120  model_predictions[outcome] += hposterior(h,i) * exp(outcomelp);
121  }
122  }
123 
124  return model_predictions;
125  }
126 
127 
128 };
129 
130 
131 
std::shared_ptr< LL_t > LL
Definition: BaseGrammarHypothesis.h:90
virtual std::map< typename HYP::output_t, double > compute_model_predictions(const data_t &human_data, const size_t i, const Matrix &hposterior) const override
In this variant, we need to always use P->at(h,0) since we only have one prediction stored for thunks...
Definition: ThunkGrammarHypothesis.h:112
virtual void set_hypotheses_and_data(std::vector< HYP > &hypotheses, const data_t &human_data)
This is the primary function for setting hypothese and data on construction.
Definition: BaseGrammarHypothesis.h:172
_Predict_t Predict_t
Definition: BaseGrammarHypothesis.h:58
virtual void recompute_LL(std::vector< HYP > &hypotheses, const data_t &human_data) override
Recompute LL[h,di] a hypothesis from each hypothesis and data point to a vector of prior responses...
Definition: ThunkGrammarHypothesis.h:38
Eigen::MatrixXf Matrix
Definition: EigenLib.h:18
Definition: BaseGrammarHypothesis.h:48
virtual size_t nhypotheses() const
A convenient function that uses C to say how many hypotheses.
Definition: BaseGrammarHypothesis.h:228
we don't need inputs/outputs for our MyHypothesis
Definition: MyHypothesis.h:6
volatile sig_atomic_t CTRL_C
ThunkGrammarHypothesis(std::vector< HYP > &hypotheses, const data_t *human_data)
Definition: ThunkGrammarHypothesis.h:30
Definition: HumanDatum.h:19
Definition: ThunkGrammarHypothesis.h:20
Super::LL_t LL_t
Definition: ThunkGrammarHypothesis.h:25
std::shared_ptr< Predict_t > P
Definition: BaseGrammarHypothesis.h:92
Just a little wrapper to allow vectors to be handled as 2D arrays, which simplifie some stuff in Gram...
Definition: Vector2D.h:14
const data_t * which_data
Definition: BaseGrammarHypothesis.h:99
std::vector< Args... > data_t
Definition: Bayesable.h:39
std::unordered_map< typename MyHumanDatum ::std::vector< MyHumanDatum > *, std::vector< Vector > > LL_t
Definition: BaseGrammarHypothesis.h:61
virtual void recompute_P(std::vector< HYP > &hypotheses, const data_t &human_data) override
For a thunk, the predictions don't depend on the data.
Definition: ThunkGrammarHypothesis.h:91
This class does grammar inference with some collection of HumanData and fixed set of hypotheses...
Super::Predict_t Predict_t
Definition: ThunkGrammarHypothesis.h:26
Eigen::VectorXf Vector
Definition: EigenLib.h:17