Fleet 0.0.9
Inference in the LOT
MyGrammarHypothesis.h
#pragma once

class MyGrammarHypothesis final : public ThunkGrammarHypothesis<MyGrammarHypothesis,
                                                                MyHypothesis,
                                                                MyHumanDatum,
                                                                std::vector<MyHumanDatum>,
                                                                Vector2D<DiscreteDistribution<S>>> {
public:
	using Super = ThunkGrammarHypothesis<MyGrammarHypothesis, MyHypothesis, MyHumanDatum, std::vector<MyHumanDatum>, Vector2D<DiscreteDistribution<S>>>;
	using Super::Super;
	using data_t = Super::data_t;

	// remove any strings in human_data[0...n] from M, and then renormalize
	static void remove_strings_and_renormalize(DiscreteDistribution<S>& M, datum_t::data_t* dptr, const size_t n) {
		// we need to renormalize for the fact that we can't produce strings in the
		// observed set of data (NOTE: this assumes that the data can't be noisy versions
		// of strings that were in the set too)
		for(size_t i=0;i<n;i++) {
			const auto& s = dptr->at(i).output;
			if(M.contains(s)) {
				M.erase(s);
			}
		}

		// and renormalize M with the strings removed; M stores log-probabilities,
		// so this means subtracting the log-normalizer Z from each entry
		double Z = M.Z();
		for(auto& [s,lp] : M) {
			M.m[s] = lp-Z;
		}
	}

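	// Worked sketch of the renormalization above (illustrative numbers only): if M holds
	// {a: log 0.5, b: log 0.25, c: log 0.25} and "b" appears in the observed data, then after
	// erasing "b" we have Z = M.Z() = log 0.75, and subtracting Z in log space leaves
	// {a: log(2/3), c: log(1/3)}, so the remaining strings again sum to 1.
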
	virtual double human_chance_lp(const typename datum_t::output_t& r, const datum_t& hd) const override {
		// here we make chance be exponential in the length of the response
		return -(double)(r.length()+1)*log(alphabet.size()+1); // NOTE: without the cast to double, -(r.length()+1) is computed in unsigned arithmetic and wraps to a huge positive value
	}

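	// Worked example of the formula above (illustrative): with an alphabet of size 2 and a
	// response of length 3, this returns -(3+1)*log(2+1) = -4 log 3, i.e. probability (1/3)^4.
	// That is the chance of producing exactly that response by drawing each character
	// uniformly from the alphabet plus an end-of-string symbol, stopping at the end symbol.
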
	virtual void recompute_LL(std::vector<HYP>& hypotheses, const data_t& human_data) override {
		assert(this->which_data == std::addressof(human_data));

		// define a version of LL where we do NOT include the observed strings in the
		// model-based likelihood, since people aren't allowed to respond with them

		// for each HumanDatum::data, figure out the max amount of data it contains
		std::unordered_map<typename datum_t::data_t*, size_t> max_sizes;
		for(auto& d : human_data) {
			if( (not max_sizes.contains(d.data)) or max_sizes[d.data] < d.ndata) {
				max_sizes[d.data] = d.ndata;
			}
		}

		this->LL.reset(new LL_t());
		this->LL->reserve(max_sizes.size()); // reserve for the same number of elements

		// now go through and compute the likelihood of each hypothesis on each data set
		for(const auto& [dptr, sz] : max_sizes) {
			if(CTRL_C) break;

			this->LL->emplace(dptr, this->nhypotheses()); // make an entry for dptr with one slot per hypothesis

			#pragma omp parallel for
			for(size_t h=0;h<this->nhypotheses();h++) {

				// set up all the likelihoods here
				Vector data_lls = Vector::Zero(sz);

				// copy this just once and then use it for all the string likelihoods
				auto M = P->at(h,0); // can just copy
				remove_strings_and_renormalize(M, dptr, sz);

				// use the max size from above and compute all the likelihoods
				for(size_t i=0;i<sz;i++) {
					typename HYP::data_t d;
					d.push_back(dptr->at(i));

					data_lls(i) = MyHypothesis::string_likelihood(M, d);

					assert(not std::isnan(data_lls(i))); // NaNs will really mess everything up
				}

				#pragma omp critical
				this->LL->at(dptr)[h] = std::move(data_lls);
			}
		}
	}

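	// Shape of the result (a sketch; assumes the per-datum likelihood factorizes): after this
	// runs, (*LL)[dptr][h] is a Vector whose i-th entry is the log-likelihood of the single
	// datum dptr->at(i) under hypothesis h, so the log-likelihood of the first k data points
	// under h is the sum of the first k entries.
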
	virtual std::map<typename HYP::output_t, double> compute_model_predictions(const data_t& human_data, const size_t i, const Matrix& hposterior) const override {

		// NOTE: we must define a version of this which renormalizes by the data. We could do
		// that in P, except that for thunks P would then have to be huge, since it would
		// depend on the data. So we do it here instead.

		std::map<typename HYP::output_t, double> model_predictions;

		for(int h=0;h<hposterior.rows();h++) {
			if(hposterior(h,i) < 1e-6) continue; // skip very low probability hypotheses for speed

			auto M = P->at(h,0); // copy, since we modify M below

			remove_strings_and_renormalize(M, human_data[i].data, human_data[i].ndata);

			for(const auto& [outcome,outlp] : M) {
				model_predictions[outcome] += hposterior(h,i) * exp(outlp);
			}
		}

		return model_predictions;
	}

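	// Worked sketch of the mixture above (illustrative numbers only): if hypothesis h1 has
	// posterior weight 0.7 and, after renormalization, predicts {"ab": 0.6, "ba": 0.4},
	// while h2 has weight 0.3 and predicts {"ab": 1.0}, then
	// model_predictions["ab"] = 0.7*0.6 + 0.3*1.0 = 0.72 and model_predictions["ba"] = 0.28.
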
};
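
// Below is a self-contained sketch of the remove-and-renormalize technique used above,
// written against std::map<std::string,double> instead of Fleet's DiscreteDistribution;
// the function name and types are illustrative assumptions, not Fleet API. As in the
// class above, values are log-probabilities, so renormalizing means computing a
// logsumexp over the remaining entries and subtracting it from each.

#include <algorithm>
#include <cmath>
#include <limits>
#include <map>
#include <string>
#include <vector>

inline void sketch_remove_and_renormalize(std::map<std::string,double>& logp,
                                          const std::vector<std::string>& observed) {
	// remove every observed string from the support
	for(const auto& s : observed) {
		logp.erase(s);
	}

	// logsumexp of the remaining log-probabilities (max-shifted for numerical stability)
	double mx = -std::numeric_limits<double>::infinity();
	for(const auto& [s,lp] : logp) mx = std::max(mx, lp);

	double sum = 0.0;
	for(const auto& [s,lp] : logp) sum += std::exp(lp - mx);
	const double Z = mx + std::log(sum);

	// subtracting the log-normalizer rescales the remaining strings to sum to 1
	for(auto& [s,lp] : logp) lp -= Z;
}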