Fleet  0.0.9
Inference in the LOT
ThunkGrammarHypothesis.h
Go to the documentation of this file.
1 #pragma once
2 
4 
5 
15 template<typename this_t,
16  typename _HYP,
17  typename datum_t=HumanDatum<_HYP>,
18  typename data_t=std::vector<datum_t>,
20 class ThunkGrammarHypothesis : public BaseGrammarHypothesis<this_t, _HYP, datum_t, data_t, _Predict_t> {
21 public:
22  using HYP = _HYP;
24  using Super::Super;
25  using LL_t = Super::LL_t;
27 
	// NOTE: We need to define this constructor or else we call the wrong recompute_LL
	// in the constructor, because C++ is a little bit insane (a base-class constructor
	// cannot dispatch to this derived class's override).
	ThunkGrammarHypothesis(std::vector<HYP>& hypotheses, const data_t* human_data) {
		// Delegates all setup (storing hypotheses, counts, and the human data pointer,
		// then recomputing cached quantities) to the base-class setter.
		this->set_hypotheses_and_data(hypotheses, *human_data);
	}
33 
34  // We are going to override this function because it normally would call the likelihood
35  // on EVERY data point, which would be a catastrophe, because compute_likelihood will re-run
36  // each program. We want to run each program once, since programs here are stochastic
37  // and thunks
38  virtual void recompute_LL(std::vector<HYP>& hypotheses, const data_t& human_data) override {
39  assert(this->which_data == std::addressof(human_data));
40 
41  // For each HumanDatum::data, figure out the max amount of data it contains
42  std::unordered_map<typename datum_t::data_t*, size_t> max_sizes;
43  for(auto& d : human_data) {
44  if( (not max_sizes.contains(d.data)) or max_sizes[d.data] < d.ndata) {
45  max_sizes[d.data] = d.ndata;
46  }
47  }
48 
49  this->LL.reset(new LL_t());
50  this->LL->reserve(max_sizes.size()); // reserve for the same number of elements
51 
52  // now go through and compute the likelihood of each hypothesis on each data set
53  for(const auto& [dptr, sz] : max_sizes) {
54  if(CTRL_C) break;
55 
56  this->LL->emplace(dptr, this->nhypotheses()); // in this place, make something of size nhypotheses
57 
58  #pragma omp parallel for
59  for(size_t h=0;h<this->nhypotheses();h++) {
60 
61  // set up all the likelihoods here
62  Vector data_lls = Vector::Zero(sz);
63 
64  // We can use this because it was stored in P
65  const auto& M = this->P->at(h,0);
66 
67  // read the max size from above and compute all the likelihoods
68  for(size_t i=0;i<max_sizes[dptr];i++) {
69  typename HYP::data_t d;
70  d.push_back(dptr->at(i));
71 
72  data_lls(i) = MyHypothesis::string_likelihood(M, d);
73 
74  assert(not std::isnan(data_lls(i))); // NaNs will really mess everything up
75  }
76 
77  #pragma omp critical
78  this->LL->at(dptr)[h] = std::move(data_lls);
79  }
80  }
81 
82  }
83 
84 
91  virtual void recompute_P(std::vector<HYP>& hypotheses, const data_t& human_data) override {
92  assert(this->which_data == std::addressof(human_data));
93 
94  this->P.reset(new Predict_t(hypotheses.size(), 1));
95 
96  #pragma omp parallel for
97  for(size_t h=0;h<hypotheses.size();h++) {
98 
99  // call this with no arguments
100  auto ret = hypotheses[h].call();
101 
102  #pragma omp critical
103  this->P->at(h,0) = std::move(ret);
104  }
105  }
106 
112  virtual std::map<typename HYP::output_t, double> compute_model_predictions(const data_t& human_data, const size_t i, const Matrix& hposterior) const override {
113 
114  std::map<typename HYP::output_t, double> model_predictions;
115 
116  for(int h=0;h<hposterior.rows();h++) {
117  if(hposterior(h,i) < 1e-6) continue; // skip very low probability for speed
118 
119  for(const auto& [outcome,outcomelp] : this->P->at(h,0)) {
120  model_predictions[outcome] += hposterior(h,i) * exp(outcomelp);
121  }
122  }
123 
124  return model_predictions;
125  }
126 
127 
128 };
129 
130 
131 
std::shared_ptr< LL_t > LL
Definition: BaseGrammarHypothesis.h:90
virtual std::map< typename HYP::output_t, double > compute_model_predictions(const data_t &human_data, const size_t i, const Matrix &hposterior) const override
In this variant, we need to always use P->at(h,0) since we only have one prediction stored for thunks...
Definition: ThunkGrammarHypothesis.h:112
virtual void set_hypotheses_and_data(std::vector< HYP > &hypotheses, const data_t &human_data)
This is the primary function for setting hypothese and data on construction.
Definition: BaseGrammarHypothesis.h:172
_Predict_t Predict_t
Definition: BaseGrammarHypothesis.h:58
virtual void recompute_LL(std::vector< HYP > &hypotheses, const data_t &human_data) override
Recompute LL[h,di] a hypothesis from each hypothesis and data point to a vector of prior responses...
Definition: ThunkGrammarHypothesis.h:38
Eigen::MatrixXf Matrix
Definition: EigenLib.h:18
Definition: BaseGrammarHypothesis.h:48
virtual size_t nhypotheses() const
A convenient function that uses C to say how many hypotheses.
Definition: BaseGrammarHypothesis.h:228
we don't need inputs/outputs for our MyHypothesis
Definition: MyHypothesis.h:6
volatile sig_atomic_t CTRL_C
ThunkGrammarHypothesis(std::vector< HYP > &hypotheses, const data_t *human_data)
Definition: ThunkGrammarHypothesis.h:30
Definition: HumanDatum.h:19
Definition: ThunkGrammarHypothesis.h:20
Super::LL_t LL_t
Definition: ThunkGrammarHypothesis.h:25
std::shared_ptr< Predict_t > P
Definition: BaseGrammarHypothesis.h:92
Just a little wrapper to allow vectors to be handled as 2D arrays, which simplifie some stuff in Gram...
Definition: Vector2D.h:14
const data_t * which_data
Definition: BaseGrammarHypothesis.h:99
std::vector< Args... > data_t
Definition: Bayesable.h:39
std::unordered_map< typename MyHumanDatum ::std::vector< MyHumanDatum > *, std::vector< Vector > > LL_t
Definition: BaseGrammarHypothesis.h:61
virtual void recompute_P(std::vector< HYP > &hypotheses, const data_t &human_data) override
For a thunk, the predictions don't depend on the data.
Definition: ThunkGrammarHypothesis.h:91
This class does grammar inference with some collection of HumanData and fixed set of hypotheses...
Super::Predict_t Predict_t
Definition: ThunkGrammarHypothesis.h:26
Eigen::VectorXf Vector
Definition: EigenLib.h:17