Fleet  0.0.9
Inference in the LOT
MyHypothesis.h
Go to the documentation of this file.
1 #pragma once
2 
6 
8 
9 class InnerHypothesis : public StochasticLOTHypothesis<InnerHypothesis,S,S,MyGrammar,&grammar> {
10 public:
12  using Super::Super;
13 
14  static constexpr double regenerate_p = 0.7;
15 
16  [[nodiscard]] virtual std::optional<std::pair<InnerHypothesis,double>> propose() const override {
17 
18  std::optional<std::pair<Node,double>> x;
19  if(flip(regenerate_p)) x = Proposals::regenerate(&grammar, value);
21  else if(flip(0.1)) x = Proposals::swap_args(&grammar, value);
22  else if(flip()) x = Proposals::insert_tree(&grammar, value);
24 
25  if(not x) { return {}; }
26 
27  return std::make_pair(InnerHypothesis(std::move(x.value().first)), x.value().second);
28  }
29 
30 };
31 
32 #include "Lexicon.h"
33 
34 class MyHypothesis final : public Lexicon<MyHypothesis, int,InnerHypothesis, S, S> {
35 public:
36 
37  unsigned long PRINT_STRINGS = 128;
38 
40  using Super::Super;
41 
42  virtual DiscreteDistribution<S> call(const S x=EMPTY_STRING, const S& err=S{}) {
43  // this calls by calling only the last factor, which, according to our prior,
44 
45  // make myself the loader for all factors
46  for(auto& [k,f] : factors) {
47  f.program.loader = this;
48  f.was_called = false; // zero this please
49  }
50 
51  return factors[factors.size()-1].call(x, err); // we call the factor but with this as the loader.
52  }
53 
54  // We assume input,output with reliability as the number of counts that input was seen going to that output
55  virtual double compute_single_likelihood(const datum_t& datum) override { assert(0); }
56 
57  // a helpful little function so other functions (like grammar inference) can get
58  // likelihoods
59  static double string_likelihood(const DiscreteDistribution<S>& M, const data_t& data, const double breakout=-infinity) {
60 
61  const float log_A = log(alphabet.size());
62 
63  double ll = 0.0;
64  for(const auto& a : data) {
65  double alp = -infinity; // the model's probability of this
66  for(const auto& m : M.values()) {
67 
68  // we can always take away all character and generate a anew
69  alp = logplusexp(alp, m.second + p_delete_append<alpha,alpha>(m.first, a.output, log_A));
70  }
71  ll += alp * a.count;
72 
73  if(ll == -infinity or ll < breakout) {
74  return -infinity;
75  }
76  }
77  return ll;
78  }
79 
80 
81  double compute_likelihood(const data_t& data, const double breakout=-infinity) override {
82  // this version goes through and computes the predictive probability of each prefix
83 
84  const auto& M = call(EMPTY_STRING, errorstring);
85 
86  // calling "call" first made was_called false on everything, and
87  // this will only be set to true if it was called (via push_program)
88  // so here we check and make sure everything was used and if not
89  // we give it a -inf likelihood
90  // NOTE: This is a slow way to do this because it requires running the hypothesis
91  // but that's hard to get around with how factors work.
92  // NOTE that this checks factors over ALL prob. outcomes
93  for(auto& [k,f] : factors) {
94  if(not f.was_called) {
95  return likelihood=-infinity;
96  }
97  }
98 
99  // otherwise let's compute the likelihood
100  return likelihood = string_likelihood(M, data, breakout);
101 
102  }
103 
104  void show(std::string prefix="") override {
105  std::lock_guard guard(output_lock); // better not call Super wtih this here
107  extern std::string current_data;
108  extern std::pair<double,double> mem_pr;
109  auto o = this->call(EMPTY_STRING, errorstring);
110  auto [prec, rec] = get_precision_and_recall(o, prdata, PREC_REC_N);
111  COUT "#\n";
112  COUT "# "; o.show(PRINT_STRINGS); COUT "\n";
113  COUT prefix << current_data TAB current_ntokens TAB mem_pr.first TAB mem_pr.second TAB
114  this->born TAB this->posterior TAB this->prior TAB this->likelihood TAB QQ(this->serialize())
115  TAB prec TAB rec TAB QQ(this->string()) ENDL
116  }
117 
118 
119 };
A lexicon stores an association of numbers (in a vector) to some other kind of hypotheses (typically ...
MyGrammar grammar
std::optional< std::pair< Node, double > > sample_function_leaving_args(GrammarType *grammar, const Node &from)
This samples functions f(a,b) -> g(a,b) (i.e. without destroying what's below). This uses a little tr...
Definition: Proposers.h:331
std::string QQ(const std::string &x)
Definition: Strings.h:190
double likelihood
Definition: Bayesable.h:43
const std::string errorstring
Definition: Main.cpp:36
OrderedLock output_lock
Definition: IO.h:31
LOTHypothesis< InnerHypothesis, BindingTree *, bool, MyGrammar, grammar, defaultdatum_t< BindingTree *, bool >, std::vector< defaultdatum_t< BindingTree *, bool > >, typename MyGrammar ::VirtualMachineState_t > Super
Definition: DeterministicLOTHypothesis.h:17
size_t PREC_REC_N
Definition: Main.cpp:26
std::string S
Definition: Main.cpp:28
#define TAB
Definition: IO.h:19
virtual std::optional< std::pair< InnerHypothesis, double > > propose() const override
Default proposal is rational-rules style regeneration.
Definition: InnerHypothesis.h:60
DeterministicLOTHypothesis< InnerHypothesis, BindingTree *, bool, MyGrammar,&grammar > Super
Definition: InnerHypothesis.h:14
Definition: InnerHypothesis.h:9
double prior
Definition: Bayesable.h:42
Definition: DiscreteDistribution.h:25
virtual output_t call(const input_t x, const output_t &err=output_t{})
A variant of call that assumes no stochasticity and therefore outputs only a single value...
Definition: DeterministicLOTHypothesis.h:32
bool flip(float p=0.5)
Definition: Random.h:25
double posterior
Definition: Bayesable.h:44
uintmax_t born
Definition: Bayesable.h:47
We don't need inputs/outputs for our MyHypothesis
Definition: MyHypothesis.h:6
std::optional< std::pair< Node, double > > delete_tree(GrammarType *grammar, const Node &from)
Definition: Proposers.h:275
constexpr double infinity
Definition: Numerics.h:20
T logplusexp(const T a, const T b)
Definition: Numerics.h:131
virtual void show(std::string prefix="")
Definition: Bayesable.h:197
Bayesable< defaultdatum_t< BindingTree *, bool >, std::vector< defaultdatum_t< BindingTree *, bool > > >::datum_t datum_t
Definition: LOTHypothesis.h:47
MyHypothesis::data_t prdata
Definition: Main.cpp:42
std::optional< std::pair< Node, double > > regenerate(GrammarType *grammar, const Node &from)
A little helper function that resamples everything below when we can. If we can't, then we'll recurse.
Definition: Proposers.h:107
std::pair< double, double > mem_pr
Definition: Main.cpp:45
Definition: Lexicon.h:27
InnerHypothesis(const InnerHypothesis &c)
Definition: InnerHypothesis.h:25
#define ENDL
Definition: IO.h:21
const std::string EMPTY_STRING
Definition: Strings.h:17
std::optional< std::pair< Node, double > > insert_tree(GrammarType *grammar, const Node &from)
Definition: Proposers.h:192
std::vector< Args... > data_t
Definition: Bayesable.h:39
Definition: StochasticLOTHypothesis.h:14
const std::map< T, double > & values() const
Definition: DiscreteDistribution.h:128
std::optional< std::pair< Node, double > > swap_args(GrammarType *grammar, const Node &from)
This proposal swaps around arguments of the same type.
Definition: Proposers.h:389
virtual double compute_likelihood(const data_t &data, const double breakout=-infinity)
Compute the likelihood of a collection of data, by calling compute_single_likelihood on each...
Definition: Bayesable.h:83
#define COUT
Definition: IO.h:24
size_t current_ntokens
Definition: Main.cpp:34
Super::data_t data_t
Definition: InnerHypothesis.h:23
unsigned long PRINT_STRINGS
Definition: Main.cpp:29
S current_data
Definition: Main.cpp:43
S alphabet
Definition: Main.cpp:19
std::pair< double, double > get_precision_and_recall(DiscreteDistribution< std::string > &model, std::vector< TDATA > &data, unsigned long N)
Definition: Data.h:48