piantado/Fleet/_grammar_8h_source.html

 #pragma once

 #include <tuple>
 #include <array>
 #include <exception>

 #include "IO.h"
 #include "Errors.h"
 #include "Node.h"
 #include "Random.h"
 #include "Nonterminal.h"
 #include "VirtualMachineState.h"
 #include "VirtualMachinePool.h"
 #include "Builtins.h"
 #include "Functional.h"

 // Exception signalling that generation recursed past the allowed depth.
 // Each default construction bumps the global FleetStatistics::depth_exceptions counter.
 struct DepthException : public std::exception {
     DepthException() { ++FleetStatistics::depth_exceptions; }
 };

 template<typename _input_t, typename _output_t, typename... GRAMMAR_TYPES>
 class Grammar {
 public:

     using input_t = _input_t;
     using output_t = _output_t;
     using this_t = Grammar<input_t, output_t, GRAMMAR_TYPES...>;

     // Keep track of which types this grammar uses as its nonterminal types -- these
     // types are stored in this tuple so they can be extracted later
     using TypeTuple = std::tuple<GRAMMAR_TYPES...>;

     // how many nonterminal types do we have?
     static constexpr size_t N_NTs = std::tuple_size<TypeTuple>::value;

     // The input/output types must be repeated to VirtualMachineState
     using VirtualMachineState_t = VirtualMachineState<input_t, output_t, GRAMMAR_TYPES...>;

     // This is the function type
     using FT = typename VirtualMachineState_t::FT;

     // How many times will we silently ignore a DepthException
     // before tossing an assert error
     static const size_t GENERATE_DEPTH_EXCEPTION_RETRIES = 1000;

     // get the n'th type
     //template<size_t N>
     //using type = typename std::tuple_element<N, TypeTuple>::type;

     // rules[k] stores a SORTED vector of rules for the kth' nonterminal.
     // our iteration order is first for k = 0 ... N_NTs then for r in rules[k]
     std::vector<Rule>        rules[N_NTs];
     std::array<double,N_NTs> Z; // keep the normalizer handy for each nonterminal (not log space)

     size_t GRAMMAR_MAX_DEPTH = 64;

     // This function converts a type (passed as a template parameter) into a
     // size_t index saying which entry of GRAMMAR_TYPES it is.
     // This is used so that a Rule doesn't need type subclasses/templates; it can
     // store a type as e.g. nt<double>() -> size_t
     template <class T>
     static constexpr nonterminal_t nt() {
         // Compile-time guards: the grammar must declare types, and T must be one of them.
         static_assert(sizeof...(GRAMMAR_TYPES) > 0, "*** Cannot use empty grammar types here");
         static_assert(contains_type<T, GRAMMAR_TYPES...>(), "*** The type T (decayed) must be in GRAMMAR_TYPES");

         // Look T up in the tuple of grammar types and hand the result back as a nonterminal_t.
         constexpr auto position = TypeIndex<T, std::tuple<GRAMMAR_TYPES...>>::value;
         return static_cast<nonterminal_t>(position);
     }

     // Start with no probability mass on any nonterminal; add_vms accumulates into Z.
     Grammar() {
         Z.fill(0.0);
     }

     // should not be doing these
     Grammar(const Grammar& g)  = delete;
     Grammar(const Grammar&& g) = delete;

     class RuleIterator {

         // these are require din here for this to be an iterator
         using iterator_category = std::forward_iterator_tag;
         using value_type = Rule;
         using difference_type = int;
         using pointer = Rule;
         using reference = Rule&;


     protected:
             this_t* grammar;
             nonterminal_t current_nt;
             std::vector<Rule>::iterator current_rule;

     public:

             RuleIterator(this_t* g, bool is_end) : grammar(g), current_nt(0) {
                 if(not is_end) {
                     current_rule = g->rules[0].begin();
                 }
                 else {
                     // by convention we set current_rule and current_nt to the last items
                     // since this is what ++ will leave them as below
                     current_nt = N_NTs-1;
                     current_rule = grammar->rules[current_nt].end();
                 }
             }
             Rule& operator*() const  { return *current_rule; }
 //          Rule* operator->() const { return  current_rule; }

             RuleIterator& operator++(int blah) { this->operator++(); return *this; }
             RuleIterator& operator++() {

                 current_rule++;

                 // keep incrementing over rules that are empty, and if we run out of
                 // nonterminals, set us to the end and break
                 while( current_rule == grammar->rules[current_nt].end() ) {
                     if(current_nt < grammar->N_NTs-1) {
                         current_nt++; // next nonterminal
                         current_rule = grammar->rules[current_nt].begin();
                     }
                     else {
                         current_rule = grammar->rules[current_nt].end();
                         break;
                     }
                 }

                 return *this;
             }

             RuleIterator& operator+(size_t n) {
                 for(size_t i=0;i<n;i++) this->operator++();
                 return *this;
             }

             bool operator==(const RuleIterator& rhs) const {
                 return current_nt == rhs.current_nt and current_rule == rhs.current_rule;
             }
     };

     // begin()/end() are const so a const Grammar can be iterated; the const is cast
     // away because RuleIterator stores a non-const grammar pointer and yields Rule&.
     RuleIterator begin() const {
         this_t* self = const_cast<this_t*>(this);
         return RuleIterator(self, false);
     }
     RuleIterator end() const {
         this_t* self = const_cast<this_t*>(this);
         return RuleIterator(self, true);
     }

     // The start nonterminal: the nonterminal index of the grammar's output type.
     constexpr nonterminal_t start() {
         return nt<output_t>();
     }

     // Number of nonterminal types in this grammar (the size of GRAMMAR_TYPES).
     constexpr size_t count_nonterminals() const {
         return N_NTs;
     }

     // Number of rules stored for nonterminal nt (which must be a valid index).
     size_t count_rules(const nonterminal_t nt) const {
         assert(nt >= 0 and nt < N_NTs);
         const auto& bucket = rules[nt];
         return bucket.size();
     }
     size_t count_rules() const {
         size_t n=0;
         for(size_t i=0;i<N_NTs;i++) {
             n += count_rules((nonterminal_t)i);
         }
         return n;
     }

     void change_probability(const std::string& s, const double newp) {
         Rule* r = get_rule(s);
         Z[r->nt] -= r->p;
         r->p = newp;
         Z[r->nt] += r->p;
     }

     // How many of nt's rules report is_terminal()?
     size_t count_terminals(nonterminal_t nt) const {
         size_t total = 0;
         for(const auto& rule : rules[nt]) {
             total += (rule.is_terminal() ? 1 : 0);
         }
         return total;
     }
     // How many of nt's rules are NOT terminals (i.e. is_terminal() is false)?
     size_t count_nonterminals(nonterminal_t nt) const {
         size_t total = 0;
         for(const auto& rule : rules[nt]) {
             total += (rule.is_terminal() ? 0 : 1);
         }
         return total;
     }

 //  void finite_size(nonterminal_t nt) const {
 //      assert(nt >=0 and nt <= N_NTs);
 //
 //      // need to create a 2d table of what each thing can expand to
 //      std::vector<std::vector<int> > e(N_NTs, std::vector<int>(N_NTs, 0));
 //
 //      for(auto& r : rules[nt]) {
 //          for(auto& t : r.child_types)
 //              ++e[nt][t]; // how many ways can I get to this one?
 //      }
 //
 //      bool updated = false;
 //
 //      do {
 //          for(size_t nt=0;nt<N_NTs;nt++) {
 //              for(auto& r : rules[nt]){
 //                  for(auto& t : r.child_types) {
 //
 //                  }
 //              }
 //          }
 //
 //      } while(updated);
 //  }

     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // Managing rules
     // (this holds a lot of complexity for how we initialize from PRIMITIVES)
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


     // Compile-time predicate: is X one of this grammar's GRAMMAR_TYPES?
     // Simply forwards to contains_type.
     template<typename X>
     static constexpr bool is_in_GRAMMAR_TYPES() {
         // check if X is in GRAMMAR_TYPES
         // TODO: UPDATE FOR DECAY SINCE WE DONT WANT THAT UNTIL WE HAVE REFERENCES AGAIN?
         return contains_type<X,GRAMMAR_TYPES...>();
     }

     template<typename T, typename... args>
     void add_vms(std::string fmt, FT* f, double p=1.0, Op o=Op::Standard, int a=0) {
         assert(f != nullptr && "*** If you're passing a null f to add_vms, you've really screwed up.");

         nonterminal_t Tnt = this->nt<T>();
         Rule r(Tnt, (void*)f, fmt, {nt<args>()...}, p, o, a);
         Z[Tnt] += r.p; // keep track of the total probability
         auto pos = std::lower_bound( rules[Tnt].begin(), rules[Tnt].end(), r);
         rules[Tnt].insert( pos, r ); // put this before
     }

     // Add a rule from a Primitive: the function pointer and Op are read from b,
     // and the work is forwarded to add_vms.
     template<typename T, typename... args>
     void add(std::string fmt, Primitive<T,args...>& b, double p=1.0, int a=0) {
         // read f and o from b
         assert(b.f != nullptr);
         add_vms<T,args...>(fmt, (FT*)b.f, p, b.op, a);
     }

     // Add a rule from an ordinary std::function<T(args...)>. The function is wrapped
     // in a heap-allocated FT that pops its arguments from the VirtualMachineState
     // (last argument first), calls f, and pushes the result. Up to six arguments
     // are supported. The wrapper is handed to add_vms as a raw pointer.
     template<typename T, typename... args>
     void add(std::string fmt,  std::function<T(args...)> f, double p=1.0, Op o=Op::Standard, int a=0) {

         // first check that the types are allowed
         // (the message is passed as static_assert's second argument, as in the checks below)
         static_assert(not std::is_reference<T>::value, "*** Primitives cannot return references.");
         static_assert((not std::is_reference<args>::value && ...), "*** Arguments cannot be references.");
         static_assert(is_in_GRAMMAR_TYPES<T>() , "*** Return type is not in GRAMMAR_TYPES");
         static_assert((is_in_GRAMMAR_TYPES<args>() && ...), "*** Argument type is not in GRAMMAR_TYPES");

         // NOTE: We would like friendlier error messages that mention fmt (e.g. an
         // `if constexpr(...) { print(...); static_assert(false); }` per check), but
         // apparently this is not supported.

         // create a lambda on the heap that is a function of a VMS, since
         // this is what an instruction must be. This implements the calling order convention too.
         auto fvms = new FT([=](VirtualMachineState_t* vms, int _a=0) -> void {
                 assert(vms != nullptr);

                 if constexpr (sizeof...(args) ==  0){
                     vms->push( f() );
                 }
                 else if constexpr (sizeof...(args) ==  1) {
                     auto a0 = vms->template getpop_nth<0,args...>();
                     vms->push(f(std::move(a0)));
                 }
                 else if constexpr (sizeof...(args) ==  2) {
                     auto a1 = vms->template getpop_nth<1,args...>();
                     auto a0 = vms->template getpop_nth<0,args...>();
                     vms->push(f(std::move(a0), std::move(a1)));
                 }
                 else if constexpr (sizeof...(args) ==  3) {
                     auto a2 = vms->template getpop_nth<2,args...>();
                     auto a1 = vms->template getpop_nth<1,args...>();
                     auto a0 = vms->template getpop_nth<0,args...>();
                     vms->push(f(std::move(a0), std::move(a1), std::move(a2)));
                 }
                 else if constexpr (sizeof...(args) ==  4) {
                     auto a3 = vms->template getpop_nth<3,args...>();
                     auto a2 = vms->template getpop_nth<2,args...>();
                     auto a1 = vms->template getpop_nth<1,args...>();
                     auto a0 = vms->template getpop_nth<0,args...>();
                     vms->push(f(std::move(a0), std::move(a1), std::move(a2), std::move(a3)));
                 }
                 else if constexpr (sizeof...(args) ==  5) {
                     auto a4 = vms->template getpop_nth<4,args...>();
                     auto a3 = vms->template getpop_nth<3,args...>();
                     auto a2 = vms->template getpop_nth<2,args...>();
                     auto a1 = vms->template getpop_nth<1,args...>();
                     auto a0 = vms->template getpop_nth<0,args...>();
                     vms->push(f(std::move(a0), std::move(a1), std::move(a2), std::move(a3), std::move(a4)));
                 }
                 else if constexpr (sizeof...(args) ==  6) {
                     auto a5 = vms->template getpop_nth<5,args...>();
                     auto a4 = vms->template getpop_nth<4,args...>();
                     auto a3 = vms->template getpop_nth<3,args...>();
                     auto a2 = vms->template getpop_nth<2,args...>();
                     auto a1 = vms->template getpop_nth<1,args...>();
                     auto a0 = vms->template getpop_nth<0,args...>();
                     vms->push(f(std::move(a0), std::move(a1), std::move(a2), std::move(a3), std::move(a4), std::move(a5)));
                 }
                 else {
                     // more than six arguments: this only fails when the rule is actually run
                     print("*** Error -- too many arguments for a function. Must be updated in Grammar.h ", sizeof...(args) );

                     assert(false);
                 }
             });

         add_vms<T,args...>(fmt, fvms, p, o, a);
     }

     // Add a rule from a plain function pointer by wrapping it in a std::function
     // and delegating to the std::function overload.
     template<typename T, typename... args>
     void add(std::string fmt,  T(*_f)(args...), double p=1.0, Op o=Op::Standard, int a=0) {
         std::function<T(args...)> wrapped(_f);
         add<T,args...>(fmt, wrapped, p, o, a);
     }


     // Add a zero-argument rule that always produces (a copy of) the value x.
     template<typename T>
     void add_terminal(std::string fmt, T x, double p=1.0, Op o=Op::Standard, int a=0) {
         std::function<T()> constant = [x]() -> T { return x; };
         add(fmt, constant, p, o, a);
     }


     // Add a function *value* as a terminal of type ft<T,args...>: the rule yields
     // the function itself rather than applying it.
     template<typename T, typename... args>
     void add_ft(std::string fmt,  T(*_f)(args...), double p=1.0, Op o=Op::Standard, int a=0) {
         // should not contain %s since its not a function application
         assert(not contains(fmt, "%s"));

         // convert to std::function
         std::function<T(args...)> f(_f);

         add_terminal<ft<T,args...>>(fmt, f, p, o, a);
     }

     // Drop every rule of nonterminal nt and reset its normalizer to zero.
     void remove_all(nonterminal_t nt) {
         Z[nt] = 0.0;
         rules[nt].clear();
     }

     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // Methods for getting rules by some info
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

     // Position of a rule equal to *r within the rules of its own nonterminal.
     // Throws YouShouldNotBeHereError if no stored rule compares equal.
     size_t get_index_of(const Rule* r) const {
         size_t idx = 0;
         while(idx < rules[r->nt].size()) {
             if(*get_rule(r->nt, idx) == *r) return idx;
             ++idx;
         }
         throw YouShouldNotBeHereError("*** Did not find rule in get_index_of.");
     }

     // k'th rule of nonterminal nt (both indices are assert-checked). Returns a
     // mutable pointer even though the grammar is const here.
     [[nodiscard]] virtual Rule* get_rule(const nonterminal_t nt, size_t k) const {
         assert(nt < N_NTs);
         assert(k < rules[nt].size());
         const Rule& found = rules[nt][k];
         return const_cast<Rule*>(&found);
     }

     // First rule of nonterminal nt for which is_a(o) holds and whose arg equals a.
     // Throws YouShouldNotBeHereError when there is none.
     [[nodiscard]] virtual Rule* get_rule(const nonterminal_t nt, const Op o, const int a=0) {
         assert(nt >= 0 and nt < N_NTs);
         for(auto it = rules[nt].begin(); it != rules[nt].end(); ++it) {
             // Need to fix this because it used is_a:
             const bool matches = it->is_a(o) and it->arg == a;
             if(matches) return &(*it);
         }
         throw YouShouldNotBeHereError("*** Could not find rule");
     }

     // Non-const lookup of the i'th rule of nonterminal nt. The rule index is
     // bounds-checked by vector::at (which throws on a bad i); nt itself is not checked.
     [[nodiscard]] virtual Rule* get_rule(const nonterminal_t nt, size_t i) {
         return &rules[nt].at(i);
     }

     // Find the rule of nonterminal nt whose format matches s.
     //
     // An exact match (s == format) always wins. Otherwise we allow s to match as a
     // prefix (via is_prefix), but only if that prefix match is unique -- e.g. if one
     // rule was "str" and one was "string", we'd want "str" to match "str" and not
     // "string". The decision is made after looking at every rule, so the result
     // does not depend on the order in which the rules happen to be stored.
     //
     // Throws YouShouldNotBeHereError if nothing matches, if several rules match
     // exactly, or if there is no exact match and several rules match as a prefix.
     [[nodiscard]] virtual Rule* get_rule(const nonterminal_t nt, const std::string s) const {
         assert(nt >= 0 and nt < N_NTs);

         Rule* exact          = nullptr; // the rule whose format equals s
         Rule* partial        = nullptr; // first rule matched only as a prefix
         Rule* second_partial = nullptr; // a further prefix match, i.e. an ambiguity

         for(auto& r: rules[nt]) {

             if(s == r.format) {
                 if(exact != nullptr) { // if we previously found a full match
                     CERR "*** Multiple rules found matching " << s TAB r.format ENDL;
                     throw YouShouldNotBeHereError();
                 }
                 exact = const_cast<Rule*>(&r);
             }
             else if(s != "" and is_prefix(s, r.format)) {
                 // remember prefix matches, but don't complain yet: an exact
                 // match later in the list would make them irrelevant
                 if(partial == nullptr)             partial        = const_cast<Rule*>(&r);
                 else if(second_partial == nullptr) second_partial = const_cast<Rule*>(&r);
             }
         }

         if(exact != nullptr) {
             return exact;
         }
         else if(second_partial != nullptr) {
             CERR "*** Multiple rules found matching " << s TAB second_partial->format ENDL;
             throw YouShouldNotBeHereError();
         }
         else if(partial != nullptr) {
             return partial;
         }
         else {
             CERR "*** No rule found to match " TAB QQ(s) ENDL;
             throw YouShouldNotBeHereError();
         }
     }

     // Find a rule by format string, searching every nonterminal (in the usual
     // order: k = 0 ... N_NTs-1, then the rules within rules[k]).
     //
     // Matching follows the per-nonterminal overload: an exact match (s == format)
     // wins; otherwise s may match as a prefix (via is_prefix) provided that match
     // is unique. Ambiguity and "nothing found" are both reported by throwing
     // YouShouldNotBeHereError, so they are not silently ignored when asserts are
     // compiled out.
     [[nodiscard]] virtual Rule* get_rule(const std::string s) const {
         Rule* exact          = nullptr; // the rule whose format equals s
         Rule* partial        = nullptr; // first rule matched only as a prefix
         Rule* second_partial = nullptr; // a further prefix match, i.e. an ambiguity

         // walk the rule vectors directly so that nonterminals without rules are harmless
         for(size_t k=0;k<N_NTs;k++) {
             for(auto& r : rules[k]) {
                 if(s == r.format) {
                     if(exact != nullptr) {
                         CERR "*** Multiple rules found matching " << s TAB r.format ENDL;
                         throw YouShouldNotBeHereError();
                     }
                     exact = const_cast<Rule*>(&r);
                 }
                 else if(s != "" and is_prefix(s, r.format)) {
                     if(partial == nullptr)             partial        = const_cast<Rule*>(&r);
                     else if(second_partial == nullptr) second_partial = const_cast<Rule*>(&r);
                 }
             }
         }

         if(exact != nullptr) { return exact; }
         else if(second_partial != nullptr) {
             CERR "*** Multiple rules found matching " << s TAB second_partial->format ENDL;
             throw YouShouldNotBeHereError();
         }
         else if(partial != nullptr) { return partial; }
         else {
             CERR "*** No rule found to match " TAB QQ(s) ENDL;
             throw YouShouldNotBeHereError();
         }
     }

     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // Sampling rules
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

     // The normalizer Z[nt]: the running sum of rule weights for nonterminal nt
     // (not in log space).
     double rule_normalizer(const nonterminal_t nt) const {
         assert(nt < N_NTs);
         return Z[nt];
     }

     // Draw one rule of nonterminal nt via sample(), weighting each rule by its p
     // and passing Z[nt] as the normalizer.
     virtual Rule* sample_rule(const nonterminal_t nt) const {
         if(rules[nt].size() == 0) {
             print("Failed nonterminal, not in grammar:", nt);
             assert(false && "*** You are trying to sample from a nonterminal with no rules!");
         }

         // weight function handed to sample
         std::function<double(const Rule& r)> weight = [](const Rule& r){ return r.p; };

         // sample returns a pair; we only want the rule, not the probability
         return sample<Rule,std::vector<Rule>>(rules[nt], Z[nt], weight).first;
     }


     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // Generation
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


     // Build a Node for rule r, passing along the rule's normalized log probability.
     Node makeNode(const Rule* r) const {
         const auto lp = log(r->p) - log(rule_normalizer(r->nt));
         return Node(r, lp);
     }


     // Sample a tree top-down starting from nonterminal ntfrom.
     // depth is the current recursion depth; once it reaches GRAMMAR_MAX_DEPTH a
     // DepthException is thrown and propagates all the way out (generate() is the
     // wrapper that catches it and retries).
     Node __generate(const nonterminal_t ntfrom=nt<output_t>(), unsigned long depth=0) const {
         if(depth >= GRAMMAR_MAX_DEPTH) {
             #ifdef WARN_DEPTH_EXCEPTION
                 CERR "*** Grammar exceeded max depth, are you sure the grammar probabilities are right?" ENDL;
                 CERR "*** You might be able to figure out what's wrong with gdb and then looking at the backtrace of" ENDL;
                 CERR "*** which nonterminals are called." ENDL;
                 CERR "*** Or.... maybe this nonterminal does not rewrite to a terminal?" ENDL;
             #endif
             throw DepthException();
         }

         Rule* r = sample_rule(ntfrom);
         Node n = makeNode(r);

         // we'll wrap in a catch so we can see the sequence of nonterminals that failed us:
         try {

             for(size_t i=0;i<r->N;i++) {
                 n.set_child(i, __generate(r->type(i), depth+1)); // recurse down
             }

         } catch(const DepthException&) {
             #ifdef WARN_DEPTH_EXCEPTION
                 CERR ntfrom << " ";
             #endif
             throw; // rethrow the exception in flight rather than throwing a copy of it
         }

         return n;
     }

     // A wrapper around __generate that catches DepthExceptions and retries, up to
     // GENERATE_DEPTH_EXCEPTION_RETRIES times. If every attempt fails we assert and,
     // should asserts be compiled out, throw YouShouldNotBeHereError -- control must
     // never fall off the end of a function that returns a Node.
     Node generate(const nonterminal_t ntfrom=nt<output_t>(), unsigned long depth=0) const {
         for(size_t tries=0;tries<GENERATE_DEPTH_EXCEPTION_RETRIES;tries++) {
             try {
                 return __generate(ntfrom, depth);
             } catch(const DepthException&) {
                 // too deep this time; just try again
             }
         }
         assert(false && "*** Generate failed due to repeated depth exceptions");
         throw YouShouldNotBeHereError("*** Generate failed due to repeated depth exceptions");
     }

     // Copy a tree, except that any subtree whose root satisfies f is thrown away
     // and regenerated from that root's nonterminal.
     Node copy_resample(const Node& node, bool f(const Node& n)) const {
         // resample this whole subtree?
         if(f(node)) return generate(node.rule->nt);

         // otherwise normal copy, then apply the same treatment to each child
         Node duplicate = node;
         for(size_t k=0; k<duplicate.nchildren(); k++) {
             duplicate.set_child(k, copy_resample(duplicate.child(k), f));
         }
         return duplicate;
     }

     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // Computing log probabilities and priors
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

     // Map each stored rule's address to its position in the grammar-wide ordering
     // (nonterminal 0's rules first, then nonterminal 1's, ...).
     const std::map<const Rule*, size_t> get_rule_indexer() const {
         std::map<const Rule*, size_t> indexer;
         const size_t NT = count_nonterminals();
         size_t next = 0;
         for(size_t nt=0; nt<NT; nt++) {
             for(const auto& r : rules[nt]) {
                 indexer[&r] = next++;
             }
         }
         return indexer;
     }

     // Convenience overload: builds a fresh rule indexer and counts with it.
     std::vector<size_t> get_counts(const Node& node) const {
         return get_counts(node, get_rule_indexer());
     }

     // Count how often each rule occurs in the tree rooted at node. The result has
     // one slot per rule in the grammar; indexer says which slot a rule belongs to
     // (indexer.at throws if a node's rule is not in the indexer).
     std::vector<size_t> get_counts(const Node& node, const std::map<const Rule*,size_t>& indexer) const {
         std::vector<size_t> tally(count_rules(), 0);
         for(auto& n : node) {
             const size_t slot = indexer.at(n.rule);
             tally[slot] += 1;
         }
         return tally;
     }

     // Sum the rule counts over every entry of a map: for each mapped value we
     // count get_value() with the given indexer and add the counts element-wise.
     template<typename K, typename V>
     std::vector<size_t> get_counts(const std::map<K,V>& m, const std::map<const Rule*,size_t>& indexer) const {

         std::vector<size_t> tally(count_rules(), 0);

         for(const auto& entry : m) {
             // extract counts using indexer
             auto part = get_counts(entry.second.get_value(), indexer);
             for(size_t slot=0; slot<part.size(); slot++) {
                 tally[slot] += part[slot];
             }
         }

         return tally;
     }


     // If eigen is defined we can get the transition matrix
     #ifdef AM_I_USING_EIGEN
     // Build an NT x NT matrix m where m(to, from) accumulates, over the rules of
     // nonterminal `from`, the rule's normalized probability once for every child
     // of type `to` that the rule has.
     Matrix get_nonterminal_transition_matrix() {
         const size_t NT = count_nonterminals();
         Matrix m = Matrix::Zero(NT,NT);
         for(size_t from=0; from<NT; from++) {
             double z = rule_normalizer(from);
             for(auto& r : rules[from]) {
                 double weight = r.p / z;
                 for(auto& to : r.get_child_types()) {
                     m(to,from) += weight;
                 }
             }
         }

         return m;
     }
     #endif

 //  double get_expected_length(size_t max_depth=50) const {
 //
 //      const size_t NT = count_nonterminals();
 //      nonterminal_t start = nt<output_t>();
 //      // we'll build up a NT x max_depth dynamic programming table
 //
 //      Vector2D<double> tab(NT, max_depth);
 //      tab.fill(0.0);
 //      tab[start,0] = 1; // start with 1
 //
 //      for(size_t d=1;d<max_depth;d++) {
 //          for(nonterminal_t nt=0;nt<NT;nt++) {
 //
 //              double z = rule_normalizer(nt);
 //              for(auto& r : rules[nt]) {
 //                  double p = r.p / z;
 //                  for(auto& to : r.get_child_types()) {
 //                      m(to,nt) += p;
 //                  }
 //              }
 //
 //                  tab[d][nt] = 0.0;
 //          }
 //      }
 //
 //
 //      double l = 0.0;
 //  }
 //

     // Log probability of a tree under this grammar: the sum, over all nodes, of
     // log(p) - log(Z) for the node's rule. Nodes holding NullRule contribute nothing.
     double log_probability(const Node& n) const {
         double total = 0.0;
         for(auto& x : n) {
             if(x.rule != NullRule) {
                 total += log(x.rule->p) - log(rule_normalizer(x.rule->nt));
             }
         }
         return total;
     }

     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // Implementation of converting strings to nodes
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


     // Rebuild a tree from a queue of tokens, consuming tokens from the front.
     // Each token is split at Node::NTDelimiter into a nonterminal number and a
     // rule format; the children are then read recursively, one per rule argument.
     Node from_parseable(std::deque<std::string>& q) const {
         assert(!q.empty() && "*** Should not ever get to here with an empty queue -- are you missing arguments?");

         const auto head = divide(q.front(), Node::NTDelimiter);
         const std::string& nts = head.first;  // nonterminal, as text
         const std::string& pfx = head.second; // rule format (or a prefix of it)
         q.pop_front();

         // null rules:
         if(pfx == NullRule->format) {
             return makeNode(NullRule);
         }

         // otherwise find the matching rule
         Rule* r = this->get_rule(stoi(nts), pfx);

         Node v = makeNode(r);
         for(size_t k=0; k<r->N; k++) {

             v.set_child(k, from_parseable(q));

             // just check that we didn't mess this up
             const bool type_ok = (r->type(k) == v.child(k).rule->nt);
             if(not type_ok) {
                 CERR "*** Grammar expected type " << r->type(k) << " but got type " << v.child(k).rule->nt << " at " << r->format << " argument " << k ENDL;
                 assert(false && "Bad names in from_parseable.");
             }

         }
         return v;
     }

     // Split s at Node::RuleDelimiter into tokens and parse those.
     Node from_parseable(std::string s) const {
         std::deque<std::string> tokens = split(s, Node::RuleDelimiter);
         return from_parseable(tokens);
     }

     // C-string convenience overload; converts and forwards to the std::string version.
     Node from_parseable(const char* c) const {
         return from_parseable(std::string(c));
     }


     // How many neighbors do I have? This is the number of rules that could fill the
     // first gap (null child) found, searching depth-first, left to right.
     size_t neighbors(const Node& node) const {
         for(size_t k=0; k<node.rule->N; k++){
             if(node.child(k).is_null()) {
                 // NOTE: must use rule->child_types since child[i]->rule->nt is always 0 for NullRules
                 return count_rules(node.rule->type(k));
             }

             // no gap right here: look inside this child, and return the count
             // for the first gap if it has one
             const auto below = neighbors(node.child(k));
             if(below > 0) return below;
         }
         return 0;
     }

     // Fill in one gap (null child) of the tree with a single rule, selected by `which`.
     //
     // `which` is shared by reference across the whole recursive walk: at every gap
     // it is decremented by the number of rules that could go there, so the gap that
     // is expanded is the one reached while `which` lies in [0, count), and the rule
     // chosen is the which'th rule of that gap's nonterminal. The walk does not stop
     // after expanding; `which` is simply negative from then on, so nothing else matches.
     void expand_to_neighbor(Node& node, int& which) {
         // here we find the neighbor indicated by which and expand it into the which'th neighbor
         // to do this, we loop through until which is less than the number of neighbors,
         // and then it must specify which expansion we want to take. This means that when we
         // skip a nullptr, we have to subtract from it the number of neighbors (expansions)
         // we could have taken.
         for(size_t i=0;i<node.rule->N;i++){
             if(node.child(i).is_null()) {
                 int c = count_rules(node.rule->type(i)); // number of ways to fill this gap
                 if(which >= 0 and which < c) {
                     auto r = get_rule(node.rule->type(i), (size_t)which);
                     node.set_child(i, makeNode(r));
                 }
                 which -= c; // consumed whether or not we expanded here
             }
             else { // otherwise we have to process that which
                 expand_to_neighbor(node.child(i), which);
             }
         }
     }

     // Log probability (log p - log Z) of the rule that the which'th neighbor would
     // put into its gap, without modifying the tree. Gaps are enumerated the same
     // way as in expand_to_neighbor: `which` is shared by reference and decremented
     // by the number of candidate rules at each gap passed. Returns NaN when no gap
     // corresponds to `which`.
     double neighbor_prior(const Node& node, int& which) const {
         // here we find the neighbor indicated by which and expand it into the which'th neighbor
         // to do this, we loop through until which is less than the number of neighbors,
         // and then it must specify which expansion we want to take. This means that when we
         // skip a nullptr, we have to subtract from it the number of neighbors (expansions)
         // we could have taken.
         for(size_t i=0;i<node.rule->N;i++){
             if(node.child(i).is_null()) {
                 int c = count_rules(node.rule->type(i)); // number of ways to fill this gap
                 if(which >= 0 and which < c) {
                     auto r = get_rule(node.rule->type(i), (size_t)which);
                     return log(r->p)-log(rule_normalizer(r->nt));
                 }
                 which -= c;
             }
             else { // otherwise we have to process that which
                 auto o = neighbor_prior(node.child(i), which);
                 if(not std::isnan(o)) { // if this child returned something.
                     //assert(which <= 0);
                     return o;
                 }
             }
         }

         return NaN; // if no neighbors
     }

     // Fill every gap (null child) in the tree with a randomly generated subtree of
     // the type the parent rule requires at that position.
     void complete(Node& node) {
         for(size_t k=0; k<node.rule->N; k++){
             if(not node.child(k).is_null()) {
                 // already present: look for gaps further down
                 complete(node.child(k));
                 continue;
             }
             node.set_child(k, generate(node.rule->type(k)));
         }
     }


     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     // Simple parsing routines -- not very well debugged
     // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

     // Scan s once and report (open, commas, close):
     //   open   - index of the first '(' seen at nesting depth 0, or -1
     //   commas - indices of every ',' seen at nesting depth exactly 1
     //   close  - index of the first ')' that closes depth 1, or -1
     std::tuple<int,std::vector<int>,int> find_open_commas_close(const std::string s) {

         int first_open  = -1;
         int first_close = -1;
         std::vector<int> top_level_commas; // all commas with only one open

         // how many parentheses are currently open?
         int depth = 0;

         for(size_t pos=0; pos<s.length(); pos++){
             switch(s.at(pos)) {
                 case '(':
                     if(depth==0 and first_open==-1) {
                         first_open = static_cast<int>(pos);
                     }
                     depth += 1;
                     break;

                 case ')':
                     if(depth==1 and first_close==-1) {
                         first_close = static_cast<int>(pos);
                     }
                     depth -= 1;
                     break;

                 case ',':
                     // comma positions only count when there is exactly one open
                     if(depth==1) {
                         assert(first_close == -1);
                         top_level_commas.push_back(static_cast<int>(pos));
                     }
                     break;

                 default:
                     break;
             }
         }

         return std::make_tuple(first_open, top_level_commas, first_close);
     }

     // Parse a string of the form "f(arg,arg,...)" (or a bare terminal) into a Node.
     // The rule is looked up by reconstructing its format, "f(%s,%s,...)", and each
     // argument is parsed recursively. An optional leading LAMBDAXDOT_STRING and any
     // surrounding spaces/tabs are stripped first.
     //
     // An empty (or all-blank) string is looked up as the format "" rather than
     // failing with an out-of-range access while trimming.
     Node simple_parse(std::string s) {
         //print("Parsing ", s);

         // remove the lambda x. if its there -- compare against the constant's real
         // length (it need not be 3 bytes, e.g. if it contains a multi-byte character)
         const std::string lambda_prefix = LAMBDAXDOT_STRING;
         if(not lambda_prefix.empty() and s.compare(0, lambda_prefix.size(), lambda_prefix) == 0) {
             s.erase(0, lambda_prefix.size());
         }

         // remove leading whitespace (erases everything if s is all blanks)
         s.erase(0, s.find_first_not_of(" \t"));
         // remove trailing whitespace (npos+1 wraps to 0, which is fine on an empty string)
         s.erase(s.find_last_not_of(" \t") + 1);

         // use the above function to find chunks
         auto [open, commas, close] = find_open_commas_close(s);

         // if it's a terminal
         if(open == -1) {
             assert(commas.size()==0 and close==-1);
             auto r = this->get_rule(s);
             return this->makeNode(r);
         }
         else if(close == open+1) { // special case of "f()"
             return this->makeNode(get_rule(s)); // the whole string is what we want and its a terminal
         }
         else {
             assert(close != -1);

             // recover the rule format -- no spaces, etc.: one %s per argument
             std::string fmt = s.substr(0,open) + "(%s";
             for(size_t k=0;k<commas.size();k++) {
                 fmt += ",%s";
             }
             fmt += ")";

             // find the rule for this format
             auto r = this->get_rule(fmt);
             auto out = this->makeNode(r);

             // every child but the last ends at a top-level comma
             int prev=open+1;
             int ci=0;
             for(auto& c : commas) {
                 auto child_string = s.substr(prev,c-prev);
                 out.set_child(ci,simple_parse(child_string));
                 prev = c+1;
                 ci++;
             }

             // and the last child, which ends at the closing parenthesis
             auto child_string = s.substr(prev,close-prev);
             out.set_child(ci,simple_parse(child_string));

             return out;
         }
     }


 };
Grammar::from_parseable
Node from_parseable(const char *c) const
Definition: Grammar.h:917

Builtins.h
The Primitive type just stores a function pointer and an Op command.

IO.h

Grammar
Definition: Grammar.h:44

Grammar::find_open_commas_close
std::tuple< int, std::vector< int >, int > find_open_commas_close(const std::string s)
Definition: Grammar.h:1003

Grammar::neighbor_prior
double neighbor_prior(const Node &node, int &which) const
Definition: Grammar.h:958

UNUSED
void UNUSED(const T &x)
Definition: Miscellaneous.h:38

Grammar::generate
Node generate(const nonterminal_t ntfrom=nt< output_t >(), unsigned long depth=0) const
A wrapper to catch DepthExceptions and retry. This means that by default we try to generate GENERATE_D...
Definition: Grammar.h:693

QQ
std::string QQ(const std::string &x)
Definition: Strings.h:190

VirtualMachineState
Definition: VirtualMachineState.h:46

Grammar::RuleIterator::current_rule
std::vector< Rule >::iterator current_rule
Definition: Grammar.h:121

Grammar< MyInput, bool, bool, MyObject, MyInput, ObjectSet, ft< bool, bool >, ft< bool, bool, bool >, ft< bool, MyObject >, ft< bool, MyObject, MyObject >, ft< bool, bool, MyObject > >::output_t
bool output_t
Definition: Grammar.h:48

Node
Definition: Node.h:22

Grammar::start
constexpr nonterminal_t start()
The start nonterminal type.
Definition: Grammar.h:177

Grammar::get_rule
virtual Rule * get_rule(const nonterminal_t nt, const std::string s) const
Definition: Grammar.h:524

VirtualMachineState.h
This represents the state of a partial evaluation of a program, corresponding to the value of all of ...

Grammar::RuleIterator
Definition: Grammar.h:108

Grammar::copy_resample
Node copy_resample(const Node &node, bool f(const Node &n)) const
Definition: Grammar.h:702

Grammar::end
RuleIterator end() const
Definition: Grammar.h:172

TAB
#define TAB
Definition: IO.h:19

FleetStatistics::depth_exceptions
std::atomic< uintmax_t > depth_exceptions(0)

Grammar::count_rules
size_t count_rules() const
Definition: Grammar.h:198

Rule::type
nonterminal_t type(size_t i) const
Definition: Rule.h:152

divide
std::pair< std::string, std::string > divide(const std::string &s, const char delimiter)
Definition: Strings.h:144

Grammar::RuleIterator::operator++
RuleIterator & operator++(int blah)
Definition: Grammar.h:139

Rule
Definition: Rule.h:21

Grammar::RuleIterator::operator+
RuleIterator & operator+(size_t n)
Definition: Grammar.h:160

Primitive
Definition: Primitive.h:13

Grammar::RuleIterator::grammar
this_t * grammar
Definition: Grammar.h:119

Matrix
Eigen::MatrixXf Matrix
Definition: EigenLib.h:18

Grammar::from_parseable
Node from_parseable(std::string s) const
Definition: Grammar.h:906

Grammar< MyInput, bool, bool, MyObject, MyInput, ObjectSet, ft< bool, bool >, ft< bool, bool, bool >, ft< bool, MyObject >, ft< bool, MyObject, MyObject >, ft< bool, bool, MyObject > >::input_t
MyInput input_t
Definition: Grammar.h:47

split
std::deque< std::string > split(const std::string &s, const char delimiter)
Split returns a deque of s split up at the character delimiter. It handles these special cases: sp...
Definition: str.h:50

Grammar::add
void add(std::string fmt, T(*_f)(args...), double p=1.0, Op o=Op::Standard, int a=0)
Wrapper for add to use function pointers.
Definition: Grammar.h:419

Grammar::makeNode
Node makeNode(const Rule *r) const
Definition: Grammar.h:633

Primitive::f
void * f
Definition: Primitive.h:16

Grammar::begin
RuleIterator begin() const
Definition: Grammar.h:171

YouShouldNotBeHereError
Definition: Errors.h:18

Grammar::get_rule
virtual Rule * get_rule(const nonterminal_t nt, size_t i)
Definition: Grammar.h:520

DepthException::DepthException
DepthException()
Definition: Grammar.h:19

Grammar::from_parseable
Node from_parseable(std::deque< std::string > &q) const
Definition: Grammar.h:871

Op
Op
Definition: Ops.h:3

NullRule
const Rule * NullRule
Definition: Rule.h:186

Grammar::simple_parse
Node simple_parse(std::string s)
Very simple parsing routine that takes a string like "and(not(or(eq_pos(pos(parent(x)),'NP-POSS'),eq_pos('NP-S',pos(x)))),corefers(x))" (from the Binding example) and parses it into a Node.
Definition: Grammar.h:1048

DepthException
Definition: Grammar.h:18

Node::RuleDelimiter
static const char RuleDelimiter
Definition: Node.h:29

Rule::N
size_t N
Definition: Rule.h:29

Nonterminal.h

Grammar::RuleIterator::current_nt
nonterminal_t current_nt
Definition: Grammar.h:120

Grammar::count_nonterminals
size_t count_nonterminals(nonterminal_t nt) const
Definition: Grammar.h:231

Grammar::sample_rule
virtual Rule * sample_rule(const nonterminal_t nt) const
Definition: Grammar.h:612

print
void print(FIRST f, ARGS... args)
Lock output_lock and print to std::cout.
Definition: IO.h:53

Node::is_null
bool is_null() const
Definition: Node.h:165

Functional.h

Node::NTDelimiter
static const char NTDelimiter
Definition: Node.h:28

VirtualMachineState::push
void push(T &x)
Definition: VirtualMachineState.h:184

Grammar::get_counts
std::vector< size_t > get_counts(const std::map< K, V > &m, const std::map< const Rule *, size_t > &indexer) const
Support for map so we can call on Lexicon::get_value.
Definition: Grammar.h:780

Grammar::Z
std::array< double, N_NTs > Z
Definition: Grammar.h:75

Grammar::nt
static constexpr nonterminal_t nt()
Definition: Grammar.h:84

Grammar::__generate
Node __generate(const nonterminal_t ntfrom=nt< output_t >(), unsigned long depth=0) const
Definition: Grammar.h:644

Grammar::remove_all
void remove_all(nonterminal_t nt)
Remove all the nonterminals of this type from the grammar. NOTE: This is generally a really bad idea ...
Definition: Grammar.h:464

Node.h
A Node is the primary internal representation for a program – it recursively stores a rule and the a...

Errors.h

Rule::format
std::string format
Definition: Rule.h:28

Grammar::count_terminals
size_t count_terminals(nonterminal_t nt) const
Definition: Grammar.h:218

VirtualMachineState::FT
std::function< void(this_t *, int)> FT
Definition: VirtualMachineState.h:56

CERR
#define CERR
Definition: IO.h:23

Grammar::expand_to_neighbor
void expand_to_neighbor(Node &node, int &which)
Definition: Grammar.h:937

Grammar< MyInput, bool, bool, MyObject, MyInput, ObjectSet, ft< bool, bool >, ft< bool, bool, bool >, ft< bool, MyObject >, ft< bool, MyObject, MyObject >, ft< bool, bool, MyObject > >::FT
typename VirtualMachineState_t::FT FT
Definition: Grammar.h:62

Grammar::neighbors
size_t neighbors(const Node &node) const
Definition: Grammar.h:923

BaseNode::child
this_t & child(const size_t i)
Definition: BaseNode.h:175

Node::rule
const Rule * rule
Definition: Node.h:32

nonterminal_t
unsigned short nonterminal_t
Definition: Nonterminal.h:4

Node::set_child
void set_child(const size_t i, Node &n)
Definition: Node.h:88

Grammar::change_probability
void change_probability(const std::string &s, const double newp)
Definition: Grammar.h:211

Grammar::add_terminal
void add_terminal(std::string fmt, T x, double p=1.0, Op o=Op::Standard, int a=0)
Add a variable that is NOT A function – simplification for adding alphabets etc. This just wraps stu...
Definition: Grammar.h:435

Grammar::rule_normalizer
double rule_normalizer(const nonterminal_t nt) const
Definition: Grammar.h:601

Grammar::log_probability
double log_probability(const Node &n) const
This computes the expected length of productions from this grammar, counting terminals and nontermina...
Definition: Grammar.h:849

LAMBDAXDOT_STRING
const std::string LAMBDAXDOT_STRING
Definition: Strings.h:20

is_prefix
bool is_prefix(const T &prefix, const T &x)
Check if prefix is a prefix of x – works with iterables, including strings and vectors.
Definition: Strings.h:39

Rule::p
double p
Definition: Rule.h:30

Grammar::count_nonterminals
constexpr size_t count_nonterminals() const
Definition: Grammar.h:181

ENDL
#define ENDL
Definition: IO.h:21

Grammar::add_ft
void add_ft(std::string fmt, T(*_f)(args...), double p=1.0, Op o=Op::Standard, int a=0)
Adds this as a function type (see Function.h) rather than as a function itself. For example...
Definition: Grammar.h:451

Grammar::RuleIterator::operator==
bool operator==(const RuleIterator &rhs) const
Definition: Grammar.h:165

Grammar::is_in_GRAMMAR_TYPES
static constexpr bool is_in_GRAMMAR_TYPES()
For a given nt, returns the number of finite trees that nt can expand to if it's finite; 0 if it's infi...
Definition: Grammar.h:281

Grammar::get_index_of
size_t get_index_of(const Rule *r) const
Definition: Grammar.h:473

NaN
constexpr double NaN
Definition: Numerics.h:21

Random.h
This is a thread_local rng whose first object is used to seed others (in other threads). This way, we can have thread_local rngs that are all seeded deterministically in Fleet via --seed=X.

Grammar::count_rules
size_t count_rules(const nonterminal_t nt) const
Definition: Grammar.h:189

Primitive::op
Op op
Definition: Primitive.h:15

Op::Standard

Grammar::get_counts
std::vector< size_t > get_counts(const Node &node) const
Compute a vector of counts of how often each rule was used, in a standard order given by iterating ov...
Definition: Grammar.h:752

Grammar::RuleIterator::operator++
RuleIterator & operator++()
Definition: Grammar.h:140

Grammar< MyInput, bool, bool, MyObject, MyInput, ObjectSet, ft< bool, bool >, ft< bool, bool, bool >, ft< bool, MyObject >, ft< bool, MyObject, MyObject >, ft< bool, bool, MyObject > >::TypeTuple
std::tuple< GRAMMAR_TYPES... > TypeTuple
Definition: Grammar.h:53

Rule::nt
nonterminal_t nt
Definition: Rule.h:27

Grammar::Grammar
Grammar()
Definition: Grammar.h:90

ft
std::function< out(args...)> ft
Definition: Functional.h:14

VirtualMachinePool.h
A little class that any VirtualMachinePool AND VirtualMachines inherit to control their behavior...

Grammar::complete
void complete(Node &node)
Definition: Grammar.h:985

Grammar::get_rule_indexer
const std::map< const Rule *, size_t > get_rule_indexer() const
Returns a map from rule pointers to indices in e.g. a vector, so that every rule has a unique index a...
Definition: Grammar.h:734

TypeIndex
Helpers to find the numerical index (as a nonterminal_t) of a given type within a tuple.
Definition: Miscellaneous.h:105

Grammar::add
void add(std::string fmt, Primitive< T, args... > &b, double p=1.0, int a=0)
Definition: Grammar.h:312

Grammar::RuleIterator::RuleIterator
RuleIterator(this_t *g, bool is_end)
Definition: Grammar.h:125

Grammar::get_rule
virtual Rule * get_rule(const std::string s) const
Definition: Grammar.h:571

Grammar::get_rule
virtual Rule * get_rule(const nonterminal_t nt, size_t k) const
Definition: Grammar.h:489

Grammar::add
void add(std::string fmt, std::function< T(args...)> f, double p=1.0, Op o=Op::Standard, int a=0)
Definition: Grammar.h:326

contains
bool contains(const std::string &s, const std::string &x)
Definition: Strings.h:53

Grammar::get_counts
std::vector< size_t > get_counts(const Node &node, const std::map< const Rule *, size_t > &indexer) const
Compute a vector of counts of how often each rule was used, using indexer to map each rule to an inde...
Definition: Grammar.h:762

Grammar::RuleIterator::operator*
Rule & operator*() const
Definition: Grammar.h:136

Grammar::get_rule
virtual Rule * get_rule(const nonterminal_t nt, const Op o, const int a=0)
Definition: Grammar.h:502

Op::X

Grammar::rules
std::vector< Rule > rules[N_NTs]
Definition: Grammar.h:74

Grammar::add_vms
void add_vms(std::string fmt, FT *f, double p=1.0, Op o=Op::Standard, int a=0)
Definition: Grammar.h:295