Fleet  0.0.9
Inference in the LOT
ReservoirSample.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include "Errors.h"
4 #include "TopN.h"
5 
15 template<typename T>
16 class ReservoirSample : public Serializable<ReservoirSample<T>> {
17 
18 public:
19 
20  std::vector<T> samples;
21  size_t capacity;
22  unsigned long N; // how many have I seen? Any time I *try* to add something to this, N gets incremented
23 
24 protected:
25  //mutable std::mutex lock;
26 
27 public:
28  ReservoirSample(size_t n=100) : capacity(n), N(0) { }
29 
30  void set_reservoir_size(const size_t s) const {
35  samples.reserve(s);
36  capacity = s;
37  }
38 
39  size_t size() const {
44  return samples.size();
45  }
46 
47  virtual void add(T x) {
48  ++N;
49 
50  if(samples.size() < capacity) {
51  samples.push_back(x);
52  }
53  else {
54  auto which = myrandom(N); // NOTE: it's not capacity, it's N!
55  if(which < capacity) {
56  samples[which] = x;
57  }
58  }
59  }
60  void operator<<(T x) { add(x); }
61 
62 
63  const std::vector<T>& values() const { return samples; }
64 
65  void clear() {
66  samples.clear();
67  }
68 
69  virtual std::string serialize() const override { throw NotImplementedError(); }
70 
71  static ReservoirSample<T> deserialize(const std::string&) { throw NotImplementedError(); }
72 
73 };
74 
75 
76 
77 
87 template<typename T>
88 class PosteriorWeightedReservoirSample : public Serializable<PosteriorWeightedReservoirSample<T>> {
89 
90 public:
91 
92  std::vector<T> samples;
93  size_t capacity; // how many should I have?
94  unsigned long N; // how many have I seen? Any time I *try* to add something to this, N gets incremented
95  double weight_lse;
96 
97  PosteriorWeightedReservoirSample(size_t s=100) : capacity(s), N(0), weight_lse(-infinity) { }
98 
99  void set_reservoir_size(const size_t s) const {
104  capacity = s;
105  samples.reserve(s);
106  }
107 
108  size_t size() const {
113  return samples.size();
114  }
115 
116  virtual void add(T x) {
117  ++N;
118 
119  if(std::isnan(x.posterior) or x.posterior == -infinity)
120  return;
121 
122  weight_lse = logplusexp(weight_lse, x.posterior);
123 
124  //https://en.wikipedia.org/wiki/Reservoir_sampling#Weighted_random_sampling
125 
126  if(samples.size() < capacity) {
127  samples.push_back(x);
128  }
129  else {
130  if(flip(exp(x.posterior - weight_lse))) {
131  auto which = myrandom(capacity);
132  samples[which] = x;
133  }
134  }
135  }
136  void operator<<(T x) { add(x); }
137 
142  const std::vector<T>& values() const { return samples; }
143  void clear() { samples.clear(); }
144 
145  virtual std::string serialize() const override { throw NotImplementedError(); }
147 
148 };
149 
void clear()
Definition: ReservoirSample.h:143
T myrandom(T max)
Definition: Random.h:176
double weight_lse
Definition: ReservoirSample.h:95
const std::vector< T > & values() const
Get a multiset of values (ignoring the reservoir weights)
Definition: ReservoirSample.h:142
virtual std::string serialize() const override
Definition: ReservoirSample.h:69
void set_reservoir_size(const size_t s) const
Definition: ReservoirSample.h:30
virtual void add(T x)
Definition: ReservoirSample.h:116
PosteriorWeightedReservoirSample(size_t s=100)
Definition: ReservoirSample.h:97
void operator<<(T x)
Definition: ReservoirSample.h:136
ReservoirSample(size_t n=100)
Definition: ReservoirSample.h:28
Definition: Serializable.h:4
virtual void add(T x)
Definition: ReservoirSample.h:47
unsigned long N
Definition: ReservoirSample.h:22
bool flip(float p=0.5)
Definition: Random.h:25
std::vector< T > samples
Definition: ReservoirSample.h:92
PosteriorWeightedReservoirSample
Definition: ReservoirSample.h:88
void operator<<(T x)
Definition: ReservoirSample.h:60
const std::vector< T > & values() const
Definition: ReservoirSample.h:63
constexpr double infinity
Definition: Numerics.h:20
T logplusexp(const T a, const T b)
Definition: Numerics.h:131
static PosteriorWeightedReservoirSample< T > deserialize(const std::string &)
Definition: ReservoirSample.h:146
virtual std::string serialize() const override
Definition: ReservoirSample.h:145
static ReservoirSample< T > deserialize(const std::string &)
Definition: ReservoirSample.h:71
size_t capacity
Definition: ReservoirSample.h:21
Definition: Errors.h:7
unsigned long N
Definition: ReservoirSample.h:94
void clear()
Definition: ReservoirSample.h:65
size_t capacity
Definition: ReservoirSample.h:93
ReservoirSample
Definition: ReservoirSample.h:16
size_t size() const
Definition: ReservoirSample.h:39
std::vector< T > samples
Definition: ReservoirSample.h:20
void set_reservoir_size(const size_t s) const
Definition: ReservoirSample.h:99
size_t size() const
Definition: ReservoirSample.h:108