// NOTE(review): this is a fragment of a listing -- the enclosing function's
// signature and its closing braces are not visible here, so only the visible
// statements are described.
11 template<
typename datum_t>
// Iterate over the rows produced by read_csv<2>(datapath, false, '\t'),
// binding each row to the pair (s, cnt). Presumably these are two
// tab-separated columns; the meaning of the `false` argument is not visible
// here -- confirm against read_csv.
19 for(
auto [s, cnt] : read_csv<2>(datapath,
false,
'\t')) {
// Append one element to `data`, constructed in place from four arguments:
// an empty string, s, NaN, and cnt parsed as a double via std::stod.
// How the element type interprets these arguments is not visible here --
// TODO confirm against its constructor.
20 data.emplace_back(std::string(
""), s,
NaN, std::stod(cnt));
// highest: select up to N elements of m with the largest `count` values and
// record them in a map from each element's `output` to its `count`.
//
// T     -- key type of the resulting map (the type of `output`).
// TDATA -- element type; the visible code reads its `output` and `count` members.
// m     -- input elements; taken by const reference and copied before sorting.
// N     -- maximum number of elements to take.
//
// If several selected elements share the same `output`, the later assignment
// overwrites the earlier one, so the map may hold fewer than N entries.
// NOTE(review): the remainder of the function (presumably `return out;` and the
// closing braces) is not visible in this fragment.
29 template<
typename T,
typename TDATA>
30 std::map<T, double>
highest(
const std::vector<TDATA>& m,
unsigned long N) {
// Result map: keys are v[i].output, values are v[i].count.
34 std::map<T, double> out;
// Work on a copy so that sorting does not reorder the caller's vector.
36 std::vector<TDATA> v = m;
// Sort in descending order of count. The comparator takes both arguments by
// value (`auto`), so each comparison copies the two elements.
37 std::sort(v.begin(), v.end(), [](
auto x,
auto y){
return x.count > y.count; });
// Bounding by min(N, v.size()) keeps the index in range when v has fewer
// than N elements.
39 for(
size_t i=0;i<std::min(N, v.size()); i++) {
40 out[v[i].output] = v[i].count;
// NOTE(review): this is a fragment of a listing -- the function signature, the
// loops that update the counters, and the closing braces are not visible here.
47 template<
typename TDATA>
// A: the result of model.best(N, true).
54 auto A = model.
best(N,
true);
// B: map produced by highest<std::string,TDATA> over `data`, with the count
// capped at data.size() (highest() applies the same min against the vector
// size in its own loop bound).
55 auto B = highest<std::string,TDATA>(data, std::min(N,data.size()) );
// mdata: the set of distinct `output` values appearing in `data`.
// The loop variable is taken by value, so each element is copied.
57 std::set<std::string> mdata;
58 for(
auto v : data) mdata.insert(v.output);
// Counters used as the numerators of the two ratios returned below; the code
// that increments them is not visible in this fragment.
60 unsigned long nprec = 0;
66 unsigned long nrec = 0;
// Checks model.count(b.first); `b` is presumably an entry of B, but the
// enclosing loop header is not visible here -- confirm.
68 if(model.
count(b.first))
// Returns the pair (nprec / A.size(), nrec / B.size()), both computed in double.
// NOTE(review): if A or B is empty the corresponding division is by zero and
// yields NaN or infinity -- confirm callers never pass empty inputs.
72 return std::make_pair(
double(nprec)/A.size(), double(nrec)/B.size());
Definition: DiscreteDistribution.h:25
size_t count(T x) const
Definition: DiscreteDistribution.h:201
constexpr double NaN
Definition: Numerics.h:21
This stores a distribution from values of T to log probabilities. It is used as the return value from...
std::vector< T > best(size_t n, bool include_equal) const
Definition: DiscreteDistribution.h:166