Zero
0.1.0
src
cmd
kits
util
zipfian.h
Go to the documentation of this file.
1
/* -*- mode:C++; c-basic-offset:4 -*-
2
Shore-kits -- Benchmark implementations for Shore-MT
3
4
Copyright (c) 2007-2009
5
Data Intensive Applications and Systems Labaratory (DIAS)
6
Ecole Polytechnique Federale de Lausanne
7
8
All Rights Reserved.
9
10
Permission to use, copy, modify and distribute this software and
11
its documentation is hereby granted, provided that both the
12
copyright notice and this permission notice appear in all copies of
13
the software, derivative works or modified versions, and any
14
portions thereof, and that both notices appear in supporting
15
documentation.
16
17
This code is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
20
DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
21
RESULTING FROM THE USE OF THIS SOFTWARE.
22
*/
23
24
#ifndef __ZIPFIAN_H
25
#define __ZIPFIAN_H
26
27
//#include "rand48.h"
28
#include <cmath>
29
30
/* Return a zipfian-distributed random number.
31
32
The computation is based on a x**-(1/s) curve, which has the property that
33
- it's a straight line on log-log scale
34
- increasing s makes the slope steeper (= more skewed)
35
- the value approaches infinity as x approaches zero
36
37
Therefore, we need to compute three things:
38
39
cutoff=pow(n, -k): because 1/x can produce infinitely large values,
40
and we only want to allow values up to 'n', we have to generate
41
inputs ~U(cutoff, 1) rather than ~U(0, 1). The larger the skew and
42
the range, the closer to zero we can tolerate.
43
44
output=pow(u, -1./k): this comes straight from the observation we
45
started with; output(1) = 1, and output(cutoff) = n; increasing k
46
makes the output approach n more slowly, as desired.
47
48
k=1.1*skew-1: for some reason, there's a very precise relationship
49
between the k we want the final distribution to have and the k we
50
should supply. I have no idea why, but empirically it holds very
51
precisely for every combination of n and k which I've tested.
52
*/
53
54
struct
zipfian
{
55
// rand48 _rng;
56
double
_k
;
57
58
double
_mk_inv
;
59
60
double
_cutoff
;
61
62
double
_1mcutoff
;
63
64
// zipfian(int n, double s, long seed_val=rand48::DEFAULT_SEED)
65
zipfian
(
int
n,
double
s)
66
// _rng(seed_val), _k(1.1*s-1), _mk_inv(-1./_k)
67
: _k(1.1 * s - 1),
68
_mk_inv(-1. / _k),
69
_cutoff(pow(n, -_k)),
70
_1mcutoff(1 - _cutoff) {}
71
72
int
operator()
(
double
u) {
73
return
next
(u);
74
}
75
76
int
next
(
double
uniform_input) {
77
//double u = _rng.drand();
78
uniform_input *=
_1mcutoff
;
79
uniform_input +=
_cutoff
;
80
return
(
int
)pow(uniform_input, _mk_inv);
81
}
82
};
83
84
#endif // __ZIPFIAN_H
zipfian::_mk_inv
double _mk_inv
Definition:
zipfian.h:58
zipfian::next
int next(double uniform_input)
Definition:
zipfian.h:76
zipfian
Definition:
zipfian.h:54
zipfian::_cutoff
double _cutoff
Definition:
zipfian.h:60
zipfian::operator()
int operator()(double u)
Definition:
zipfian.h:72
zipfian::zipfian
zipfian(int n, double s)
Definition:
zipfian.h:65
zipfian::_k
double _k
Definition:
zipfian.h:56
zipfian::_1mcutoff
double _1mcutoff
Definition:
zipfian.h:62
Generated by
1.8.12