Zero  0.1.0
zipfian.h
Go to the documentation of this file.
1 /* -*- mode:C++; c-basic-offset:4 -*-
2  Shore-kits -- Benchmark implementations for Shore-MT
3 
4  Copyright (c) 2007-2009
5  Data Intensive Applications and Systems Labaratory (DIAS)
6  Ecole Polytechnique Federale de Lausanne
7 
8  All Rights Reserved.
9 
10  Permission to use, copy, modify and distribute this software and
11  its documentation is hereby granted, provided that both the
12  copyright notice and this permission notice appear in all copies of
13  the software, derivative works or modified versions, and any
14  portions thereof, and that both notices appear in supporting
15  documentation.
16 
17  This code is distributed in the hope that it will be useful, but
18  WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS
20  DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
21  RESULTING FROM THE USE OF THIS SOFTWARE.
22 */
23 
24 #ifndef __ZIPFIAN_H
25 #define __ZIPFIAN_H
26 
27 //#include "rand48.h"
28 #include <cmath>
29 
30 /* Return a zipfian-distributed random number.
31 
32  The computation is based on a x**-(1/s) curve, which has the property that
33  - it's a straight line on log-log scale
34  - increasing s makes the slope steeper (= more skewed)
35  - the value approaches infinity as x approaches zero
36 
37  Therefore, we need to compute three things:
38 
39  cutoff=pow(n, -k): because 1/x can produce infinitely large values,
40  and we only want to allow values up to 'n', we have to generate
41  inputs ~U(cutoff, 1) rather than ~U(0, 1). The larger the skew and
42  the range, the closer to zero we can tolerate.
43 
44  output=pow(u, -1./k): this comes straight from the observation we
45  started with; output(1) = 1, and output(cutoff) = n; increasing k
46  makes the output approach n more slowly, as desired.
47 
48  k=1.1*skew-1: for some reason, there's a very precise relationship
49  between the k we want the final distribution to have and the k we
50  should supply. I have no idea why, but empirically it holds very
51  precisely for every combination of n and k which I've tested.
52 */
53 
54 struct zipfian {
55 // rand48 _rng;
56  double _k;
57 
58  double _mk_inv;
59 
60  double _cutoff;
61 
62  double _1mcutoff;
63 
64 // zipfian(int n, double s, long seed_val=rand48::DEFAULT_SEED)
65  zipfian(int n, double s)
66 // _rng(seed_val), _k(1.1*s-1), _mk_inv(-1./_k)
67  : _k(1.1 * s - 1),
68  _mk_inv(-1. / _k),
69  _cutoff(pow(n, -_k)),
70  _1mcutoff(1 - _cutoff) {}
71 
72  int operator()(double u) {
73  return next(u);
74  }
75 
76  int next(double uniform_input) {
77  //double u = _rng.drand();
78  uniform_input *= _1mcutoff;
79  uniform_input += _cutoff;
80  return (int)pow(uniform_input, _mk_inv);
81  }
82 };
83 
84 #endif // __ZIPFIAN_H
double _mk_inv
Definition: zipfian.h:58
int next(double uniform_input)
Definition: zipfian.h:76
Definition: zipfian.h:54
double _cutoff
Definition: zipfian.h:60
int operator()(double u)
Definition: zipfian.h:72
zipfian(int n, double s)
Definition: zipfian.h:65
double _k
Definition: zipfian.h:56
double _1mcutoff
Definition: zipfian.h:62