Crombie Tools
Hist.h
Go to the documentation of this file.
1 #ifndef CROMBIE_HIST_H
2 #define CROMBIE_HIST_H
3 
4 // I really don't like ROOT's global hash tables and confusing lifetimes
5 // Here, I make a simple data structure that can make a histogram
6 
7 #include <cmath>
8 #include <exception>
9 #include <vector>
10 #include <list>
11 #include <limits>
12 #include <tuple>
13 
14 #include "crombie/Types.h"
15 #include "crombie/Debug.h"
16 
17 #include "TH1D.h"
18 
19 namespace crombie {
20  namespace Hist {
21 
22  class Hist {
23  public:
24  /**
25  Constructor of custom Hist class
26  @param label Is the label to go on the x-axis
27  @param nbins The number of bins to show in the plot.
28  There is also one overflow and one underflow bin stored internally.
29  @param min The minimum value shown on the x-axis
30  @param max The maximum value on the x-axis
31  @param w2 If true, store the squared sum of weights, otherwise don't bother
32  @param total_events Is the total weight of events in the file(s) filling this histogram
33  */
34  Hist(const std::string label = "",
35  unsigned nbins = 0, double min = 0, double max = 0,
36  bool w2 = true, double total_events = 0)
37  : label{label}, nbins{nbins}, min{min}, max{max},
38  contents(nbins + 2), sumw2((nbins + 2) * w2),
39  total{total_events} {}
40 
41  /// Fills this histogram with some value and weight
42  void fill (double val, double weight = 1.0);
43  /// Get a reference to a histogram representing an uncertainty
44  Hist& get_unc_hist(const std::string& sys);
45  /// Create and return a histogram inside an envelope
46  Hist& get_env_hist(const std::string& sys);
47 
48  /// Get the uncertainty histogram, either up or down. Helpful for dumping datacards.
49  const Hist& fetch_unc (const std::string& key, const std::string& direction) const;
50 
51  void add (const Hist& other);
52  /// Scale this histogram by a direct scale factor
53  void scale (const double scale);
54  /**
55  Scale this histogram to a luminosity and cross section.
56  The result will be invalid if this scale function is called
57  after any other call to Hist::scale.
58  */
59  void scale (const double lumi, const double xs);
60 
61  /// Returns a Hist that is a ratio between this and another Hist
62  Hist ratio (const Hist& other) const;
63 
64  /**
65  Returns a pointer to a histogram that is owned by global list.
66  This list will handle the deletion when the program is done running.
67  Not thread-safe.
68  @param allunc If this is false, only the stat uncertainties will be included in the histogram.
69  */
70  TH1D* roothist (bool allunc = true) const;
71 
72  /**
73  Get all of the uncertainty histograms.
74  The keys are the name of the uncertainty including "Up" or "Down" at the end.
75  */
76  Types::map<TH1D*> unchists () const;
77 
78  /**
79  Get the maximum bin and the total number of bins.
80  Does not include overflow bins.
81  */
82  std::pair<unsigned, unsigned> get_maxbin_outof () const;
83 
84  /// Get the maximum value including uncertainties (for plotting)
85  double max_w_unc () const;
86  /// Get the minimum value including uncertainties, but not less than 0.0 (for plotting)
87  double min_w_unc (const bool includezeros = true) const;
88 
89  /// Sets the value of the total number of events, throws exception if total is already set.
90  void set_total (double newtotal);
91 
92  private:
93  std::string label {};
94  unsigned nbins {};
95  double min {};
96  double max {};
97 
98  std::vector<double> contents {};
99  std::vector<double> sumw2 {};
100 
101  double total {}; ///< Stores the total weights of files filling this
102 
103  double get_unc (unsigned bin, bool allunc = true) const; ///< Find the full uncertainty from uncs hists and sumw2
104  void doscale (const double scale); ///< Scales histogram without scaling uncertainties
105  void doscale (const double lumi, const double xs); ///< Scales histogram without scaling uncertainties
106  Types::map<Hist> uncs; ///< Store of alternate histograms for uncertainties
107 
108  /// Apply some function to this histogram and all its uncertainties
109  template<typename F> void doall (const F& func);
110 
111  using Envelope = std::tuple<Hist, Hist, std::list<Hist>>;
112  mutable Types::map<Envelope> envs; ///< Envelope information
113  mutable bool envs_set{false}; ///< Determine whether envelope minmax has been set or not
114  void set_env_min_max () const; ///< Sets the min and max histograms for each envelope
115  };
116 
117 
118  namespace {
119  // Put this here so we don't have to move around with the Hist objects
120  std::list<TH1D> histstore;
121  }
122 
123 
124  void Hist::fill(double val, double weight) {
125  unsigned bin {std::min(nbins + 1, static_cast<unsigned>(std::ceil(((val - min) * nbins/(max - min)))))};
126  if (bin >= contents.size()) {
127  std::cerr << bin << " " << val << " " << nbins << " " << min << " " << max << std::endl;
128  throw std::runtime_error{"bin too big"};
129  }
130  contents[bin] += weight;
131  if (sumw2.size())
132  sumw2[bin] += std::pow(weight, 2);
133  }
134 
135 
136  Hist& Hist::get_unc_hist(const std::string& sys) {
137  uncs.insert({sys, {label, nbins, min, max, false}});
138  return uncs[sys];
139  }
140 
141 
142  Hist& Hist::get_env_hist(const std::string& sys) {
143  auto& mysys = std::get<2>(envs[sys]);
144  mysys.push_front({label, nbins, min, max, false});
145  return mysys.front();
146  }
147 
148 
149  void Hist::add(const Hist& other) {
150  Debug::Debug(__PRETTY_FUNCTION__, "Start add", this, nbins);
151  if (nbins == 0)
152  *this = other; // If this not set yet, just simple assignment
153  else {
154  total += other.total; // Increase the total events count
155  if (other.nbins != nbins) { // Check for binning error
156  std::cerr << "Num bins other: " << other.nbins << " me: " << nbins << std::endl;
157  throw std::runtime_error{"Hists don't have same number of bins"};
158  }
159 
160  // Add these histograms together
161  for (unsigned ibin = 0; ibin < contents.size(); ++ibin) {
162  contents[ibin] += other.contents[ibin];
163  if (sumw2.size())
164  sumw2[ibin] += other.sumw2[ibin];
165  }
166 
167  // Add the up/down uncertainties
168  for (auto& unc : uncs)
169  unc.second.add(other.uncs.at(unc.first));
170 
171  // Sum the envelope histograms together
172  for (auto& env : envs) {
173  // If envelopes evaluted, just do those
174  if (envs_set) {
175  if (not other.envs_set)
176  throw std::logic_error{"Trying to add one histogram with envelope set to another one without"};
177 
178  std::get<0>(env.second).add(std::get<0>(other.envs.at(env.first)));
179  std::get<1>(env.second).add(std::get<1>(other.envs.at(env.first)));
180  }
181  else {
182  // Get the other list
183  const auto& otherenv = std::get<2>(other.envs.at(env.first));
184  auto& meenv = std::get<2>(env.second);
185  if (otherenv.size() != meenv.size()) { // Error checking
186  std::cerr << "Num envs other: " << otherenv.size() << " me: " << meenv.size() << std::endl;
187  throw std::runtime_error{"Hists don't have same number of envelope histograms"};
188  }
189  // Loop through both lists at the same time, and add them together
190  auto iother = otherenv.cbegin();
191  for (auto& me : meenv) {
192  me.add(*iother);
193  ++iother;
194  }
195  }
196  }
197  }
198  }
199 
200 
201  void Hist::doscale(const double scale) {
202  Debug::Debug(__PRETTY_FUNCTION__, "Scaling", scale);
203  for (unsigned ibin = 0; ibin < contents.size(); ++ibin) {
204  contents[ibin] *= scale;
205  if (sumw2.size())
206  sumw2[ibin] *= std::pow(scale, 2);
207  }
208  }
209 
210 
211  void Hist::doscale(const double lumi, const double xs) {
212  Debug::Debug(__PRETTY_FUNCTION__, "Scaling", lumi, xs);
213  doscale(lumi * xs / total);
214  }
215 
216 
217  template<typename F> void Hist::doall(const F& func) {
218  func(*this);
219  for (auto& unc : uncs)
220  func(unc.second);
221 
222  set_env_min_max();
223  for (auto& env : envs) {
224  for (auto& hist : std::get<2>(env.second))
225  func(hist);
226  if (envs_set) {
227  func(std::get<0>(env.second));
228  func(std::get<1>(env.second));
229  }
230  }
231  }
232 
233 
234  void Hist::scale(const double scale) {
235  doall([scale] (auto& hist) { hist.doscale(scale); });
236  }
237 
238 
239  void Hist::scale(const double lumi, const double xs) {
240  doall([lumi, xs] (auto& hist) { hist.doscale(lumi, xs); });
241  }
242 
243 
244  TH1D* Hist::roothist(bool allunc) const {
245  static unsigned plot = 0;
246  auto title = std::string(";") + label + ";Events";
247  Debug::Debug(__PRETTY_FUNCTION__, "Generating root hist with", title, nbins, min, max, "from", this);
248  histstore.emplace_back(std::to_string(plot++).data(), title.data(), static_cast<int>(nbins), min, max);
249  auto& hist = histstore.back();
250  for (unsigned ibin = 0; ibin < contents.size(); ++ibin) {
251  hist.SetBinContent(ibin, contents[ibin]);
252  hist.SetBinError(ibin, get_unc(ibin, allunc));
253  }
254  Debug::Debug(__PRETTY_FUNCTION__, "hist with", hist.Integral());
255  return &hist;
256  }
257 
258 
259  std::pair<unsigned, unsigned> Hist::get_maxbin_outof () const {
260  double max = 0;
261  unsigned maxbin = 0;
262  for (unsigned ibin = 1; ibin <= nbins; ++ibin) {
263  if (contents[ibin] > max) {
264  max = contents[ibin];
265  maxbin = ibin;
266  }
267  }
268  return std::make_pair(maxbin, std::max(nbins, 1u));
269  }
270 
271 
272  double Hist::max_w_unc () const {
273  double output = 0;
274  for (unsigned ibin = 1; ibin <= nbins; ++ibin)
275  output = std::max(contents[ibin] + get_unc(ibin), output);
276  return output;
277  }
278 
279 
280  double Hist::min_w_unc (const bool includezeros) const {
281  double output = std::numeric_limits<double>::max();
282  for (unsigned ibin = 1; ibin <= nbins; ++ibin) {
283  if (contents[ibin] or includezeros)
284  output = std::min(contents[ibin] - get_unc(ibin), output);
285  }
286  return std::max(output, 0.0);
287  }
288 
289 
290  Hist Hist::ratio(const Hist& other) const {
291  Debug::Debug(__PRETTY_FUNCTION__, "ratio", nbins, min, max);
292  Hist output{*this};
293 
294  auto change = [&other] (Hist& tochange) {
295  for (unsigned ibin = 0; ibin < other.contents.size(); ++ibin) {
296  auto bincontent = other.contents.at(ibin);
297  if (bincontent) {
298  tochange.contents[ibin] /= other.contents.at(ibin);
299  if (tochange.sumw2.size())
300  tochange.sumw2[ibin] /= std::pow(other.contents.at(ibin), 2);
301  }
302  else {
303  tochange.contents[ibin] = 1.0;
304  if (tochange.sumw2.size())
305  tochange.sumw2[ibin] = 0;
306  }
307  }
308  };
309 
310  output.doall(change);
311 
312  Debug::Debug(__PRETTY_FUNCTION__, "output", output.nbins, output.min, output.max);
313  return output;
314  }
315 
316 
317  void Hist::set_total (double newtotal) {
318  if (total)
319  throw std::logic_error{"Attempted to set total value for histogram twice. Probably a bug."};
320  total = newtotal;
321  }
322 
323 
324  double Hist::get_unc(unsigned bin, bool allunc) const {
325  double w2 = sumw2.size() ? sumw2.at(bin) : 0;
326  if (allunc) {
327  // Divide the uncertainty from each histogram by two to not double count Up and Down
328  for (auto& unc : uncs)
329  w2 += std::pow(contents.at(bin) - unc.second.contents.at(bin), 2)/2;
330 
331  set_env_min_max();
332  // Do the same thing with the min/max envelope uncertainties
333  for (auto& env : envs) {
334  w2 += std::pow(contents.at(bin) - std::get<0>(env.second).contents.at(bin), 2)/2;
335  w2 += std::pow(contents.at(bin) - std::get<1>(env.second).contents.at(bin), 2)/2;
336  }
337  }
338 
339  return std::sqrt(w2);
340  }
341 
342 
343  void Hist::set_env_min_max() const {
344  // If we did this before, do nothing
345  if (envs_set)
346  return;
347 
348  // First time we do this
349  envs_set = true;
350  for (auto& env : envs) {
351  // Only want a histogram without uncertianties
352  auto& hmin = std::get<0>(env.second) = Hist(label, nbins, min, max, false);
353  hmin.contents = contents; // Start with current contents
354  auto& hmax = std::get<1>(env.second) = hmin;
355  // Loop through all the histograms in the envelope
356  auto& envs_list = std::get<2>(env.second);
357  for (auto& part : envs_list) {
358  for (unsigned ibin = 0; ibin != contents.size(); ++ibin) {
359  hmin.contents[ibin] = std::min(hmin.contents[ibin], part.contents[ibin]);
360  hmax.contents[ibin] = std::max(hmax.contents[ibin], part.contents[ibin]);
361  }
362  }
363  envs_list.clear();
364  }
365  }
366 
367 
368  const Hist& Hist::fetch_unc(const std::string& key, const std::string& direction) const {
369  if (direction != "Up" and direction != "Down")
370  throw std::logic_error{"Direction must be 'Up' or 'Down'."};
371  // Check if envelope
372  if (envs.find(key) != envs.end()) {
373  set_env_min_max();
374  if (direction == "Up")
375  return std::get<1>(envs.at(key));
376  return std::get<0>(envs.at(key));
377  }
378  else
379  return uncs.at(key + direction);
380  }
381 
382 
384  Types::map<TH1D*> output;
385  for (auto& unc : uncs)
386  output.insert({unc.first, unc.second.roothist(false)});
387  // Use the fetch_unc function to ensure that the min-max is correct
388  for (auto& env : envs) {
389  for (const std::string& dir : {"Up", "Down"})
390  output.insert({env.first + dir, fetch_unc(env.first, dir).roothist(false)});
391  }
392  return output;
393  }
394 
395  }
396 }
397 
398 
399 #endif