RdtscClock.h
#pragma once

#include "quill/core/Attributes.h"
#include "quill/core/ChronoTimeUtils.h"
#include "quill/core/Common.h"
#include "quill/core/Rdtsc.h"

#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

QUILL_BEGIN_NAMESPACE

namespace detail
{
/** Converts tsc ticks to nanoseconds since epoch. */
class RdtscClock
{
public:
  /** A static class that calculates the rdtsc ticks per second. */
  class RdtscTicks
  {
  public:
    /** Meyers singleton; initialization is thread-safe since C++11. */
    QUILL_NODISCARD static RdtscTicks& instance()
    {
      static RdtscTicks inst;
      return inst;
    }

    /** Returns the calibrated conversion factor in nanoseconds per tsc tick. */
    QUILL_NODISCARD double ns_per_tick() const noexcept { return _ns_per_tick; }

  private:
    RdtscTicks()
    {
      // Convert rdtsc ticks to wall time:
      // 1. Read the real time and the rdtsc counter together.
      // 2. Spin for a fixed duration to measure how many rdtsc ticks occur per nanosecond.
      // We calculate _ticks_per_ns as the median over a number of observations, and we always
      // use an odd number of trials so the median is a single element.
      constexpr std::chrono::milliseconds spin_duration = std::chrono::milliseconds{10};
      constexpr size_t max_trials = 15;
      constexpr size_t min_trials = 3;
      constexpr double convergence_threshold = 0.01; // 1% threshold

      std::vector<double> rates;
      rates.reserve(max_trials);

      double previous_median = 0.0;

      for (size_t i = 0; i < max_trials; ++i)
      {
        auto const beg_ts = detail::get_timestamp<std::chrono::steady_clock>();
        uint64_t const beg_tsc = rdtsc();
        uint64_t end_tsc;
        std::chrono::nanoseconds elapsed_ns;

        do
        {
          auto const end_ts = detail::get_timestamp<std::chrono::steady_clock>();
          end_tsc = rdtsc();
          elapsed_ns = end_ts - beg_ts;
        } while (elapsed_ns < spin_duration);

        // Record this trial's observed rate in tsc ticks per nanosecond
        rates.push_back(static_cast<double>(end_tsc - beg_tsc) / static_cast<double>(elapsed_ns.count()));

        // Check for convergence after the minimum number of trials, and only on an odd count
        // of trials so the median is a single element.
        if (((i + 1) >= min_trials) && (((i + 1) % 2) != 0))
        {
          std::nth_element(rates.begin(), rates.begin() + static_cast<ptrdiff_t>((i + 1) / 2), rates.end());
          double current_median = rates[(i + 1) / 2];

          // If we have converged, break early
          if (std::abs(current_median - previous_median) / current_median < convergence_threshold)
          {
            break;
          }

          previous_median = current_median;
        }
      }

      // Calculate the final median of the sampled rates.
      std::nth_element(rates.begin(), rates.begin() + static_cast<ptrdiff_t>(rates.size() / 2),
                       rates.end());

      double const ticks_per_ns = rates[rates.size() / 2];
      _ns_per_tick = 1 / ticks_per_ns;
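      // For example, a 3 GHz invariant TSC gives ticks_per_ns of roughly 3.0,
      // i.e. _ns_per_tick of roughly 0.333.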
    }

    double _ns_per_tick{0};
  };

  /** Creates the clock and converts the requested resync interval from nanoseconds to tsc ticks. */
  explicit RdtscClock(std::chrono::nanoseconds resync_interval)
    : _ns_per_tick(RdtscTicks::instance().ns_per_tick())
  {
    // convert the interval to ticks: ticks = nanoseconds / nanoseconds-per-tick
    double const calc_value = static_cast<double>(resync_interval.count()) / _ns_per_tick;

    // Check for overflow and negative values
    if (calc_value >= static_cast<double>(std::numeric_limits<int64_t>::max()) || calc_value < 0)
    {
      _resync_interval_ticks = std::numeric_limits<int64_t>::max();
    }
    else
    {
      _resync_interval_ticks = static_cast<int64_t>(calc_value);
    }

    _resync_interval_original = _resync_interval_ticks;

    if (!resync(2500))
    {
      // try to resync again with a higher allowed lag
      if (!resync(10000))
      {
        std::fprintf(stderr, "Failed to sync RdtscClock. Timestamps will be incorrect\n");
      }
    }
  }

  /** Converts a tsc counter value to nanoseconds since epoch. Backend thread only; see
      time_since_epoch_safe() for the thread-safe variant. */
  uint64_t time_since_epoch(uint64_t rdtsc_value) const noexcept
  {
    // should only get called by the backend thread

    // Get the current index; this is safe because this function is only ever called by the
    // same thread that performs the resync.
    auto const index = _version.load(std::memory_order_relaxed) & (_base.size() - 1);

    // take the diff between the given tsc value and the stored base tsc, then add it to the base wall time
    auto diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);

    // We need to resync after calculating the diff, otherwise the new base_tsc value would be
    // ahead of the passed tsc value.
    if (diff > _resync_interval_ticks)
    {
      resync(2500);
      diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);
    }

    return static_cast<uint64_t>(_base[index].base_time +
                                 static_cast<int64_t>(static_cast<double>(diff) * _ns_per_tick));
  }

  /** Thread-safe version of time_since_epoch(); may be called from any thread. */
  uint64_t time_since_epoch_safe(uint64_t rdtsc_value) const noexcept
  {
    // Thread-safe: can be called by any thread.
    // This function won't resync, as it can be called by anyone while only a single thread is
    // allowed to resync.
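    // This is a seqlock-style optimistic read: we load _version before and after reading the
    // base pair and retry if it changed in between, so a concurrent resync() (which publishes
    // with a release increment of _version) can never hand us a torn base_time/base_tsc pair.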
    uint32_t version;
    uint64_t wall_ts;

    do
    {
      version = _version.load(std::memory_order_acquire);
      auto const index = version & (_base.size() - 1);

      if (QUILL_UNLIKELY((_base[index].base_tsc) == 0 && (_base[index].base_time == 0)))
      {
        return 0;
      }

      // take the diff between the given tsc value and the stored base tsc, then add it to the base wall time
      auto const diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);
      wall_ts = static_cast<uint64_t>(_base[index].base_time +
                                      static_cast<int64_t>(static_cast<double>(diff) * _ns_per_tick));
    } while (version != _version.load(std::memory_order_acquire));

    return wall_ts;
  }

  /** Resyncs the base wall time and base tsc pair. Returns false if rdtsc could not be read
      within at most the given lag (in ticks) around the wall-clock read. */
  bool resync(uint32_t lag) const noexcept
  {
    // Sometimes we might get an interrupt and never resync, so we will try again up to max_attempts
    constexpr uint8_t max_attempts{4};

    for (uint8_t attempt = 0; attempt < max_attempts; ++attempt)
    {
      uint64_t const beg = rdtsc();
      // we force convert to nanoseconds because the precision of system_clock::time_point is
      // not portable across platforms
      auto const wall_time = static_cast<int64_t>(detail::get_timestamp_ns<std::chrono::system_clock>());
      uint64_t const end = rdtsc();

      if (QUILL_LIKELY(end - beg <= lag))
      {
        // update the next index
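        // We write into the slot that readers are not currently using (_base has two slots and
        // the active one is selected by the low bit of _version), then publish it below with a
        // release increment so time_since_epoch_safe() sees either the old pair or the new one,
        // never a mix of the two.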
        auto const index = (_version.load(std::memory_order_relaxed) + 1) & (_base.size() - 1);
        _base[index].base_time = wall_time;
        _base[index].base_tsc = _fast_average(beg, end);
        _version.fetch_add(1, std::memory_order_release);

        _resync_interval_ticks = _resync_interval_original;
        return true;
      }
    }

    // We failed to resync within max_attempts, but we don't want to keep retrying on each call
    // to time_since_epoch(), so we double the resync interval and will try again later.
    constexpr int64_t max_int64_half = std::numeric_limits<int64_t>::max() / 2;
    if (_resync_interval_ticks <= max_int64_half)
    {
      _resync_interval_ticks = _resync_interval_ticks * 2;
    }

    return false;
  }

  /** Returns the calibrated conversion factor in nanoseconds per tsc tick. */
  double nanoseconds_per_tick() const noexcept { return _ns_per_tick; }

private:
  /** A base wall-clock time paired with the tsc reading taken at the same instant. */
  struct BaseTimeTsc
  {
    BaseTimeTsc() = default;
    int64_t base_time{0};  /**< Base wall time in nanoseconds since epoch */
    uint64_t base_tsc{0};  /**< Tsc reading taken when base_time was captured */
  };

  /** Averages two tsc readings without overflow: (x & y) keeps the bits common to both and
      ((x ^ y) >> 1) adds half of the differing bits, which equals (x + y) / 2. */
  QUILL_NODISCARD static uint64_t _fast_average(uint64_t x, uint64_t y) noexcept
  {
    return (x & y) + ((x ^ y) >> 1);
  }

private:
  mutable int64_t _resync_interval_ticks{0};
  int64_t _resync_interval_original{0};
  double _ns_per_tick{0};

  alignas(QUILL_CACHE_LINE_ALIGNED) mutable std::atomic<uint32_t> _version{0};
  mutable std::array<BaseTimeTsc, 2> _base{};
};
} // namespace detail

QUILL_END_NAMESPACE
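
A minimal usage sketch, not part of the header. It assumes the header is included as
"quill/core/RdtscClock.h" alongside its sibling includes, that QUILL_BEGIN_NAMESPACE opens
namespace quill, and an illustrative 500 ms resync interval; only the RdtscClock and rdtsc()
interfaces shown above are relied upon.

#include "quill/core/Rdtsc.h"
#include "quill/core/RdtscClock.h"

#include <chrono>
#include <cstdint>
#include <cstdio>

int main()
{
  // Construction calibrates ns-per-tick (via RdtscTicks::instance()) and performs the first resync
  quill::detail::RdtscClock const clock{std::chrono::milliseconds{500}};

  // On the hot path we only capture the raw counter, which is very cheap
  uint64_t const tsc = quill::detail::rdtsc();

  // Later, off the hot path, convert the raw counter to nanoseconds since epoch.
  // time_since_epoch_safe() may be called from any thread; time_since_epoch() only from the
  // single thread that owns resyncing (the backend thread in quill).
  uint64_t const ns = clock.time_since_epoch_safe(tsc);

  std::printf("%llu ns since epoch\n", static_cast<unsigned long long>(ns));
  return 0;
}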