// quill - RdtscClock.h
7 #pragma once
8 
#include "quill/core/Attributes.h"
#include "quill/core/ChronoTimeUtils.h"
#include "quill/core/Common.h"
#include "quill/core/Rdtsc.h"

#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>
22 
23 QUILL_BEGIN_NAMESPACE
24 
25 namespace detail
26 {
27 
28 #if defined(_WIN32) && defined(_MSC_VER) && !defined(__GNUC__)
29 #pragma warning(push)
30 #pragma warning(disable : 4324)
31 #endif
32 
/**
 * Converts tsc ticks to nanoseconds since epoch.
 */
class RdtscClock
{
38 public:
  /**
   * Calibrates and caches the duration of one tsc tick for the whole process.
   * A static class that calculates the rdtsc ticks per second.
   */
  class RdtscTicks
  {
  public:
    /** Meyers singleton: calibration runs exactly once, on first access. */
    QUILL_NODISCARD static RdtscTicks& instance()
    {
      static RdtscTicks inst;
      return inst;
    }

    /** @return the calibrated duration of a single tsc tick, in nanoseconds */
    QUILL_NODISCARD double ns_per_tick() const noexcept { return _ns_per_tick; }

  private:
    /**
     * Measures how many tsc ticks elapse per nanosecond of steady_clock time.
     *
     * Each trial spins for at least `spin_duration`, timing the spin with both
     * steady_clock and rdtsc, and records the observed ticks-per-ns rate. The
     * final value is the median over the recorded trials; the loop stops early
     * once the running median (checked on odd trial counts, after at least
     * `min_trials` trials) moves by less than `convergence_threshold` between
     * consecutive checks.
     */
    RdtscTicks()
    {
      // Convert rdtsc to wall time.
      // 1. Get real time and rdtsc current count
      // 2. Calculate how many rdtsc ticks can occur in one
      // calculate _ticks_per_ns as the median over a number of observations
      // we use always odd number of trials for easy median calc
      constexpr std::chrono::milliseconds spin_duration = std::chrono::milliseconds{10};
      constexpr size_t max_trials = 15;
      constexpr size_t min_trials = 3;
      constexpr double convergence_threshold = 0.01; // 1% threshold

      std::vector<double> rates;
      rates.reserve(max_trials);

      double previous_median = 0.0;

      for (size_t i = 0; i < max_trials; ++i)
      {
        auto const beg_ts = detail::get_timestamp<std::chrono::steady_clock>();
        uint64_t const beg_tsc = rdtsc();
        uint64_t end_tsc;
        std::chrono::nanoseconds elapsed_ns;

        // spin until at least spin_duration of steady_clock time has elapsed
        do
        {
          auto const end_ts = detail::get_timestamp<std::chrono::steady_clock>();
          end_tsc = rdtsc();
          elapsed_ns = end_ts - beg_ts;
        } while (elapsed_ns < spin_duration);

        rates.push_back(static_cast<double>(end_tsc - beg_tsc) / static_cast<double>(elapsed_ns.count()));

        // Check for convergence after minimum trials and only on an odd count of trials.
        if (((i + 1) >= min_trials) && (((i + 1) % 2) != 0))
        {
          // nth_element partially reorders `rates`; that is fine here because
          // only the median of the multiset matters, not the element order
          std::nth_element(rates.begin(), rates.begin() + static_cast<ptrdiff_t>((i + 1) / 2), rates.end());
          double current_median = rates[(i + 1) / 2];

          // If we've converged, break early
          // (first check has previous_median == 0, so the ratio is 1 and never converges)
          if (std::abs(current_median - previous_median) / current_median < convergence_threshold)
          {
            break;
          }

          previous_median = current_median;
        }
      }

      // Calculate final median.
      std::nth_element(rates.begin(), rates.begin() + static_cast<ptrdiff_t>(rates.size() / 2),
                       rates.end());

      double const ticks_per_ns = rates[rates.size() / 2];
      _ns_per_tick = 1 / ticks_per_ns;
    }

    // nanoseconds per tsc tick; 0 only until the constructor has run
    double _ns_per_tick{0};
  };
117 
118  /***/
119  explicit RdtscClock(std::chrono::nanoseconds resync_interval)
120  : _ns_per_tick(RdtscTicks::instance().ns_per_tick())
121  {
122  double const calc_value = static_cast<double>(resync_interval.count()) * _ns_per_tick;
123 
124  // Check for overflow and negative values
125  if (calc_value >= static_cast<double>(std::numeric_limits<int64_t>::max()) || calc_value < 0)
126  {
127  _resync_interval_ticks = std::numeric_limits<int64_t>::max();
128  }
129  else
130  {
131  _resync_interval_ticks = static_cast<int64_t>(calc_value);
132  }
133 
134  _resync_interval_original = _resync_interval_ticks;
135 
136  if (!resync(resync_lag_cycles))
137  {
138  // try to resync again with higher lag
139  if (!resync(resync_lag_cycles * 2u))
140  {
141  std::fprintf(stderr, "Failed to sync RdtscClock. Timestamps will be incorrect\n");
142  }
143  }
144  }
145 
146  /***/
  /**
   * Converts a raw tsc reading to nanoseconds since epoch by extrapolating
   * from the most recent (wall time, tsc) sync point. May trigger a resync.
   * Not thread-safe: intended for the single backend thread only.
   *
   * @param rdtsc_value a value previously obtained from rdtsc()
   * @return nanoseconds since epoch corresponding to rdtsc_value
   */
  uint64_t time_since_epoch(uint64_t rdtsc_value) const noexcept
  {
    // should only get called by the backend thread

    // get the current index; this is only safe because it is called by the
    // same (single) thread that performs the resync
    auto const index = _version.load(std::memory_order_relaxed) & (_base.size() - 1);

    // distance in ticks between the sampled tsc value and the last sync point
    auto diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);

    // we need to sync after we calculated otherwise base_tsc value will be ahead of passed tsc value
    if (diff > _resync_interval_ticks)
    {
      resync(resync_lag_cycles);
      // NOTE(review): `index` is deliberately not reloaded here, so this
      // recomputes against the pre-resync slot (matching the comment above);
      // the freshly published slot is picked up on the next call — confirm
      diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);
    }

    // extrapolate: base wall time + (tick delta converted to nanoseconds)
    return static_cast<uint64_t>(_base[index].base_time +
                                 static_cast<int64_t>(static_cast<double>(diff) * _ns_per_tick));
  }
167 
168  /***/
  /**
   * Thread-safe variant of time_since_epoch(). Reads the sync point with a
   * seqlock-style retry loop and never triggers a resync itself.
   *
   * @param rdtsc_value a value previously obtained from rdtsc()
   * @return nanoseconds since epoch, or 0 if no resync has ever succeeded
   */
  uint64_t time_since_epoch_safe(uint64_t rdtsc_value) const noexcept
  {
    // thread-safe, can be called by anyone
    // this function won't resync as it can be called by anyone and only a single thread resyncs
    uint32_t version;
    uint64_t wall_ts;

    do
    {
      // acquire pairs with the release fetch_add in resync(); if the version
      // changes while the slot is being read we simply retry with the new one
      version = _version.load(std::memory_order_acquire);
      auto const index = version & (_base.size() - 1);

      // both fields are zero-initialized and only written by a successful
      // resync, so (0, 0) means the clock has never been synced
      if (QUILL_UNLIKELY((_base[index].base_tsc) == 0 && (_base[index].base_time == 0)))
      {
        return 0;
      }

      // get rdtsc current value and compare the diff then add it to base wall time
      auto const diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);
      wall_ts = static_cast<uint64_t>(_base[index].base_time +
                                      static_cast<int64_t>(static_cast<double>(diff) * _ns_per_tick));
    } while (version != _version.load(std::memory_order_acquire));

    return wall_ts;
  }
194 
195  /***/
  /**
   * Captures a fresh (wall time, tsc) pair and publishes it to the slot of
   * _base that readers are not currently using, then makes it visible by
   * bumping _version with release semantics.
   *
   * @param lag maximum tsc delta (in cycles) allowed between the two rdtsc
   *            reads bracketing the wall-clock sample; larger deltas (e.g.
   *            caused by an interrupt) are rejected as inaccurate
   * @return true if a sample within the lag budget was captured
   */
  bool resync(uint32_t lag) const noexcept
  {
    // Sometimes we might get an interrupt and might never resync, so we will try again up to max_attempts
    constexpr uint8_t max_attempts{4};

    for (uint8_t attempt = 0; attempt < max_attempts; ++attempt)
    {
      uint64_t const beg = rdtsc();
      // we force convert to nanoseconds because the precision of system_clock::time-point is not portable across platforms.
      auto const wall_time = static_cast<int64_t>(detail::get_timestamp_ns<std::chrono::system_clock>());
      uint64_t const end = rdtsc();

      // accept only if the wall-clock read was tightly bracketed by the tsc reads
      if (QUILL_LIKELY(end - beg <= lag))
      {
        // update the next index (the slot readers are not using), then publish
        // with a release increment so time_since_epoch_safe() observes a
        // consistent (base_time, base_tsc) pair
        auto const index = (_version.load(std::memory_order_relaxed) + 1) & (_base.size() - 1);
        _base[index].base_time = wall_time;
        // midpoint of beg/end approximates the tsc value at the wall_time read
        _base[index].base_tsc = _fast_average(beg, end);
        _version.fetch_add(1, std::memory_order_release);

        // restore the normal resync cadence after a successful sync
        _resync_interval_ticks = _resync_interval_original;
        return true;
      }
    }

    // we failed to return earlier and we never resynced, but we don't really want to keep retrying on each call
    // to time_since_epoch() so we do non accurate resync we will increase the resync duration to resync later
    constexpr int64_t max_int64_half = std::numeric_limits<int64_t>::max() / 2;
    if (_resync_interval_ticks <= max_int64_half)
    {
      _resync_interval_ticks = _resync_interval_ticks * 2;
    }

    return false;
  }
231 
232  /***/
233  double nanoseconds_per_tick() const noexcept { return _ns_per_tick; }
234 
235 private:
  /**
   * A (wall-clock time, tsc reading) pair captured at the same instant by
   * resync(); serves as the extrapolation base for later tsc readings.
   */
  struct BaseTimeTsc
  {
    BaseTimeTsc() = default;
    int64_t base_time{0}; // nanoseconds since epoch at the sync point
    uint64_t base_tsc{0}; // tsc counter value at the sync point
  };
242 
243  /***/
244  QUILL_NODISCARD static uint64_t _fast_average(uint64_t x, uint64_t y) noexcept
245  {
246  return (x & y) + ((x ^ y) >> 1);
247  }
248 
private:
  // Max tsc delta (cycles) tolerated between the two rdtsc reads that bracket
  // the wall-clock sample in resync(); larger deltas are rejected as noisy.
  static constexpr uint32_t resync_lag_cycles {50'000};
  // Tick delta that triggers a resync in time_since_epoch(); doubled on each
  // failed resync so we do not retry on every call. mutable: adjusted from
  // const member functions.
  mutable int64_t _resync_interval_ticks{0};
  int64_t _resync_interval_original{0}; /**< stores the initial interval value, as if we fail to resync we increase the timer */
  double _ns_per_tick{0}; // cached from RdtscTicks at construction

  // Version counter for the seqlock-style reads; its lowest bit selects the
  // active slot in _base. Cache-line aligned to avoid false sharing.
  alignas(QUILL_CACHE_LINE_ALIGNED) mutable std::atomic<uint32_t> _version{0};
  mutable std::array<BaseTimeTsc, 2> _base{}; // double-buffered sync points
257 };
258 
259 #if defined(_WIN32) && defined(_MSC_VER) && !defined(__GNUC__)
260 #pragma warning(pop)
261 #endif
262 
263 } // namespace detail
264 
265 QUILL_END_NAMESPACE
// Doxygen cross-reference notes (extraction residue, preserved as comments):
// - Sets up a signal handler to handle fatal signals. Definition: BackendManager.h:24
// - RdtscClock: converts tsc ticks to nanoseconds since epoch. Definition: RdtscClock.h:36
// - QUILL_NODISCARD QUILL_ATTRIBUTE_HOT uint64_t rdtsc() noexcept — gets the TSC counter. Definition: Rdtsc.h:109
// - RdtscTicks: a static class that calculates the rdtsc ticks per second. Definition: RdtscClock.h:42