Zero  0.1.0
AtomicCounter.hpp
#ifndef LINTEL_ATOMIC_COUNTER_HPP
#define LINTEL_ATOMIC_COUNTER_HPP

// See the following for explanations of why things are done the way they are:
// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/threadsintro.html
// http://www.justsoftwaresolutions.co.uk/threading/intel-memory-ordering-and-c++-memory-model.html
// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=17884
//
// The asms could be more explicit - e.g. xaddb/xaddw/xaddl/xaddq - but gas
// picks the correct suffix automatically, since it knows the argument size.
// If your assembler doesn't, use %z0.
// asm volatile plus a "memory" clobber is the only available compiler barrier;
// without it, gcc generates asm that is closer to the __sync_add_and_fetch()
// builtin, but there is nothing we can do since there is no other compiler barrier.

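// For illustration only (not part of the original header): this is roughly what
// a fetch-add would look like with the operand-size suffix spelled out via the
// %z operand modifier, for assemblers that do not infer the suffix from the
// register operand. The function name is hypothetical.
//
//   template<typename T>
//   static inline T x86Gcc_atomic_fetch_add_explicit(T* counter, T v) {
//       asm volatile ("lock xadd%z0 %1, %0"
//                     : "+m" (*counter), "+r" (v) :: "memory");
//       return v; // previous value of *counter
//   }
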
// Choose exactly one of:
//#define LINTEL_USE_STD_ATOMICS 1
//#define LINTEL_USE_GCC_BUILTIN_SYNC_ATOMICS 1
#define LINTEL_USE_GCC_ASM_ATOMICS 1

#if (LINTEL_USE_STD_ATOMICS + LINTEL_USE_GCC_BUILTIN_SYNC_ATOMICS + LINTEL_USE_GCC_ASM_ATOMICS != 1)
#  error Choose exactly one of the above
#endif

#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406) // need at least gcc 4.6
#  define LINTEL_HAS_STD_ATOMICS 1
#endif

#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 407) // need at least gcc 4.7
#  if defined(LINTEL_USE_STD_ATOMICS)
#    error gcc-4.6 + std::atomic = bug (http://gcc.gnu.org/ml/gcc-bugs/2012-10/msg00158.html)
#  endif
#endif

#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 401) // need at least gcc 4.1
#  if defined(__i386__) // Pure __i386__ does not have a __sync_add_and_fetch that can return a value.
#    if defined(__i486__) || defined(__i586__) || defined(__i686__)
#      define LINTEL_HAS_GCC_BUILTIN_SYNC_ATOMICS 1
#      define LINTEL_HAS_GCC_ASM_ATOMICS 1
#    endif
#  elif defined(__x86_64) || defined(__x86_64__) // (__i386__ is not defined on x86_64)
#    define LINTEL_HAS_GCC_BUILTIN_SYNC_ATOMICS 1
#    define LINTEL_HAS_GCC_ASM_ATOMICS 1
#  else // Assume all non-i386 archs have it
#    define LINTEL_HAS_GCC_BUILTIN_SYNC_ATOMICS 1
#  endif
#else
#  if defined(LINTEL_USE_GCC_BUILTIN_SYNC_ATOMICS)
#    error detected platform does not have __sync_* primitives
#  endif
#endif

#if defined(LINTEL_USE_GCC_BUILTIN_SYNC_ATOMICS) && !defined(LINTEL_HAS_GCC_BUILTIN_SYNC_ATOMICS)
#  error detected platform does not have __sync builtins
#endif

#if defined(LINTEL_USE_STD_ATOMICS) && !defined(LINTEL_HAS_STD_ATOMICS)
#  error detected platform does not have std::atomic<>
#endif

#if defined(LINTEL_USE_GCC_ASM_ATOMICS) && !defined(LINTEL_HAS_GCC_ASM_ATOMICS)
#  error detected platform does not have asm atomics
#endif

#if defined(LINTEL_USE_STD_ATOMICS)
#  include <atomic>
#endif

namespace lintel {

#if defined(LINTEL_USE_STD_ATOMICS)
#  define LINTEL_ATOMIC_FETCH(op, var, amount) ::std::atomic_fetch_##op(var, amount)
#  define LINTEL_ATOMIC_LOAD(ptr) ::std::atomic_load(ptr)
#  define LINTEL_ATOMIC_STORE(ptr, val) ::std::atomic_store(ptr, val)
#  define LINTEL_ATOMIC_EXCHANGE(ptr, val) ::std::atomic_exchange(ptr, val)
#  define LINTEL_COMPARE_EXCHANGE(current, expected, desired) ::std::atomic_compare_exchange_strong(current, expected, desired)
#  define LINTEL_ATOMIC_THREAD_FENCE(order) ::std::atomic_thread_fence(order)
#elif defined(LINTEL_USE_GCC_BUILTIN_SYNC_ATOMICS)
#  define LINTEL_ATOMIC_FETCH(op, var, amount) ::__sync_fetch_and_##op(var, amount)
   // unnecessary preceding barrier in gcc 4.6 std::atomic<>, but not here; fixed in gcc 4.7
#  define LINTEL_ATOMIC_LOAD(ptr) ({__typeof(*(ptr)) t=*(ptr); ::__sync_synchronize(); t;})
   // BUG in gcc 4.6 std::atomic<> (missing preceding barrier), but not here; fixed in gcc 4.7
#  define LINTEL_ATOMIC_STORE(ptr, val) {::__sync_synchronize(); *(ptr)=val; ::__sync_synchronize();}
#  define LINTEL_ATOMIC_EXCHANGE(ptr, val) ::__sync_lock_test_and_set(ptr, val)
#  define LINTEL_COMPARE_EXCHANGE(current, expected, desired) ({__typeof(*(expected)) val=*(expected); val==(*(expected)=::__sync_val_compare_and_swap(current, val, desired));})
#  define LINTEL_ATOMIC_THREAD_FENCE(order) {::__sync_synchronize();(void)order;}
#elif defined(LINTEL_USE_GCC_ASM_ATOMICS)
#  define LINTEL_ATOMIC_FETCH(op, var, amount) lintel::x86Gcc_atomic_fetch_##op(var, amount)
#  define LINTEL_ATOMIC_LOAD(ptr) lintel::x86Gcc_atomic_load(ptr)
#  define LINTEL_ATOMIC_STORE(ptr, val) lintel::x86Gcc_atomic_store(ptr, val)
#  define LINTEL_ATOMIC_EXCHANGE(ptr, val) lintel::x86Gcc_atomic_exchange(ptr, val)
#  define LINTEL_COMPARE_EXCHANGE(current, expected, desired) lintel::x86Gcc_compare_exchange(current, expected, desired)
#  define LINTEL_ATOMIC_THREAD_FENCE(order) lintel::x86Gcc_atomic_thread_fence(order)
#endif

#if defined(LINTEL_USE_STD_ATOMICS)
    // Pull std::memory_order and its enumerators into namespace lintel so the
    // same unqualified names work with every backend.
    using std::memory_order;
    using std::memory_order_relaxed;
    using std::memory_order_consume;
    using std::memory_order_acquire;
    using std::memory_order_release;
    using std::memory_order_acq_rel;
    using std::memory_order_seq_cst;
#else

    /// Enumeration for memory_order
    typedef enum memory_order {
        memory_order_relaxed,
        memory_order_consume,
        memory_order_acquire,
        memory_order_release,
        memory_order_acq_rel,
        memory_order_seq_cst
    } memory_order;

#endif

#if defined(LINTEL_USE_GCC_ASM_ATOMICS)

    template<typename T>
    static inline T x86Gcc_atomic_load(const T* counter) {
        T v;
        asm volatile ("mov %1, %0" : "=r" (v) : "m" (*counter));
        asm volatile ("" ::: "memory"); // compiler barrier
        return v;
    }

    template<typename T>
    static inline void x86Gcc_atomic_store(T* counter, T v) {
        // xchg with a memory operand is implicitly locked, so this is also a full barrier.
        asm volatile ("xchg %1, %0" : "+m" (*counter), "+r" (v) :: "memory");
    }

    template<typename T>
    static inline T x86Gcc_atomic_exchange(T* counter, T v) {
        asm volatile ("xchg %1, %0" : "+m" (*counter), "+r" (v) :: "memory");
        return v; // v now holds the previous value of *counter
    }

    template<typename T>
    static inline bool x86Gcc_compare_exchange(T* current, T* expected, T desired) {
        // lock cmpxchg compares *current with *expected (held in eax/rax); on failure
        // the observed value is left in *expected. setz records whether the swap happened.
        bool result;
        asm volatile ("lock; cmpxchg %3, %0\n\t"
                      "setz %2\n\t"
                      : "+m" (*current), "+a" (*expected), "=rm" (result)
                      : "r" (desired)
                      : "memory");
        return result;
    }

    template<typename T>
    static inline T x86Gcc_atomic_fetch_add(T* counter, T v) {
        asm volatile ("lock xadd %1, %0" : "+m" (*counter), "+r" (v) :: "memory");
        return v; // xadd leaves the previous value of *counter in v
    }

    template<typename T>
    static inline T x86Gcc_atomic_fetch_sub(T* counter, T v) {
        return x86Gcc_atomic_fetch_add<T>(counter, -v);
    }

    // There is no xadd-style instruction for or/and/xor that returns the old
    // value, so these are emulated with a compare-exchange loop.
    template<typename T>
    static inline T x86Gcc_atomic_fetch_or(T* counter, T v) {
        T expected = LINTEL_ATOMIC_LOAD(counter);
        T desired;
        do {
            desired = expected | v;
        } while (!x86Gcc_compare_exchange(counter, &expected, desired));
        return expected;
    }

    template<typename T>
    static inline T x86Gcc_atomic_fetch_and(T* counter, T v) {
        T expected = LINTEL_ATOMIC_LOAD(counter);
        T desired;
        do {
            desired = expected & v;
        } while (!x86Gcc_compare_exchange(counter, &expected, desired));
        return expected;
    }

    template<typename T>
    static inline T x86Gcc_atomic_fetch_xor(T* counter, T v) {
        T expected = LINTEL_ATOMIC_LOAD(counter);
        T desired;
        do {
            desired = expected ^ v;
        } while (!x86Gcc_compare_exchange(counter, &expected, desired));
        return expected;
    }

    static inline void x86Gcc_atomic_thread_fence(memory_order order) {
        // Unlike gcc, we don't issue fences that are unnecessary for Atomic<>.
        // If you issue nontemporal SSE asms yourself, you have to
        // issue your own fences yourself as well.
        // Note: "#lfence", "#sfence" and "#mfence" are assembler comments; those
        // asm statements act only as compiler barriers.
        switch (order) {
        case memory_order_consume:
        case memory_order_acquire:
            // gcc 4.6 issues lfence, which is never needed for current Atomic
            asm volatile ("#lfence" ::: "memory");
            break;
        case memory_order_release:
            // gcc 4.6 issues sfence, which is never needed for current Atomic
            asm volatile ("#sfence" ::: "memory");
            break;
        case memory_order_acq_rel:
            // gcc 4.6 issues mfence, which is not needed for current Atomic
            asm volatile ("#mfence" ::: "memory");
            // fall through
        case memory_order_seq_cst:
            // what gcc does. StoreLoad barrier.
            asm volatile ("mfence" ::: "memory");
            //asm volatile ("lock; orl $0, (%%rsp)" ::: "memory"); // faster on x86 <= Westmere
            break;
        case memory_order_relaxed:
            break; // do nothing
        }
    }


#endif

    inline void atomic_thread_fence(memory_order order) {
        LINTEL_ATOMIC_THREAD_FENCE(order);
    }

    inline void atomic_signal_fence(memory_order) {
        asm volatile ("" ::: "memory"); // compiler barrier
    }

    /// An atomic counter that avoids using locks.
    template<class T>
    class Atomic {
    public:
        Atomic() {
            counter = 0;
        } /*=default*/ // Should be initialized, for good measure, especially on older compilers.

        explicit Atomic(T counter) : counter(counter) {}

        /// Atomically increments the counter and returns the new value.
        T incThenFetch() {
            return ++*this;
        }

        /// Atomically decrements the counter and returns the new value.
        T decThenFetch() {
            return --*this;
        }

        /// Atomically adds amount and returns the new value.
        T addThenFetch(T amount) {
            return *this += amount;
        }

        /// Returns true if the counter is currently zero.
        bool isZero() const {
            return !load();
        }

        operator T() const {
            return load();
        }

        T load() const {
            return LINTEL_ATOMIC_LOAD(&counter);
        }

        void store(T t) {
            LINTEL_ATOMIC_STORE(&counter, t);
        }

        /// Assignment.
        T operator=(T amount) {
            store(amount);
            return amount;
        }

        T exchange(T t) {
            return LINTEL_ATOMIC_EXCHANGE(&counter, t);
        }

        bool compare_exchange_strong(T* expected, T desired) {
            return LINTEL_COMPARE_EXCHANGE(&counter, expected, desired);
        }

        T fetch_add(T amount) {
            return LINTEL_ATOMIC_FETCH(add, &counter, amount);
        }

        T fetch_sub(T amount) {
            return LINTEL_ATOMIC_FETCH(sub, &counter, amount);
        }

        T fetch_or(T amount) {
            return LINTEL_ATOMIC_FETCH(or, &counter, amount);
        }

        T fetch_and(T amount) {
            return LINTEL_ATOMIC_FETCH(and, &counter, amount);
        }

        T fetch_xor(T amount) {
            return LINTEL_ATOMIC_FETCH(xor, &counter, amount);
        }

        T operator+=(T amount) {
            return fetch_add(amount) + amount;
        }

        T operator-=(T amount) {
            return fetch_sub(amount) - amount;
        }

        T operator|=(T amount) {
            return fetch_or(amount) | amount;
        }

        T operator&=(T amount) {
            return fetch_and(amount) & amount;
        }

        T operator^=(T amount) {
            return fetch_xor(amount) ^ amount;
        }

        T operator++(int) {
            return this->fetch_add(1);
        } // suffix

        T operator--(int) {
            return this->fetch_sub(1);
        }

        T operator++() {
            return this->fetch_add(1) + 1;
        } // prefix

        T operator--() {
            return this->fetch_sub(1) - 1;
        }

    private:
        Atomic(const Atomic&); //=delete

        Atomic& operator=(const Atomic&); // = delete;

#if defined(LINTEL_USE_STD_ATOMICS)
        std::atomic<T> counter;
#elif defined(LINTEL_USE_GCC_BUILTIN_SYNC_ATOMICS) || defined(LINTEL_USE_GCC_ASM_ATOMICS)
        T counter; // gcc will always align it because it ignores the packed attribute on non-POD fields
#endif
    };

    typedef Atomic<int> AtomicCounter; // for backward compatibility; remove when no longer needed.
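
    // Usage sketch (illustration only, not part of the original header; the
    // variable names are hypothetical):
    //
    //   lintel::AtomicCounter hits(0);                // Atomic<int>
    //   hits.incThenFetch();                          // atomic ++, returns 1
    //   hits += 10;                                   // atomic add, returns 11
    //   int expected = 11;
    //   hits.compare_exchange_strong(&expected, 0);   // CAS: 11 -> 0 if still 11
    //   if (hits.isZero()) { /* counter drained */ }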

#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ > 404)
// The pragmas below trigger an ICE on GCC 4.4 and earlier, so only use them on newer compilers.
#pragma GCC push_options
#pragma GCC optimize ("no-strict-aliasing")
#endif

    namespace unsafe {
        // These are not "Type-Based Alias Analysis" (TBAA) safe: undefined behaviour.
        // Use -fno-strict-aliasing to somewhat constrain the compiler, but it is still
        // unsafe. For example:
        //   struct S { short x; int a; } __attribute__((packed)) s;
        //   assert (__alignof__(s.a) != 4);
        //   lintel::unsafe::atomic_load(&s.a); // FAIL, not atomic!

        template<typename T>
        T atomic_load(const T* object) {
            return reinterpret_cast<const Atomic<T>*>(object)->load();
        }

        template<typename T, typename C>
        void atomic_store(T* object, C desired) {
            reinterpret_cast<Atomic<T>*>(object)->store(static_cast<T>(desired));
        }

        template<typename T, typename C>
        T atomic_exchange(T* object, C desired) {
            return reinterpret_cast<Atomic<T>*>(object)->exchange(static_cast<T>(desired));
        }

        template<typename T, typename C>
        bool atomic_compare_exchange_strong(T* object, T* expected, C desired) {
            return reinterpret_cast<Atomic<T>*>(object)->compare_exchange_strong(expected, static_cast<T>(desired));
        }

        template<typename T, typename C>
        T atomic_fetch_add(T* object, C operand) {
            return reinterpret_cast<Atomic<T>*>(object)->fetch_add(static_cast<T>(operand));
        }

        template<typename T, typename C>
        T atomic_fetch_sub(T* object, C operand) {
            return reinterpret_cast<Atomic<T>*>(object)->fetch_sub(static_cast<T>(operand));
        }

        template<typename T, typename C>
        T atomic_fetch_or(T* object, C operand) {
            return reinterpret_cast<Atomic<T>*>(object)->fetch_or(static_cast<T>(operand));
        }

        template<typename T, typename C>
        T atomic_fetch_and(T* object, C operand) {
            return reinterpret_cast<Atomic<T>*>(object)->fetch_and(static_cast<T>(operand));
        }

        template<typename T, typename C>
        T atomic_fetch_xor(T* object, C operand) {
            return reinterpret_cast<Atomic<T>*>(object)->fetch_xor(static_cast<T>(operand));
        }
    }

#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ > 404)
#pragma GCC pop_options
#endif
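
// Usage sketch for lintel::unsafe (illustration only, not part of the original
// header; variable names are hypothetical). These helpers operate on plain,
// suitably aligned integers that are not wrapped in Atomic<>:
//
//   int flags = 0;                                    // naturally aligned int
//   lintel::unsafe::atomic_fetch_or(&flags, 0x4);     // atomically set a bit
//   int old = lintel::unsafe::atomic_exchange(&flags, 0);
//
// Build with -fno-strict-aliasing; the reinterpret_cast to Atomic<T> is
// otherwise undefined behaviour, as the comment above explains.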
} // namespace lintel

#endif // LINTEL_ATOMIC_COUNTER_HPP