BRE12
gcc_armv7.h
/*
    Copyright 2005-2016 Intel Corporation. All Rights Reserved.

    This file is part of Threading Building Blocks. Threading Building Blocks is free software;
    you can redistribute it and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation. Threading Building Blocks is
    distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU General Public License for more details. You should have received a copy of
    the GNU General Public License along with Threading Building Blocks; if not, write to the
    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    As a special exception, you may use this file as part of a free software library without
    restriction. Specifically, if other files instantiate templates or use macros or inline
    functions from this file, or you compile this file and link it with other files to produce
    an executable, this file does not by itself cause the resulting executable to be covered
    by the GNU General Public License. This exception does not however invalidate any other
    reasons why the executable file might be covered by the GNU General Public License.
*/
20 
/*
    Platform isolation layer for the ARMv7-a architecture.
*/
24 
25 #ifndef __TBB_machine_H
26 #error Do not include this file directly; include tbb_machine.h instead
27 #endif
28 
29 //TODO: is ARMv7 is the only version ever to support?
30 #if !(__ARM_ARCH_7A__)
31 #error compilation requires an ARMv7-a architecture.
32 #endif
33 
34 #include <sys/param.h>
35 #include <unistd.h>
36 
// Machine word size in bytes.
#define __TBB_WORDSIZE 4

// Traditionally ARM is little-endian.
// Note that, since only the layout of aligned 32-bit words is of interest,
// any apparent PDP-endianness of 32-bit words at half-word alignment or
// any little-endian ordering of big-endian 32-bit words in 64-bit quantities
// may be disregarded for this setting.
//
// Selection order: an explicit big-endian indication wins, then an explicit
// little-endian one; a __BYTE_ORDER__ that is neither is reported as
// unsupported, and with no compile-time information at all the decision is
// left to __TBB_ENDIAN_DETECT.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
    #define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
    #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
    #define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
    #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
53 
54 
// Compiler-only barrier: the asm body is empty, so no instruction is emitted;
// the "memory" clobber keeps the compiler from moving memory accesses across it.
#define __TBB_compiler_fence()    __asm__ __volatile__("": : :"memory")
// Hardware barrier: issues "dmb ish" and, through the "memory" clobber,
// is a compiler barrier as well.
#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory")
// Control, acquire and release ordering are all implemented with the full fence.
#define __TBB_control_consistency_helper() __TBB_full_memory_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
60 
61 //--------------------------------------------------
62 // Compare and swap
63 //--------------------------------------------------
64 
72 static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
73 {
74  int32_t oldval, res;
75 
76  __TBB_full_memory_fence();
77 
78  do {
79  __asm__ __volatile__(
80  "ldrex %1, [%3]\n"
81  "mov %0, #0\n"
82  "cmp %1, %4\n"
83  "it eq\n"
84  "strexeq %0, %5, [%3]\n"
85  : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
86  : "r" ((int32_t *)ptr), "Ir" (comparand), "r" (value)
87  : "cc");
88  } while (res);
89 
90  __TBB_full_memory_fence();
91 
92  return oldval;
93 }
94 
102 static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
103 {
104  int64_t oldval;
105  int32_t res;
106 
107  __TBB_full_memory_fence();
108 
109  do {
110  __asm__ __volatile__(
111  "mov %0, #0\n"
112  "ldrexd %1, %H1, [%3]\n"
113  "cmp %1, %4\n"
114  "it eq\n"
115  "cmpeq %H1, %H4\n"
116  "it eq\n"
117  "strexdeq %0, %5, %H5, [%3]"
118  : "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
119  : "r" ((int64_t *)ptr), "r" (comparand), "r" (value)
120  : "cc");
121  } while (res);
122 
123  __TBB_full_memory_fence();
124 
125  return oldval;
126 }
127 
128 static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
129 {
130  unsigned long tmp;
131  int32_t result, tmp2;
132 
133  __TBB_full_memory_fence();
134 
135  __asm__ __volatile__(
136 "1: ldrex %0, [%4]\n"
137 " add %3, %0, %5\n"
138 " strex %1, %3, [%4]\n"
139 " cmp %1, #0\n"
140 " bne 1b\n"
141  : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
142  : "r" ((int32_t *)ptr), "Ir" (addend)
143  : "cc");
144 
145  __TBB_full_memory_fence();
146 
147  return result;
148 }
149 
150 static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
151 {
152  unsigned long tmp;
153  int64_t result, tmp2;
154 
155  __TBB_full_memory_fence();
156 
157  __asm__ __volatile__(
158 "1: ldrexd %0, %H0, [%4]\n"
159 " adds %3, %0, %5\n"
160 " adc %H3, %H0, %H5\n"
161 " strexd %1, %3, %H3, [%4]\n"
162 " cmp %1, #0\n"
163 " bne 1b"
164  : "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
165  : "r" ((int64_t *)ptr), "r" (addend)
166  : "cc");
167 
168 
169  __TBB_full_memory_fence();
170 
171  return result;
172 }
173 
174 inline void __TBB_machine_pause (int32_t delay )
175 {
176  while(delay>0)
177  {
178  __TBB_compiler_fence();
179  delay--;
180  }
181 }
182 
183 namespace tbb {
184 namespace internal {
185  template <typename T, size_t S>
187  static inline T load ( const volatile T& location ) {
188  const T value = location;
189 
190  /*
191  * An extra memory barrier is required for errata #761319
192  * Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
193  */
194  __TBB_acquire_consistency_helper();
195  return value;
196  }
197 
198  static inline void store ( volatile T& location, T value ) {
199  location = value;
200  }
201  };
202 }} // namespaces internal, tbb
203 
204 // Machine specific atomic operations
205 
// Map the generic operation names onto the machine-specific functions of this file.
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_Pause(V) __TBB_machine_pause(V)

// Request the generic implementations for everything this file does not
// provide itself (presumably consumed by tbb_machine.h - confirm there).
#define __TBB_USE_GENERIC_PART_WORD_CAS                         1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD                   1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE                 1
#define __TBB_USE_GENERIC_FETCH_STORE                           1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE                1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE                      1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE     1
Definition: _flow_graph_async_msg_impl.h:32
The namespace tbb contains all components of the library.
Definition: parallel_for.h:44