BRE12
gcc_ia32_common.h
1 /*
2  Copyright 2005-2016 Intel Corporation. All Rights Reserved.
3 
4  This file is part of Threading Building Blocks. Threading Building Blocks is free software;
5  you can redistribute it and/or modify it under the terms of the GNU General Public License
6  version 2 as published by the Free Software Foundation. Threading Building Blocks is
7  distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
8  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
9  See the GNU General Public License for more details. You should have received a copy of
10  the GNU General Public License along with Threading Building Blocks; if not, write to the
11  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12 
13  As a special exception, you may use this file as part of a free software library without
14  restriction. Specifically, if other files instantiate templates or use macros or inline
15  functions from this file, or you compile this file and link it with other files to produce
16  an executable, this file does not by itself cause the resulting executable to be covered
17  by the GNU General Public License. This exception does not however invalidate any other
18  reasons why the executable file might be covered by the GNU General Public License.
19 */
20 
21 #ifndef __TBB_machine_gcc_ia32_common_H
22 #define __TBB_machine_gcc_ia32_common_H
23 
24 //TODO: Add a higher-level function, e.g. tbb::internal::log2(), to tbb_stddef.h that
25 //uses __TBB_Log2 and contains the assert; then remove the assert from here and from all
26 //other platform-specific headers.
27 //TODO: Check if use of gcc intrinsic gives a better chance for cross call optimizations
28 template <typename T>
29 static inline intptr_t __TBB_machine_lg( T x ) {
30  __TBB_ASSERT(x>0, "The logarithm of a non-positive value is undefined.");
31  uintptr_t j, i = x;
32  __asm__("bsr %1,%0" : "=r"(j) : "r"(i));
33  return j;
34 }
35 #define __TBB_Log2(V) __TBB_machine_lg(V)
36 
37 #ifndef __TBB_Pause
38 //TODO: check if raising a ratio of pause instructions to loop control instructions
39 //(via e.g. loop unrolling) gives any benefit for HT. E.g., the current implementation
40 //does about 2 CPU-consuming instructions for every pause instruction. Perhaps for
41 //high pause counts it should use an unrolled loop to raise the ratio, and thus free
42 //up more integer cycles for the other hyperthread. On the other hand, if the loop is
43 //unrolled too far, it won't fit in the core's loop cache, and thus take away
44 //instruction decode slots from the other hyperthread.
45 
46 //TODO: check whether using the gcc __builtin_ia32_pause intrinsic produces better-performing code
47 static inline void __TBB_machine_pause( int32_t delay ) {
48  for (int32_t i = 0; i < delay; i++) {
49  __asm__ __volatile__("pause;");
50  }
51  return;
52 }
53 #define __TBB_Pause(V) __TBB_machine_pause(V)
54 #endif /* !__TBB_Pause */
55 
56 namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
57 static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
58 #if __INTEL_COMPILER
59  return _rdtsc();
60 #else
61  tbb::internal::uint32_t hi, lo;
62  __asm__ __volatile__("rdtsc" : "=d"(hi), "=a"(lo));
63  return (tbb::internal::machine_tsc_t( hi ) << 32) | lo;
64 #endif
65 }
66 #define __TBB_time_stamp() __TBB_machine_time_stamp()
67 
68 // API to retrieve/update FPU control setting
69 #ifndef __TBB_CPU_CTL_ENV_PRESENT
70 #define __TBB_CPU_CTL_ENV_PRESENT 1
71 namespace tbb {
72 namespace internal {
73 class cpu_ctl_env {
74 private:
75  int mxcsr;
76  short x87cw;
77  static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
78 public:
79  bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
80  void get_env() {
81  #if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN
82  cpu_ctl_env loc_ctl;
83  __asm__ __volatile__ (
84  "stmxcsr %0\n\t"
85  "fstcw %1"
86  : "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw)
87  );
88  *this = loc_ctl;
89  #else
90  __asm__ __volatile__ (
91  "stmxcsr %0\n\t"
92  "fstcw %1"
93  : "=m"(mxcsr), "=m"(x87cw)
94  );
95  #endif
96  mxcsr &= MXCSR_CONTROL_MASK;
97  }
98  void set_env() const {
99  __asm__ __volatile__ (
100  "ldmxcsr %0\n\t"
101  "fldcw %1"
102  : : "m"(mxcsr), "m"(x87cw)
103  );
104  }
105 };
106 } // namespace internal
107 } // namespace tbb
108 #endif /* !__TBB_CPU_CTL_ENV_PRESENT */
109 
110 #include "gcc_itsx.h"
111 
112 #endif /* __TBB_machine_gcc_ia32_common_H */
Definition: _flow_graph_async_msg_impl.h:32
The namespace tbb contains all components of the library.
Definition: parallel_for.h:44
Definition: gcc_ia32_common.h:73