GraphicsAPI_2020C
neon.h
Go to the documentation of this file.
1 
4 #pragma once
5 
6 #if GLM_ARCH & GLM_ARCH_NEON_BIT
7 #include <arm_neon.h>
8 
9 namespace glm {
10  namespace neon {
11  static float32x4_t dupq_lane(float32x4_t vsrc, int lane) {
12  switch(lane) {
13 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
14  case 0: return vdupq_laneq_f32(vsrc, 0);
15  case 1: return vdupq_laneq_f32(vsrc, 1);
16  case 2: return vdupq_laneq_f32(vsrc, 2);
17  case 3: return vdupq_laneq_f32(vsrc, 3);
18 #else
19  case 0: return vdupq_n_f32(vgetq_lane_f32(vsrc, 0));
20  case 1: return vdupq_n_f32(vgetq_lane_f32(vsrc, 1));
21  case 2: return vdupq_n_f32(vgetq_lane_f32(vsrc, 2));
22  case 3: return vdupq_n_f32(vgetq_lane_f32(vsrc, 3));
23 #endif
24  }
25  assert(!"Unreachable code executed!");
26  return vdupq_n_f32(0.0f);
27  }
28 
29  static float32x2_t dup_lane(float32x4_t vsrc, int lane) {
30  switch(lane) {
31 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
32  case 0: return vdup_laneq_f32(vsrc, 0);
33  case 1: return vdup_laneq_f32(vsrc, 1);
34  case 2: return vdup_laneq_f32(vsrc, 2);
35  case 3: return vdup_laneq_f32(vsrc, 3);
36 #else
37  case 0: return vdup_n_f32(vgetq_lane_f32(vsrc, 0));
38  case 1: return vdup_n_f32(vgetq_lane_f32(vsrc, 1));
39  case 2: return vdup_n_f32(vgetq_lane_f32(vsrc, 2));
40  case 3: return vdup_n_f32(vgetq_lane_f32(vsrc, 3));
41 #endif
42  }
43  assert(!"Unreachable code executed!");
44  return vdup_n_f32(0.0f);
45  }
46 
47  static float32x4_t copy_lane(float32x4_t vdst, int dlane, float32x4_t vsrc, int slane) {
48 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
49  switch(dlane) {
50  case 0:
51  switch(slane) {
52  case 0: return vcopyq_laneq_f32(vdst, 0, vsrc, 0);
53  case 1: return vcopyq_laneq_f32(vdst, 0, vsrc, 1);
54  case 2: return vcopyq_laneq_f32(vdst, 0, vsrc, 2);
55  case 3: return vcopyq_laneq_f32(vdst, 0, vsrc, 3);
56  }
57  assert(!"Unreachable code executed!");
58  case 1:
59  switch(slane) {
60  case 0: return vcopyq_laneq_f32(vdst, 1, vsrc, 0);
61  case 1: return vcopyq_laneq_f32(vdst, 1, vsrc, 1);
62  case 2: return vcopyq_laneq_f32(vdst, 1, vsrc, 2);
63  case 3: return vcopyq_laneq_f32(vdst, 1, vsrc, 3);
64  }
65  assert(!"Unreachable code executed!");
66  case 2:
67  switch(slane) {
68  case 0: return vcopyq_laneq_f32(vdst, 2, vsrc, 0);
69  case 1: return vcopyq_laneq_f32(vdst, 2, vsrc, 1);
70  case 2: return vcopyq_laneq_f32(vdst, 2, vsrc, 2);
71  case 3: return vcopyq_laneq_f32(vdst, 2, vsrc, 3);
72  }
73  assert(!"Unreachable code executed!");
74  case 3:
75  switch(slane) {
76  case 0: return vcopyq_laneq_f32(vdst, 3, vsrc, 0);
77  case 1: return vcopyq_laneq_f32(vdst, 3, vsrc, 1);
78  case 2: return vcopyq_laneq_f32(vdst, 3, vsrc, 2);
79  case 3: return vcopyq_laneq_f32(vdst, 3, vsrc, 3);
80  }
81  assert(!"Unreachable code executed!");
82  }
83 #else
84 
85  float l;
86  switch(slane) {
87  case 0: l = vgetq_lane_f32(vsrc, 0); break;
88  case 1: l = vgetq_lane_f32(vsrc, 1); break;
89  case 2: l = vgetq_lane_f32(vsrc, 2); break;
90  case 3: l = vgetq_lane_f32(vsrc, 3); break;
91  default:
92  assert(!"Unreachable code executed!");
93  }
94  switch(dlane) {
95  case 0: return vsetq_lane_f32(l, vdst, 0);
96  case 1: return vsetq_lane_f32(l, vdst, 1);
97  case 2: return vsetq_lane_f32(l, vdst, 2);
98  case 3: return vsetq_lane_f32(l, vdst, 3);
99  }
100 #endif
101  assert(!"Unreachable code executed!");
102  return vdupq_n_f32(0.0f);
103  }
104 
105  static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane, int lane) {
106 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
107  switch(lane) {
108  case 0: return vmulq_laneq_f32(v, vlane, 0); break;
109  case 1: return vmulq_laneq_f32(v, vlane, 1); break;
110  case 2: return vmulq_laneq_f32(v, vlane, 2); break;
111  case 3: return vmulq_laneq_f32(v, vlane, 3); break;
112  default:
113  assert(!"Unreachable code executed!");
114  }
115  assert(!"Unreachable code executed!");
116  return vdupq_n_f32(0.0f);
117 #else
118  return vmulq_f32(v, dupq_lane(vlane, lane));
119 #endif
120  }
121 
122  static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane, int lane) {
123 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT
124 #ifdef GLM_CONFIG_FORCE_FMA
125 # define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0)
126 #else
127 # define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0)
128 #endif
129 
130  switch(lane) {
131  case 0:
132  FMADD_LANE(acc, v, vlane, 0);
133  return acc;
134  case 1:
135  FMADD_LANE(acc, v, vlane, 1);
136  return acc;
137  case 2:
138  FMADD_LANE(acc, v, vlane, 2);
139  return acc;
140  case 3:
141  FMADD_LANE(acc, v, vlane, 3);
142  return acc;
143  default:
144  assert(!"Unreachable code executed!");
145  }
146  assert(!"Unreachable code executed!");
147  return vdupq_n_f32(0.0f);
148 # undef FMADD_LANE
149 #else
150  return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane)));
151 #endif
152  }
153  } //namespace neon
154 } // namespace glm
155 #endif // GLM_ARCH & GLM_ARCH_NEON_BIT
Core features
Definition: common.hpp:20