6 #if GLM_ARCH & GLM_ARCH_NEON_BIT 11 static float32x4_t dupq_lane(float32x4_t vsrc,
int lane) {
13 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT 14 case 0:
return vdupq_laneq_f32(vsrc, 0);
15 case 1:
return vdupq_laneq_f32(vsrc, 1);
16 case 2:
return vdupq_laneq_f32(vsrc, 2);
17 case 3:
return vdupq_laneq_f32(vsrc, 3);
19 case 0:
return vdupq_n_f32(vgetq_lane_f32(vsrc, 0));
20 case 1:
return vdupq_n_f32(vgetq_lane_f32(vsrc, 1));
21 case 2:
return vdupq_n_f32(vgetq_lane_f32(vsrc, 2));
22 case 3:
return vdupq_n_f32(vgetq_lane_f32(vsrc, 3));
25 assert(!
"Unreachable code executed!");
26 return vdupq_n_f32(0.0f);
29 static float32x2_t dup_lane(float32x4_t vsrc,
int lane) {
31 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT 32 case 0:
return vdup_laneq_f32(vsrc, 0);
33 case 1:
return vdup_laneq_f32(vsrc, 1);
34 case 2:
return vdup_laneq_f32(vsrc, 2);
35 case 3:
return vdup_laneq_f32(vsrc, 3);
37 case 0:
return vdup_n_f32(vgetq_lane_f32(vsrc, 0));
38 case 1:
return vdup_n_f32(vgetq_lane_f32(vsrc, 1));
39 case 2:
return vdup_n_f32(vgetq_lane_f32(vsrc, 2));
40 case 3:
return vdup_n_f32(vgetq_lane_f32(vsrc, 3));
43 assert(!
"Unreachable code executed!");
44 return vdup_n_f32(0.0f);
47 static float32x4_t copy_lane(float32x4_t vdst,
int dlane, float32x4_t vsrc,
int slane) {
48 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT 52 case 0:
return vcopyq_laneq_f32(vdst, 0, vsrc, 0);
53 case 1:
return vcopyq_laneq_f32(vdst, 0, vsrc, 1);
54 case 2:
return vcopyq_laneq_f32(vdst, 0, vsrc, 2);
55 case 3:
return vcopyq_laneq_f32(vdst, 0, vsrc, 3);
57 assert(!
"Unreachable code executed!");
60 case 0:
return vcopyq_laneq_f32(vdst, 1, vsrc, 0);
61 case 1:
return vcopyq_laneq_f32(vdst, 1, vsrc, 1);
62 case 2:
return vcopyq_laneq_f32(vdst, 1, vsrc, 2);
63 case 3:
return vcopyq_laneq_f32(vdst, 1, vsrc, 3);
65 assert(!
"Unreachable code executed!");
68 case 0:
return vcopyq_laneq_f32(vdst, 2, vsrc, 0);
69 case 1:
return vcopyq_laneq_f32(vdst, 2, vsrc, 1);
70 case 2:
return vcopyq_laneq_f32(vdst, 2, vsrc, 2);
71 case 3:
return vcopyq_laneq_f32(vdst, 2, vsrc, 3);
73 assert(!
"Unreachable code executed!");
76 case 0:
return vcopyq_laneq_f32(vdst, 3, vsrc, 0);
77 case 1:
return vcopyq_laneq_f32(vdst, 3, vsrc, 1);
78 case 2:
return vcopyq_laneq_f32(vdst, 3, vsrc, 2);
79 case 3:
return vcopyq_laneq_f32(vdst, 3, vsrc, 3);
81 assert(!
"Unreachable code executed!");
87 case 0: l = vgetq_lane_f32(vsrc, 0);
break;
88 case 1: l = vgetq_lane_f32(vsrc, 1);
break;
89 case 2: l = vgetq_lane_f32(vsrc, 2);
break;
90 case 3: l = vgetq_lane_f32(vsrc, 3);
break;
92 assert(!
"Unreachable code executed!");
95 case 0:
return vsetq_lane_f32(l, vdst, 0);
96 case 1:
return vsetq_lane_f32(l, vdst, 1);
97 case 2:
return vsetq_lane_f32(l, vdst, 2);
98 case 3:
return vsetq_lane_f32(l, vdst, 3);
101 assert(!
"Unreachable code executed!");
102 return vdupq_n_f32(0.0f);
105 static float32x4_t mul_lane(float32x4_t v, float32x4_t vlane,
int lane) {
106 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT 108 case 0:
return vmulq_laneq_f32(v, vlane, 0);
break;
109 case 1:
return vmulq_laneq_f32(v, vlane, 1);
break;
110 case 2:
return vmulq_laneq_f32(v, vlane, 2);
break;
111 case 3:
return vmulq_laneq_f32(v, vlane, 3);
break;
113 assert(!
"Unreachable code executed!");
115 assert(!
"Unreachable code executed!");
116 return vdupq_n_f32(0.0f);
118 return vmulq_f32(v, dupq_lane(vlane, lane));
122 static float32x4_t madd_lane(float32x4_t acc, float32x4_t v, float32x4_t vlane,
int lane) {
123 #if GLM_ARCH & GLM_ARCH_ARMV8_BIT 124 #ifdef GLM_CONFIG_FORCE_FMA 125 # define FMADD_LANE(acc, x, y, L) do { asm volatile ("fmla %0.4s, %1.4s, %2.4s" : "+w"(acc) : "w"(x), "w"(dup_lane(y, L))); } while(0) 127 # define FMADD_LANE(acc, x, y, L) do { acc = vmlaq_laneq_f32(acc, x, y, L); } while(0) 132 FMADD_LANE(acc, v, vlane, 0);
135 FMADD_LANE(acc, v, vlane, 1);
138 FMADD_LANE(acc, v, vlane, 2);
141 FMADD_LANE(acc, v, vlane, 3);
144 assert(!
"Unreachable code executed!");
146 assert(!
"Unreachable code executed!");
147 return vdupq_n_f32(0.0f);
150 return vaddq_f32(acc, vmulq_f32(v, dupq_lane(vlane, lane)));
#endif // GLM_ARCH & GLM_ARCH_NEON_BIT