53 #ifndef ROCRAND_THREEFRY4_IMPL_H_ 54 #define ROCRAND_THREEFRY4_IMPL_H_ 57 #define FQUALIFIERS __forceinline__ __device__ 60 #include "rocrand/rocrand_threefry_common.h" 61 #include <rocrand/rocrand_common.h> 63 #ifndef THREEFRY4x32_DEFAULT_ROUNDS 64 #define THREEFRY4x32_DEFAULT_ROUNDS 20 67 #ifndef THREEFRY4x64_DEFAULT_ROUNDS 68 #define THREEFRY4x64_DEFAULT_ROUNDS 20 73 static constexpr __device__
int THREEFRY_ROTATION_64_4[8][2] = {
89 static constexpr __device__
int THREEFRY_ROTATION_32_4[8][2] = {
// Primary template declaration: rotation-constant lookup selected by the word type `value`.
// Only declared here; this listing shows definitions for `unsigned int` and
// `unsigned long long`, which index THREEFRY_ROTATION_32_4 / THREEFRY_ROTATION_64_4.
// indexX / indexY are the first and second subscripts into those [8][2] tables.
103 template<
class value>
104 FQUALIFIERS int threefry_rotation_array(
int indexX,
int indexY);
// `unsigned int` form: returns THREEFRY_ROTATION_32_4[indexX][indexY].
// The table is declared [8][2]; no bounds check is performed here. The visible
// caller passes `round_idx & 7u` for indexX and 0 or 1 for indexY.
107 FQUALIFIERS int threefry_rotation_array<unsigned int>(
int indexX,
int indexY)
109 return THREEFRY_ROTATION_32_4[indexX][indexY];
// `unsigned long long` form: returns THREEFRY_ROTATION_64_4[indexX][indexY].
// The table is declared [8][2]; no bounds check is performed here. The visible
// caller passes `round_idx & 7u` for indexX and 0 or 1 for indexY.
113 FQUALIFIERS int threefry_rotation_array<unsigned long long>(
int indexX,
int indexY)
115 return THREEFRY_ROTATION_64_4[indexX][indexY];
118 template<
typename state_value,
typename value,
unsigned int Nrounds>
127 unsigned int substate;
134 this->m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
145 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// Fragment of the single-value generator (its signature is not visible in this listing).
// Reads word number `substate` of the cached result block: the AMD HIP path indexes
// a `data` member, the other path indexes from the address of `.x`.
155 #if defined(__HIP_PLATFORM_AMD__) 156 value ret = m_state.result.data[m_state.substate];
158 value ret = (&m_state.result.x)[m_state.substate];
// Once substate reaches 4 the cached block is used up: reset substate, bump the
// counter and recompute the block from the new counter and the key.
// NOTE(review): the substate increment is not visible in this listing — presumably
// on an omitted line between 158 and 161; confirm against the real header.
161 if(m_state.substate == 4)
163 m_state.substate = 0;
164 m_state.counter = this->bump_counter(m_state.counter);
165 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// Fragment of the four-value generator (its signature is not visible in this listing).
// Saves the current result block, bumps the counter and recomputes the next block,
// then returns interleave(old, new), which picks the words according to substate.
172 state_value ret = m_state.result;
173 m_state.counter = this->bump_counter(m_state.counter);
174 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
176 return this->interleave(ret, m_state.result);
// Runs Nrounds mixing rounds on `counter` using `key`; declared to return a state_value.
// NOTE(review): this listing omits many lines of the body (the embedded line numbers
// jump, e.g. 187 -> 210), including the declarations of X and ks and the return
// statement. The comments below describe only the statements that are visible.
180 FQUALIFIERS state_value threefry_rounds(state_value counter, state_value key)
// Compile-time cap on the number of rounds.
185 static_assert(Nrounds <= 72,
"72 or less only supported in threefry rounds");
// Fifth key-schedule word is seeded from skein_ks_parity<value>().
// NOTE(review): presumably combined with the key words on omitted lines — confirm.
187 ks[4] = skein_ks_parity<value>();
// One loop iteration per round.
210 for(
unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
// Rotation constants repeat with period 8 (row = round_idx & 7u, column = 0 or 1).
212 int rot_0 = threefry_rotation_array<value>(round_idx & 7u, 0);
213 int rot_1 = threefry_rotation_array<value>(round_idx & 7u, 1);
// Chooses which of X.y / X.w is rotated by rot_0 and which by rot_1.
// NOTE(review): the test is on bit 1 (`& 2u`), so the pattern switches every two
// rounds. The reference Threefry-4 (Random123) alternates the word pairing every
// round, which would be `& 1u`. Verify against known-answer vectors before touching
// this: changing it alters the generated stream.
214 if((round_idx & 2u) == 0)
217 X.y = rotl<value>(X.y, rot_0);
220 X.w = rotl<value>(X.w, rot_1);
226 X.w = rotl<value>(X.w, rot_0);
229 X.y = rotl<value>(X.y, rot_1);
// After every fourth round (round_idx & 3 == 3) inject key-schedule words: the
// starting index into ks[] advances with inject_idx and wraps modulo 5, and
// 1 + inject_idx is additionally added to X.w.
233 if((round_idx & 3u) == 3)
235 unsigned int inject_idx = round_idx / 4;
237 X.x += ks[(1 + inject_idx) % 5];
238 X.y += ks[(2 + inject_idx) % 5];
239 X.z += ks[(3 + inject_idx) % 5];
240 X.w += ks[(4 + inject_idx) % 5];
241 X.w += 1 + inject_idx;
// Fragment of discard_impl (listing lines 253-256; the signature is not in this range).
// Splits `offset` into leftover words (offset & 3, added to substate) and whole
// 4-word blocks (offset / 4). If substate reaches 4 or more, one extra block is
// carried into counter_offset and substate is reduced by 4.
// NOTE(review): substate appears to be `unsigned int`, so adding -4 relies on
// unsigned wraparound; how counter_offset is then applied is not visible here.
253 m_state.substate += offset & 3;
254 unsigned long long counter_offset = offset / 4;
255 counter_offset += m_state.substate < 4 ? 0 : 1;
256 m_state.substate += m_state.substate < 4 ? 0 : -4;
// Fragment of discard_subsequence_impl (listing lines 265-269; signature not in this range).
// Splits `subsequence` into lo/hi parts with split_ull and adds them to the upper
// counter words: lo into counter.z, hi into counter.w, plus a carry of 1 when
// counter.z wrapped (new z < old z).
// NOTE(review): the declarations of lo/hi are not visible. If `hi + carry` can itself
// wrap to 0 in the word type, that carry would be lost — confirm the operand widths.
265 ::rocrand_device::detail::split_ull(lo, hi, subsequence);
267 value old_counter = m_state.counter.z;
268 m_state.counter.z += lo;
269 m_state.counter.w += hi + (m_state.counter.z < old_counter ? 1 : 0);
// Fragment of discard_state (listing lines 277-283; signature not in this range).
// Adds `offset`, split into lo/hi by split_ull, to the counter treated as a
// multi-word integer with x as the lowest word: lo goes into x, hi plus the carry
// out of x goes into y, and carries then ripple into z and w. Each carry is
// detected by comparing the updated word with its saved old value.
// NOTE(review): the declarations of lo/hi are not visible. If `hi + carry` wraps to
// 0, y is unchanged and the carry into z would be missed — confirm the operand widths.
277 ::rocrand_device::detail::split_ull(lo, hi, offset);
279 state_value old_counter = m_state.counter;
280 m_state.counter.x += lo;
281 m_state.counter.y += hi + (m_state.counter.x < old_counter.x ? 1 : 0);
282 m_state.counter.z += (m_state.counter.y < old_counter.y ? 1 : 0);
283 m_state.counter.w += (m_state.counter.z < old_counter.z ? 1 : 0);
// Static helper that takes a counter by value and is declared to return a state_value.
// `add` is the carry flag: it is 1 when counter.x is 0, and stays 1 only while each
// following word (y, then z) is also 0.
// NOTE(review): the statements that modify the counter words and the return are not
// visible in this listing — presumably increments on the omitted lines; confirm.
286 FQUALIFIERS static state_value bump_counter(state_value counter)
289 value add = counter.x == 0 ? 1 : 0;
291 add = counter.y == 0 ? add : 0;
293 add = counter.z == 0 ? add : 0;
// Builds a 4-word window across two consecutive blocks, starting at word number
// m_state.substate of `prev` and continuing into `next`:
//   substate 1 -> prev.y, prev.z, prev.w, next.x
//   substate 2 -> prev.z, prev.w, next.x, next.y
//   substate 3 -> prev.w, next.x, next.y, next.z
// NOTE(review): a case for substate 0 is not visible in this listing — presumably on
// an omitted line. Code after the switch is marked __builtin_unreachable(), so any
// substate without a case would be undefined behavior.
298 FQUALIFIERS state_value interleave(
const state_value prev,
const state_value next)
const 300 switch(m_state.substate)
303 case 1:
return state_value{prev.y, prev.z, prev.w, next.x};
304 case 2:
return state_value{prev.z, prev.w, next.x, next.y};
305 case 3:
return state_value{prev.w, next.x, next.y, next.z};
307 __builtin_unreachable();
311 threefry_state_4 m_state;
316 #endif // ROCRAND_THREEFRY4_IMPL_H_ FQUALIFIERS void discard_subsequence(unsigned long long subsequence)
Advances the internal state to skip `subsequence` subsequences, each subsequence consisting of 4 * (2 ^ b)...
Definition: rocrand_threefry4_impl.h:142
FQUALIFIERS void discard(unsigned long long offset)
Advances the internal state to skip offset numbers.
Definition: rocrand_threefry4_impl.h:131
FQUALIFIERS void discard_impl(unsigned long long offset)
Advances the internal state to skip offset numbers.
Definition: rocrand_threefry4_impl.h:250
Definition: rocrand_threefry4_impl.h:119
FQUALIFIERS void discard_subsequence_impl(unsigned long long subsequence)
Does not calculate new values (or update m_state.result).
Definition: rocrand_threefry4_impl.h:262
Definition: rocrand_threefry4_impl.h:122
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31
FQUALIFIERS void discard_state(unsigned long long offset)
Advances the internal state by offset times.
Definition: rocrand_threefry4_impl.h:274
Definition: rocrand_common.h:52