cuda-kat
CUDA kernel author's tools
Functions
kat::unsafe Namespace Reference

Faster implementations of mathematical functions which can be incorrect for extremal or near-extremal values. More...

Functions

template<typename I , typename I2 = I>
constexpr KAT_FHD I round_up_to_power_of_2 (I x, I2 power_of_2) noexcept
 
template<typename I >
constexpr KAT_FHD I round_up_to_full_warps (I x) noexcept
 
template<typename I1 , typename I2 >
constexpr KAT_FHD I1 div_rounding_up (I1 x, const I2 modulus) noexcept
 
template<typename I1 , typename I2 = I1>
constexpr KAT_FHD I1 round_up (I1 x, I2 y) noexcept
 
template<typename I >
constexpr I modular_increment (I x, I modulus)
 
template<typename I >
constexpr I modular_decrement (I x, I modulus)
 

Detailed Description

Faster implementations of mathematical functions which can be incorrect for extremal or near-extremal values.

Function Documentation

§ div_rounding_up()

template<typename I1 , typename I2 >
constexpr KAT_FHD I1 kat::unsafe::div_rounding_up ( I1  x,
const I2  modulus 
)
noexcept
Note
Will overflow when x is within modulus - 1 of the maximum value of I1

§ round_up()

template<typename I1 , typename I2 = I1>
constexpr KAT_FHD I1 kat::unsafe::round_up ( I1  x,
I2  y 
)
noexcept
Note
Will overflow when x is within y - 1 of the maximum value of I1

§ round_up_to_full_warps()

template<typename I >
constexpr KAT_FHD I kat::unsafe::round_up_to_full_warps ( I  x)
noexcept
Note
Careful: this may overflow when x is at or near the maximum value of I.

§ round_up_to_power_of_2()

template<typename I , typename I2 = I>
constexpr KAT_FHD I kat::unsafe::round_up_to_power_of_2 ( I  x,
I2  power_of_2 
)
noexcept
Note
Careful: this may overflow when x is at or near the maximum value of I.