|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &thread_key, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type. More...
|
|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type. More...
|
|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &thread_key, storage_type &storage, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type using temporary storage. More...
|
|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &, storage_type &, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type using temporary storage. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], storage_type &storage, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type using temporary storage. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], storage_type &, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort for any data type using temporary storage. More...
|
|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &thread_key, Value &thread_value, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type. More...
|
|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &, Value &, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], Value(&thread_values)[ItemsPerThread], BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], Value(&thread_values)[ItemsPerThread], BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type. More...
|
|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &thread_key, Value &thread_value, storage_type &storage, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type using temporary storage. More...
|
|
template<class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key &, Value &, storage_type &, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type using temporary storage. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], Value(&thread_values)[ItemsPerThread], storage_type &storage, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize<=__AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type using temporary storage. More...
|
|
template<unsigned int ItemsPerThread, class BinaryFunction = ::rocprim::less<Key>, unsigned int FunctionWarpSize = WarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | sort (Key(&thread_keys)[ItemsPerThread], Value(&thread_values)[ItemsPerThread], storage_type &, BinaryFunction compare_function=BinaryFunction()) -> typename std::enable_if<(FunctionWarpSize > __AMDGCN_WAVEFRONT_SIZE), void >::type |
| Warp sort by key for any data type using temporary storage. More...
|
|
template<class Key, unsigned int WarpSize = device_warp_size(), class Value = empty_type>
class warp_sort< Key, WarpSize, Value >
The warp_sort class provides warp-wide methods for computing a parallel sort of items across thread warps.
This class currently implements parallel bitonic sort, and only accepts warp sizes that are powers of two.
- Template Parameters
Key | Data type of the keys to be sorted |
WarpSize | [optional] The number of threads in a (logical) warp. By default, it's device_warp_size() |
Value | [optional] Data type of the values that accompany the keys in the sort-by-key overloads. By default, it's empty_type |
- Overview
- WarpSize must be a power of two.
- WarpSize must be equal to or less than the size of the hardware warp (see rocprim::device_warp_size()). If it is less, the sort is performed separately within groups determined by WarpSize. For example, if WarpSize is 4 and the hardware warp size is 64, the sort will be performed in logical warps grouped like this: { {0, 1, 2, 3}, {4, 5, 6, 7}, ..., {60, 61, 62, 63} } (each thread is represented here by its id within the hardware warp).
- Accepts custom compare_functions for sorting across a warp.
- The number of threads executing warp_sort's function must be a multiple of WarpSize.
- Example:
Every thread within the warp uses the warp_sort class by first specializing the warp_sort type, and instantiating an object that will be used to invoke a member function.
__global__ void example_kernel(...)
{
const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
int value = input[i];
rocprim::warp_sort<int, 64> wsort;
wsort.sort(value);
input[i] = value;
}
Below is a snippet demonstrating how to pass a custom compare function:
__device__ bool customCompare(const int& a, const int& b)
{
return a < b;
}
...
__global__ void example_kernel(...)
{
const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
int value = input[i];
rocprim::warp_sort<int, 64> wsort;
wsort.sort(value, customCompare);
input[i] = value;
}