|
template<class T , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | apply (BinaryFunction op, const T &a, const T &b, unsigned int index, bool_constant< true >, bool_constant< false >) -> decltype(op(b, a, index)) |
|
template<class T , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | apply (BinaryFunction op, const T &a, const T &b, unsigned int index, bool_constant< true >, bool_constant< true >) -> decltype(op(b, a, index)) |
|
template<typename T , typename BinaryFunction , bool AsFlags> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | apply (BinaryFunction op, const T &a, const T &b, unsigned int, bool_constant< AsFlags >, bool_constant< false >) -> decltype(op(b, a)) |
|
template<typename T , typename BinaryFunction , bool AsFlags> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | apply (BinaryFunction op, const T &a, const T &b, unsigned int, bool_constant< AsFlags >, bool_constant< true >) -> decltype(op(b, a)) |
|
template<class F , class... Args, class Fd = typename std::decay<F>::type> |
ROCPRIM_HOST_DEVICE auto | INVOKE (F &&f, Args &&... args) -> decltype(invoke_impl< Fd >::call(std::forward< F >(f), std::forward< Args >(args)...)) |
|
template<class KeysInputIterator1 , class KeysInputIterator2 , class OffsetT , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE OffsetT | merge_path (KeysInputIterator1 keys_input1, KeysInputIterator2 keys_input2, const OffsetT input1_size, const OffsetT input2_size, const OffsetT diag, BinaryFunction compare_function) |
|
template<class KeyType , unsigned int ItemsPerThread, class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | serial_merge (KeyType *keys_shared, KeyType(&outputs)[ItemsPerThread], unsigned int(&index)[ItemsPerThread], range_t range, BinaryFunction compare_function) |
|
template<class KeyType , unsigned int ItemsPerThread, class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | serial_merge (KeyType *keys_shared, KeyType(&outputs)[ItemsPerThread], range_t range, BinaryFunction compare_function) |
|
template<class KeyType , class ValueType , unsigned int ItemsPerThread, class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | serial_merge (KeyType *keys_shared, KeyType(&outputs)[ItemsPerThread], ValueType *values_shared, ValueType(&values)[ItemsPerThread], range_t range, BinaryFunction compare_function) |
|
template<class T > |
ROCPRIM_HOST_DEVICE constexpr bool | is_power_of_two (const T x) |
|
template<class T > |
ROCPRIM_HOST_DEVICE constexpr T | next_power_of_two (const T x, const T acc=1) |
|
template<typename T , typename U , std::enable_if_t<::rocprim::is_integral< T >::value &&::rocprim::is_unsigned< U >::value, int > = 0> |
ROCPRIM_HOST_DEVICE constexpr auto | ceiling_div (const T a, const U b) |
|
ROCPRIM_HOST_DEVICE size_t | align_size (size_t size, size_t alignment=256) |
|
template<class T > |
ROCPRIM_HOST_DEVICE constexpr T | warp_size_in_class (const T warp_size) |
|
template<class T > |
ROCPRIM_HOST_DEVICE constexpr T | get_min_warp_size (const T block_size, const T max_warp_size) |
|
ROCPRIM_HOST_DEVICE constexpr unsigned int | get_lds_banks_no () |
|
template<class Iterator1 , class Iterator2 > |
bool | can_iterators_alias (Iterator1, Iterator2, const size_t size) |
|
template<typename Value1 , typename Value2 > |
bool | can_iterators_alias (Value1 *iter1, Value2 *iter2, const size_t size) |
|
hipError_t | memcpy_and_sync (void *dst, const void *src, size_t size_bytes, hipMemcpyKind kind, hipStream_t stream) |
| Copy data from src to dst with stream ordering and synchronization. More...
|
|
template<typename T > |
constexpr std::add_const_t< T > & | as_const (T &t) noexcept |
|
template<typename T > |
void | as_const (const T &&t)=delete |
|
template<typename T > |
constexpr std::add_const_t< T > * | as_const_ptr (T *ptr) |
| Add const to the top level of the pointed-to object type. More...
|
|
template<class... Types, class Function , size_t... Indices> |
ROCPRIM_HOST_DEVICE void | for_each_in_tuple_impl (::rocprim::tuple< Types... > &t, Function f, ::rocprim::index_sequence< Indices... >) |
|
template<class... Types, class Function > |
ROCPRIM_HOST_DEVICE void | for_each_in_tuple (::rocprim::tuple< Types... > &t, Function f) |
|
constexpr bool | prefix_equals (const char *lhs, const char *rhs, std::size_t n) |
| Checks if the first n characters of rhs are equal to lhs. More...
|
|
constexpr target_arch | get_target_arch_from_name (const char *const arch_name, const std::size_t n) |
|
constexpr target_arch | device_target_arch () |
| Get the current architecture in device compilation. More...
|
|
template<class Config > |
auto | dispatch_target_arch (const target_arch target_arch) |
|
template<typename Config > |
constexpr auto | device_params () |
|
target_arch | parse_gcn_arch (const char *arch_name) |
|
hipError_t | get_device_arch (int device_id, target_arch &arch) |
|
hipError_t | get_device_from_stream (const hipStream_t stream, int &device_id) |
|
hipError_t | host_target_arch (const hipStream_t stream, target_arch &arch) |
|
template<typename T , typename InputIterator > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | select_previous_values_iterator (T *previous_values, InputIterator, std::true_type) |
|
template<typename T , typename InputIterator > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | select_previous_values_iterator (T *, InputIterator input, std::false_type) |
|
template<typename Config , bool InPlace, bool Right, typename InputIt , typename OutputIt , typename BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | adjacent_difference_kernel_impl (const InputIt input, const OutputIt output, const std::size_t size, const BinaryFunction op, const typename std::iterator_traits< InputIt >::value_type *previous_values, const std::size_t starting_block) |
|
template<class Size > |
ROCPRIM_DEVICE ROCPRIM_INLINE Size | get_binary_search_middle (Size left, Size right) |
|
template<class RandomAccessIterator , class Size , class T , class BinaryPredicate > |
ROCPRIM_DEVICE ROCPRIM_INLINE Size | lower_bound_n (RandomAccessIterator first, Size size, const T &value, BinaryPredicate compare_op) |
|
template<class RandomAccessIterator , class Size , class T , class BinaryPredicate > |
ROCPRIM_DEVICE ROCPRIM_INLINE Size | upper_bound_n (RandomAccessIterator first, Size size, const T &value, BinaryPredicate compare_op) |
|
constexpr unsigned int | merge_sort_items_per_thread (const unsigned int item_scale) |
|
constexpr unsigned int | merge_sort_block_size (const unsigned int item_scale) |
|
template<class T > |
ROCPRIM_HOST_DEVICE unsigned int | upper_bound (const T *values, unsigned int count, T value) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int Channels, class Sample > |
ROCPRIM_DEVICE ROCPRIM_INLINE std::enable_if< is_sample_vectorizable< ItemsPerThread, Channels, Sample >::value >::type | load_samples (unsigned int flat_id, Sample *samples, sample_vector< Sample, Channels >(&values)[ItemsPerThread]) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int Channels, class Sample > |
ROCPRIM_DEVICE ROCPRIM_INLINE std::enable_if<!is_sample_vectorizable< ItemsPerThread, Channels, Sample >::value >::type | load_samples (unsigned int flat_id, Sample *samples, sample_vector< Sample, Channels >(&values)[ItemsPerThread]) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int Channels, class Sample , class SampleIterator > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | load_samples (unsigned int flat_id, SampleIterator samples, sample_vector< Sample, Channels >(&values)[ItemsPerThread]) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int Channels, class Sample , class SampleIterator > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | load_samples (unsigned int flat_id, SampleIterator samples, sample_vector< Sample, Channels >(&values)[ItemsPerThread], unsigned int valid_count) |
|
template<unsigned int BlockSize, unsigned int ActiveChannels, class Counter > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | init_histogram (fixed_array< Counter *, ActiveChannels > histogram, fixed_array< unsigned int, ActiveChannels > bins) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int Channels, unsigned int ActiveChannels, class SampleIterator , class Counter , class SampleToBinOp > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | histogram_shared (SampleIterator samples, unsigned int columns, unsigned int rows, unsigned int row_stride, unsigned int rows_per_block, unsigned int shared_histograms, fixed_array< Counter *, ActiveChannels > histogram, fixed_array< SampleToBinOp, ActiveChannels > sample_to_bin_op, fixed_array< unsigned int, ActiveChannels > bins, unsigned int *block_histogram_start) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int Channels, unsigned int ActiveChannels, class SampleIterator , class Counter , class SampleToBinOp > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | histogram_global (SampleIterator samples, unsigned int columns, unsigned int row_stride, fixed_array< Counter *, ActiveChannels > histogram, fixed_array< SampleToBinOp, ActiveChannels > sample_to_bin_op, fixed_array< unsigned int, ActiveChannels > bins_bits) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE range_t | compute_range (const unsigned int id, const unsigned int size1, const unsigned int size2, const unsigned int spacing, const unsigned int p1, const unsigned int p2) |
|
template<class IndexIterator , class KeysInputIterator1 , class KeysInputIterator2 , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | partition_kernel_impl (IndexIterator indices, KeysInputIterator1 keys_input1, KeysInputIterator2 keys_input2, const size_t input1_size, const size_t input2_size, const unsigned int spacing, BinaryFunction compare_function) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class KeysInputIterator1 , class KeysInputIterator2 , class KeyType > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | load (unsigned int flat_id, KeysInputIterator1 keys_input1, KeysInputIterator2 keys_input2, KeyType *keys_shared, const size_t input1_size, const size_t input2_size) |
|
template<unsigned int BlockSize, class KeysInputIterator1 , class KeysInputIterator2 , class KeyType , unsigned int ItemsPerThread, class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | merge_keys (unsigned int flat_id, KeysInputIterator1 keys_input1, KeysInputIterator2 keys_input2, KeyType(&key_inputs)[ItemsPerThread], unsigned int(&index)[ItemsPerThread], KeyType *keys_shared, range_t range, BinaryFunction compare_function) |
|
template<bool WithValues, unsigned int BlockSize, class ValuesInputIterator1 , class ValuesInputIterator2 , class ValuesOutputIterator , unsigned int ItemsPerThread> |
ROCPRIM_DEVICE ROCPRIM_INLINE std::enable_if< WithValues >::type | merge_values (unsigned int flat_id, ValuesInputIterator1 values_input1, ValuesInputIterator2 values_input2, ValuesOutputIterator values_output, unsigned int(&index)[ItemsPerThread], const size_t input1_size, const size_t input2_size) |
|
template<bool WithValues, unsigned int BlockSize, class ValuesInputIterator1 , class ValuesInputIterator2 , class ValuesOutputIterator , unsigned int ItemsPerThread> |
ROCPRIM_DEVICE ROCPRIM_INLINE std::enable_if<!WithValues >::type | merge_values (unsigned int flat_id, ValuesInputIterator1 values_input1, ValuesInputIterator2 values_input2, ValuesOutputIterator values_output, unsigned int(&index)[ItemsPerThread], const size_t input1_size, const size_t input2_size) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class IndexIterator , class KeysInputIterator1 , class KeysInputIterator2 , class KeysOutputIterator , class ValuesInputIterator1 , class ValuesInputIterator2 , class ValuesOutputIterator , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | merge_kernel_impl (IndexIterator indices, KeysInputIterator1 keys_input1, KeysInputIterator2 keys_input2, KeysOutputIterator keys_output, ValuesInputIterator1 values_input1, ValuesInputIterator2 values_input2, ValuesOutputIterator values_output, const size_t input1_size, const size_t input2_size, BinaryFunction compare_function) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, block_sort_algorithm Algo, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetT , class BinaryFunction , class ValueType = typename std::iterator_traits<ValuesInputIterator>::value_type> |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE auto | block_sort_kernel_impl (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, const OffsetT input_size, BinaryFunction compare_function) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetT , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | block_merge_oddeven_kernel (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, const OffsetT input_size, const OffsetT sorted_block_size, BinaryFunction compare_function) |
|
template<unsigned int ItemsPerThread, class KeyT , class InputIterator > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | gmem_to_reg (KeyT(&output)[ItemsPerThread], InputIterator input1, InputIterator input2, unsigned int count1, unsigned int count2, bool IsLastTile) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class KeyT , class OutputIterator > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | reg_to_shared (OutputIterator output, KeyT(&input)[ItemsPerThread]) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetT , class BinaryFunction , class ValueType = typename std::iterator_traits<ValuesInputIterator>::value_type> |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE auto | block_merge_process_tile (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, const OffsetT input_size, const OffsetT sorted_block_size, BinaryFunction compare_function, const OffsetT *merge_partitions) -> std::enable_if_t<(!std::is_trivially_copyable< ValueType >::value||rocprim::is_floating_point< ValueType >::value||std::is_integral< ValueType >::value), void > |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetT , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | block_merge_mergepath_kernel (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, const OffsetT input_size, const OffsetT sorted_block_size, BinaryFunction compare_function, const OffsetT *merge_partitions) |
|
template<select_method SelectMethod, unsigned int BlockSize, class BlockLoadFlagsType , class BlockDiscontinuityType , class InputIterator , class FlagIterator , class ValueType , unsigned int ItemsPerThread, class UnaryPredicate , class InequalityOp , class StorageType > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | partition_block_load_flags (InputIterator, FlagIterator block_flags, ValueType(&)[ItemsPerThread], bool(&is_selected)[ItemsPerThread], UnaryPredicate, InequalityOp, StorageType &storage, const bool, const unsigned int, const bool is_global_last_block, const unsigned int valid_in_global_last_block) -> typename std::enable_if< SelectMethod==select_method::flag >::type |
|
template<select_method SelectMethod, unsigned int BlockSize, class BlockLoadFlagsType , class BlockDiscontinuityType , class InputIterator , class FlagIterator , class ValueType , unsigned int ItemsPerThread, class FirstUnaryPredicate , class SecondUnaryPredicate , class InequalityOp , class StorageType > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | partition_block_load_flags (InputIterator, FlagIterator, ValueType(&values)[ItemsPerThread], bool(&is_selected)[2][ItemsPerThread], FirstUnaryPredicate select_first_part_op, SecondUnaryPredicate select_second_part_op, InequalityOp, StorageType &, const unsigned int, const unsigned int block_thread_id, const bool is_global_last_block, const unsigned int valid_in_global_last_block) |
|
template<bool OnlySelected, unsigned int BlockSize, class ValueType , unsigned int ItemsPerThread, class OffsetType , class SelectType , class ScatterStorageType > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | partition_scatter (ValueType(&values)[ItemsPerThread], bool(&is_selected)[ItemsPerThread], OffsetType(&output_indices)[ItemsPerThread], tuple< SelectType, ::rocprim::empty_type *> output, const size_t total_size, const OffsetType selected_prefix, const OffsetType selected_in_block, ScatterStorageType &storage, const unsigned int flat_block_id, const unsigned int flat_block_thread_id, const bool is_global_last_block, const unsigned int valid_in_global_last_block, size_t(&prev_selected_count_values)[1], size_t prev_processed) -> typename std::enable_if<!OnlySelected >::type |
|
template<bool OnlySelected, unsigned int BlockSize, class ValueType , unsigned int ItemsPerThread, class OffsetType , class SelectType , class RejectType , class ScatterStorageType > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | partition_scatter (ValueType(&values)[ItemsPerThread], bool(&is_selected)[ItemsPerThread], OffsetType(&output_indices)[ItemsPerThread], tuple< SelectType, RejectType > output, const size_t, const OffsetType selected_prefix, const OffsetType selected_in_block, ScatterStorageType &storage, const unsigned int flat_block_id, const unsigned int flat_block_thread_id, const bool is_global_last_block, const unsigned int valid_in_global_last_block, size_t(&prev_selected_count_values)[1], size_t prev_processed) -> typename std::enable_if<!OnlySelected >::type |
|
template<bool OnlySelected, unsigned int BlockSize, class ValueType , unsigned int ItemsPerThread, class OffsetType , class OutputType , class ScatterStorageType > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | partition_scatter (ValueType(&values)[ItemsPerThread], bool(&is_selected)[2][ItemsPerThread], OffsetType(&output_indices)[ItemsPerThread], OutputType output, const size_t, const OffsetType selected_prefix, const OffsetType selected_in_block, ScatterStorageType &storage, const unsigned int flat_block_id, const unsigned int flat_block_thread_id, const bool is_global_last_block, const unsigned int valid_in_global_last_block, size_t(&prev_selected_count_values)[2], size_t prev_processed) |
|
template<unsigned int items_per_thread, class offset_type > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | convert_selected_to_indices (offset_type(&output_indices)[items_per_thread], bool(&is_selected)[items_per_thread]) |
|
template<unsigned int items_per_thread> |
ROCPRIM_DEVICE ROCPRIM_INLINE void | convert_selected_to_indices (uint2(&output_indices)[items_per_thread], bool(&is_selected)[2][items_per_thread]) |
|
template<class OffsetT > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | store_selected_count (size_t *selected_count, size_t(&prev_selected_count_values)[1], const OffsetT selected_prefix, const OffsetT selected_in_block) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE void | store_selected_count (size_t *selected_count, size_t(&prev_selected_count_values)[2], const uint2 selected_prefix, const uint2 selected_in_block) |
|
template<unsigned int Size> |
ROCPRIM_DEVICE void | load_selected_count (const size_t *const prev_selected_count, size_t(&loaded_values)[Size]) |
|
template<select_method SelectMethod, bool OnlySelected, class Config , class KeyIterator , class ValueIterator , class FlagIterator , class OutputKeyIterator , class OutputValueIterator , class InequalityOp , class OffsetLookbackScanState , class... UnaryPredicates> |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | partition_kernel_impl (KeyIterator keys_input, ValueIterator values_input, FlagIterator flags, OutputKeyIterator keys_output, OutputValueIterator values_output, size_t *selected_count, size_t *prev_selected_count, size_t prev_processed, const size_t total_size, InequalityOp inequality_op, OffsetLookbackScanState offset_scan_state, const unsigned int number_of_blocks, UnaryPredicates... predicates) |
|
template<bool Descending = false, class SortType , class SortKey , class SortValue , unsigned int ItemsPerThread> |
ROCPRIM_DEVICE ROCPRIM_INLINE void | sort_block (SortType sorter, SortKey(&keys)[ItemsPerThread], SortValue(&values)[ItemsPerThread], typename SortType::storage_type &storage, unsigned int begin_bit, unsigned int end_bit) |
|
template<bool Descending = false, class SortType , class SortKey , unsigned int ItemsPerThread> |
ROCPRIM_DEVICE ROCPRIM_INLINE void | sort_block (SortType sorter, SortKey(&keys)[ItemsPerThread], ::rocprim::empty_type(&values)[ItemsPerThread], typename SortType::storage_type &storage, unsigned int begin_bit, unsigned int end_bit) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | sort_single (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, unsigned int size, unsigned int bit, unsigned int current_radix_bits) |
|
template<class T > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | compare_nan_sensitive (const T &a, const T &b) -> typename std::enable_if< rocprim::is_floating_point< T >::value, bool >::type |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int RadixBits, bool Descending, class KeysInputIterator , class Offset > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | onesweep_histograms (KeysInputIterator keys_input, Offset *global_digit_counts, const Offset size, const Offset full_blocks, const unsigned int begin_bit, const unsigned int end_bit) |
|
template<unsigned int BlockSize, unsigned int RadixBits, class Offset > |
ROCPRIM_DEVICE void | onesweep_scan_histograms (Offset *global_digit_offsets) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, unsigned int RadixBits, bool Descending, block_radix_rank_algorithm RadixRankAlgorithm, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class Offset > |
ROCPRIM_DEVICE void | onesweep_iteration (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, const unsigned int size, Offset *global_digit_offsets_in, Offset *global_digit_offsets_out, onesweep_lookback_state *lookback_states, const unsigned int bit, const unsigned int current_radix_bits, const unsigned int full_blocks) |
|
template<bool WithInitialValue, class T , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | reduce_with_initial (T output, T initial_value, BinaryFunction reduce_op) -> typename std::enable_if< WithInitialValue, T >::type |
|
template<bool WithInitialValue, class Config , class ResultType , class InputIterator , class OutputIterator , class InitValueType , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | block_reduce_kernel_impl (InputIterator input, const size_t input_size, OutputIterator output, InitValueType initial_value, BinaryFunction reduce_op) |
|
template<bool Exclusive, class BlockScan , class T , unsigned int ItemsPerThread, class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | single_scan_block_scan (T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T initial_value, typename BlockScan::storage_type &storage, BinaryFunction scan_op) -> typename std::enable_if< Exclusive >::type |
|
template<bool Exclusive, class Config , class InputIterator , class OutputIterator , class BinaryFunction , class ResultType , class LookbackScanState > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | lookback_scan_kernel_impl (InputIterator input, OutputIterator output, const size_t size, ResultType initial_value, BinaryFunction scan_op, LookbackScanState scan_state, const unsigned int number_of_blocks, ResultType *previous_last_element=nullptr, ResultType *new_last_element=nullptr, bool override_first_value=false, bool save_last_value=false) |
|
template<bool Exclusive, typename Config , typename KeyInputIterator , typename InputIterator , typename OutputIterator , typename ResultType , typename CompareFunction , typename BinaryFunction , typename LookbackScanState > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | device_scan_by_key_kernel_impl (KeyInputIterator keys, InputIterator values, OutputIterator output, ResultType initial_value, const CompareFunction compare, const BinaryFunction scan_op, LookbackScanState scan_state, const size_t size, const size_t starting_block, const size_t number_of_blocks, const rocprim::tuple< ResultType, bool > *const previous_last_value) |
|
template<typename LookBackScanState , typename AccessFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | access_indexed_lookback_value (LookBackScanState lookback_scan_state, const unsigned int number_of_blocks, unsigned int save_index, unsigned int flat_thread_id, AccessFunction access_function) |
|
template<typename LookBackScanState > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | init_lookback_scan_state (LookBackScanState lookback_scan_state, const unsigned int number_of_blocks, ordered_block_id< unsigned int > ordered_bid, unsigned int flat_thread_id) |
|
template<typename LookBackScanState > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | init_lookback_scan_state (LookBackScanState lookback_scan_state, const unsigned int number_of_blocks, unsigned int flat_thread_id) |
|
template<typename LookBackScanState > |
ROCPRIM_KERNEL | __launch_bounds__ (ROCPRIM_DEFAULT_MAX_BLOCK_SIZE) void init_lookback_scan_state_kernel(LookBackScanState lookback_scan_state |
|
template<bool Exclusive, class BlockScan , class T , unsigned int ItemsPerThread, class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | lookback_block_scan (T(&values)[ItemsPerThread], T, T &reduction, typename BlockScan::storage_type &storage, BinaryFunction scan_op) -> typename std::enable_if<!Exclusive >::type |
|
template<bool Exclusive, class BlockScan , class T , unsigned int ItemsPerThread, class PrefixCallback , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | lookback_block_scan (T(&values)[ItemsPerThread], typename BlockScan::storage_type &storage, PrefixCallback &prefix_callback_op, BinaryFunction scan_op) -> typename std::enable_if<!Exclusive >::type |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetIterator > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | segmented_sort (KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_tmp, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_tmp, ValuesOutputIterator values_output, bool to_output, OffsetIterator begin_offsets, OffsetIterator end_offsets, unsigned int long_iterations, unsigned int short_iterations, unsigned int begin_bit, unsigned int end_bit) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class SegmentIndexIterator , class OffsetIterator > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | segmented_sort_large (KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_tmp, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_tmp, ValuesOutputIterator values_output, bool to_output, SegmentIndexIterator segment_indices, OffsetIterator begin_offsets, OffsetIterator end_offsets, unsigned int long_iterations, unsigned int short_iterations, unsigned int begin_bit, unsigned int end_bit) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class SegmentIndexIterator , class OffsetIterator > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | segmented_sort_small (KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_tmp, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_tmp, ValuesOutputIterator values_output, bool to_output, unsigned int num_segments, SegmentIndexIterator segment_indices, OffsetIterator begin_offsets, OffsetIterator end_offsets, unsigned int begin_bit, unsigned int end_bit) |
|
template<class Config , class InputIterator , class OutputIterator , class OffsetIterator , class ResultType , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | segmented_reduce (InputIterator input, OutputIterator output, OffsetIterator begin_offsets, OffsetIterator end_offsets, BinaryFunction reduce_op, ResultType initial_value) |
|
template<bool Exclusive, bool UsePrefix, class BlockScanType , class T , unsigned int ItemsPerThread, class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | segmented_scan_block_scan (T(&input)[ItemsPerThread], T(&output)[ItemsPerThread], T &prefix, typename BlockScanType::storage_type &storage, BinaryFunction scan_op) -> typename std::enable_if< Exclusive >::type |
|
template<bool Exclusive, class Config , class ResultType , class InputIterator , class OutputIterator , class OffsetIterator , class InitValueType , class BinaryFunction > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | segmented_scan (InputIterator input, OutputIterator output, OffsetIterator begin_offsets, OffsetIterator end_offsets, InitValueType initial_value, BinaryFunction scan_op) |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class ResultType , class InputIterator , class OutputIterator , class UnaryFunction > |
ROCPRIM_DEVICE ROCPRIM_INLINE void | transform_kernel_impl (InputIterator input, const size_t input_size, OutputIterator output, UnaryFunction transform_op) |
|
hipError_t | is_sleep_scan_state_used (bool &use_sleep) |
|
ROCPRIM_HOST_DEVICE unsigned int | operator/ (unsigned int n, const uint_fast_div &divisor) |
|
template<typename Config , bool InPlace, bool Right, typename InputIt , typename OutputIt , typename BinaryFunction > |
void ROCPRIM_KERNEL | __launch_bounds__ (Config::block_size) adjacent_difference_kernel(const InputIt input |
|
template<typename Config , bool InPlace, bool Right, typename InputIt , typename OutputIt , typename BinaryFunction > |
hipError_t | adjacent_difference_impl (void *const temporary_storage, std::size_t &storage_size, const InputIt input, const OutputIt output, const std::size_t size, const BinaryFunction op, const hipStream_t stream, const bool debug_synchronous) |
|
template<class Config , class HaystackIterator , class NeedlesIterator , class OutputIterator , class SearchFunction , class CompareFunction > |
hipError_t | binary_search (void *temporary_storage, size_t &storage_size, HaystackIterator haystack, NeedlesIterator needles, OutputIterator output, size_t haystack_size, size_t needles_size, SearchFunction search_op, CompareFunction compare_op, hipStream_t stream, bool debug_synchronous) |
|
template<class Config , unsigned int ActiveChannels, class Counter > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >() .histogram_config .block_size) void init_histogram_kernel(fixed_array< Counter * |
|
| init_histogram< params.histogram_config.block_size, ActiveChannels > (histogram, bins) |
|
template<class Config , unsigned int Channels, unsigned int ActiveChannels, class SampleIterator , class Counter , class SampleToBinOp > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >() .histogram_config .block_size) void histogram_shared_kernel(SampleIterator samples |
|
| HIP_DYNAMIC_SHARED (unsigned int, block_histogram) |
|
| histogram_shared< params.histogram_config.block_size, params.histogram_config.items_per_thread, Channels, ActiveChannels > (samples, columns, rows, row_stride, rows_per_block, shared_histograms, histogram, sample_to_bin_op, bins, block_histogram) |
|
| histogram_global< params.histogram_config.block_size, params.histogram_config.items_per_thread, Channels, ActiveChannels > (samples, columns, row_stride, histogram, sample_to_bin_op, bins_bits) |
|
template<unsigned int Channels, unsigned int ActiveChannels, class Config , class SampleIterator , class Counter , class SampleToBinOp > |
hipError_t | histogram_impl (void *temporary_storage, size_t &storage_size, SampleIterator samples, unsigned int columns, unsigned int rows, size_t row_stride_bytes, Counter *histogram[ActiveChannels], unsigned int levels[ActiveChannels], SampleToBinOp sample_to_bin_op[ActiveChannels], hipStream_t stream, bool debug_synchronous) |
|
template<unsigned int Channels, unsigned int ActiveChannels, class Config , class SampleIterator , class Counter , class Level > |
hipError_t | histogram_even_impl (void *temporary_storage, size_t &storage_size, SampleIterator samples, unsigned int columns, unsigned int rows, size_t row_stride_bytes, Counter *histogram[ActiveChannels], unsigned int levels[ActiveChannels], Level lower_level[ActiveChannels], Level upper_level[ActiveChannels], hipStream_t stream, bool debug_synchronous) |
|
template<unsigned int Channels, unsigned int ActiveChannels, class Config , class SampleIterator , class Counter , class Level > |
hipError_t | histogram_range_impl (void *temporary_storage, size_t &storage_size, SampleIterator samples, unsigned int columns, unsigned int rows, size_t row_stride_bytes, Counter *histogram[ActiveChannels], unsigned int levels[ActiveChannels], Level *level_values[ActiveChannels], hipStream_t stream, bool debug_synchronous) |
|
template<typename HistogramConfig > |
constexpr histogram_config_params | wrap_histogram_config () |
|
template<class IndexIterator , class KeysInputIterator1 , class KeysInputIterator2 , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (ROCPRIM_DEFAULT_MAX_BLOCK_SIZE) void partition_kernel(IndexIterator index |
|
template<unsigned int BlockSize, unsigned int ItemsPerThread, class IndexIterator , class KeysInputIterator1 , class KeysInputIterator2 , class KeysOutputIterator , class ValuesInputIterator1 , class ValuesInputIterator2 , class ValuesOutputIterator , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (BlockSize) void merge_kernel(IndexIterator index |
|
template<class Config , class KeysInputIterator1 , class KeysInputIterator2 , class KeysOutputIterator , class ValuesInputIterator1 , class ValuesInputIterator2 , class ValuesOutputIterator , class BinaryFunction > |
hipError_t | merge_impl (void *temporary_storage, size_t &storage_size, KeysInputIterator1 keys_input1, KeysInputIterator2 keys_input2, KeysOutputIterator keys_output, ValuesInputIterator1 values_input1, ValuesInputIterator2 values_input2, ValuesOutputIterator values_output, const size_t input1_size, const size_t input2_size, BinaryFunction compare_function, const hipStream_t stream, bool debug_synchronous) |
|
template<class Config , class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetT , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().block_sort_config.block_size) void block_sort_kernel(KeysInputIterator keys_input |
|
| block_sort_kernel_impl< params.block_sort_config.block_size, params.block_sort_config.items_per_thread, params.block_sort_method > (keys_input, keys_output, values_input, values_output, sorted_block_size, compare_function) |
|
template<class Config , class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetT , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >() .merge_oddeven_config .block_size) void device_block_merge_oddeven_kernel(KeysInputIterator keys_input |
|
| block_merge_oddeven_kernel< params.merge_oddeven_config.block_size, params.merge_oddeven_config.items_per_thread > (keys_input, keys_output, values_input, values_output, input_size, sorted_block_size, compare_function) |
|
template<class Config , class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetT , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >() .merge_mergepath_config .block_size) void device_block_merge_mergepath_kernel(KeysInputIterator keys_input |
|
| block_merge_mergepath_kernel< params.merge_mergepath_config.block_size, params.merge_mergepath_config.items_per_thread > (keys_input, keys_output, values_input, values_output, input_size, sorted_block_size, compare_function, merge_partitions) |
|
template<typename Config , typename KeysInputIterator , typename OffsetT , typename CompareOpT > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >() .merge_mergepath_partition_config .block_size) void device_block_merge_mergepath_partition_kernel(KeysInputIterator keys |
|
| if (partition_id >=num_partitions) |
|
template<class Config , class KeysIterator , class ValuesIterator , class OffsetT , class BinaryFunction > |
hipError_t | merge_sort_block_merge (void *temporary_storage, size_t &storage_size, KeysIterator keys, ValuesIterator values, const OffsetT size, unsigned int sorted_block_size, BinaryFunction compare_function, const hipStream_t stream, bool debug_synchronous, typename std::iterator_traits< KeysIterator >::value_type *keys_double_buffer=nullptr, typename std::iterator_traits< ValuesIterator >::value_type *values_double_buffer=nullptr) |
|
template<class Config , class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class BinaryFunction > |
hipError_t | merge_sort_block_sort (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, const unsigned int size, unsigned int &sort_items_per_block, BinaryFunction compare_function, const hipStream_t stream, bool debug_synchronous) |
|
template<unsigned int A, unsigned int B> |
ROCPRIM_DEVICE void | TAssertEqualGreater () |
|
template<class BlockSortConfig , class BlockMergeConfig > |
ROCPRIM_KERNEL void | device_merge_sort_compile_time_verifier () |
|
template<class Config , class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class BinaryFunction > |
hipError_t | merge_sort_impl (void *temporary_storage, size_t &storage_size, KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, const unsigned int size, BinaryFunction compare_function, const hipStream_t stream, bool debug_synchronous, typename std::iterator_traits< KeysInputIterator >::value_type *keys_buffer=nullptr, typename std::iterator_traits< ValuesInputIterator >::value_type *values_buffer=nullptr) |
|
template<select_method SelectMethod, bool OnlySelected, class Config , class KeyIterator , class ValueIterator , class FlagIterator , class OutputKeyIterator , class OutputValueIterator , class InequalityOp , class OffsetLookbackScanState , class... UnaryPredicates> |
ROCPRIM_KERNEL | __launch_bounds__ (Config::block_size) void partition_kernel(KeyIterator keys_input |
|
template<select_method SelectMethod, bool OnlySelected, class Config , class OffsetT , class KeyIterator , class ValueIterator , class FlagIterator , class OutputKeyIterator , class OutputValueIterator , class InequalityOp , class SelectedCountOutputIterator , class... UnaryPredicates> |
hipError_t | partition_impl (void *temporary_storage, size_t &storage_size, KeyIterator keys_input, ValueIterator values_input, FlagIterator flags, OutputKeyIterator keys_output, OutputValueIterator values_output, SelectedCountOutputIterator selected_count_output, const size_t size, InequalityOp inequality_op, const hipStream_t stream, bool debug_synchronous, UnaryPredicates... predicates) |
|
template<class Config , bool Descending, class KeysInputIterator , class Offset > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().histogram.block_size) void onesweep_histograms_kernel(KeysInputIterator keys_input |
|
| onesweep_histograms< params.histogram.block_size, params.histogram.items_per_thread, params.radix_bits_per_place, Descending > (keys_input, global_digit_counts, size, full_blocks, begin_bit, end_bit) |
|
template<class Config , class Offset > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >() .histogram.block_size) void onesweep_scan_histograms_kernel(Offset *global_digit_offsets) |
|
template<class Config , bool Descending, class KeysInputIterator , class ValuesInputIterator , class Offset > |
hipError_t | radix_sort_onesweep_global_offsets (KeysInputIterator keys_input, ValuesInputIterator, Offset *global_digit_offsets, const Offset size, const unsigned int digit_places, const unsigned begin_bit, const unsigned end_bit, const hipStream_t stream, const bool debug_synchronous) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class Offset > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().sort.block_size) void onesweep_iteration_kernel(KeysInputIterator keys_input |
|
| onesweep_iteration< params.sort.block_size, params.sort.items_per_thread, params.radix_bits_per_place, Descending, params.radix_rank_algorithm > (keys_input, keys_output, values_input, values_output, size, global_digit_offsets_in, global_digit_offsets_out, lookback_states, bit, current_radix_bits, full_blocks) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class Offset > |
hipError_t | radix_sort_onesweep_iteration (KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_tmp, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_tmp, ValuesOutputIterator values_output, const Offset size, Offset *global_digit_offsets_in, Offset *global_digit_offsets_out, onesweep_lookback_state *lookback_states, const bool from_input, const bool to_output, const unsigned int bit, const unsigned int end_bit, const hipStream_t stream, const bool debug_synchronous) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class Size > |
hipError_t | radix_sort_onesweep_impl (void *temporary_storage, size_t &storage_size, KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_tmp, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_tmp, ValuesOutputIterator values_output, const Size size, bool &is_result_in_output, const unsigned int begin_bit, const unsigned int end_bit, const hipStream_t stream, const bool debug_synchronous) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class Size > |
hipError_t | radix_sort_impl (void *temporary_storage, size_t &storage_size, KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_tmp, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_tmp, ValuesOutputIterator values_output, Size size, bool &is_result_in_output, unsigned int begin_bit, unsigned int end_bit, hipStream_t stream, bool debug_synchronous) |
|
template<bool WithInitialValue, class Config , class ResultType , class InputIterator , class OutputIterator , class InitValueType , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().reduce_config.block_size) void block_reduce_kernel(InputIterator input |
|
template<bool WithInitialValue, class Config , class InputIterator , class OutputIterator , class InitValueType , class BinaryFunction > |
hipError_t | reduce_impl (void *temporary_storage, size_t &storage_size, InputIterator input, OutputIterator output, const InitValueType initial_value, const size_t size, BinaryFunction reduce_op, const hipStream_t stream, bool debug_synchronous) |
|
template<bool Exclusive, class Config , class InputIterator , class OutputIterator , class BinaryFunction , class ResultType > |
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void | single_scan_kernel_impl (InputIterator input, const size_t input_size, ResultType initial_value, OutputIterator output, BinaryFunction scan_op) |
|
template<bool Exclusive, class Config , class InputIterator , class OutputIterator , class BinaryFunction , class InitValueType > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().kernel_config.block_size) void single_scan_kernel(InputIterator input |
|
template<bool Exclusive, class Config , class InputIterator , class OutputIterator , class BinaryFunction , class InitValueType , class LookBackScanState > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().kernel_config.block_size) void lookback_scan_kernel(InputIterator input |
|
template<class InputIterator , class OutputIterator , class InitValueType , class BinaryFunction > |
inline auto | scan_impl (void *temporary_storage, size_t &storage_size, InputIterator input, OutputIterator output, const InitValueType initial_value, const size_t size, BinaryFunction scan_op, const hipStream_t stream, bool debug_synchronous) |
|
template<bool Exclusive, typename Config , typename KeyInputIterator , typename InputIterator , typename OutputIterator , typename InitialValueType , typename CompareFunction , typename BinaryFunction , typename LookbackScanState , typename ResultType > |
void __global__ | __launch_bounds__ (device_params< Config >().kernel_config.block_size) device_scan_by_key_kernel(const KeyInputIterator keys |
|
template<bool Exclusive, typename Config , typename KeysInputIterator , typename InputIterator , typename OutputIterator , typename InitValueType , typename BinaryFunction , typename CompareFunction > |
hipError_t | scan_by_key_impl (void *const temporary_storage, size_t &storage_size, KeysInputIterator keys, InputIterator input, OutputIterator output, const InitValueType initial_value, const size_t size, const BinaryFunction scan_op, const CompareFunction compare, const hipStream_t stream, const bool debug_synchronous) |
|
template<typename ScanByKeyConfig > |
constexpr scan_by_key_config_params | wrap_scan_by_key_config () |
|
template<typename ScanConfig > |
constexpr scan_config_params | wrap_scan_config () |
|
template<class Config , bool Descending, unsigned int BlockSize, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetIterator > |
ROCPRIM_KERNEL | __launch_bounds__ (BlockSize) void segmented_sort_kernel(KeysInputIterator keys_input |
|
template<class Config , bool Descending, unsigned int BlockSize, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class SegmentIndexIterator , class OffsetIterator > |
ROCPRIM_KERNEL | __launch_bounds__ (BlockSize) void segmented_sort_large_kernel(KeysInputIterator keys_input |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator , class OffsetIterator > |
hipError_t | segmented_radix_sort_impl (void *temporary_storage, size_t &storage_size, KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_tmp, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_tmp, ValuesOutputIterator values_output, unsigned int size, bool &is_result_in_output, unsigned int segments, OffsetIterator begin_offsets, OffsetIterator end_offsets, unsigned int begin_bit, unsigned int end_bit, hipStream_t stream, bool debug_synchronous) |
|
template<class Config , class InputIterator , class OutputIterator , class OffsetIterator , class ResultType , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >() .reduce_config.block_size) void segmented_reduce_kernel(InputIterator input |
|
template<class Config , class InputIterator , class OutputIterator , class OffsetIterator , class InitValueType , class BinaryFunction > |
hipError_t | segmented_reduce_impl (void *temporary_storage, size_t &storage_size, InputIterator input, OutputIterator output, unsigned int segments, OffsetIterator begin_offsets, OffsetIterator end_offsets, BinaryFunction reduce_op, InitValueType initial_value, hipStream_t stream, bool debug_synchronous) |
|
template<bool Exclusive, class Config , class ResultType , class InputIterator , class OutputIterator , class OffsetIterator , class InitValueType , class BinaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (ROCPRIM_DEFAULT_MAX_BLOCK_SIZE) void segmented_scan_kernel(InputIterator input |
|
template<bool Exclusive, class Config , class InputIterator , class OutputIterator , class OffsetIterator , class InitValueType , class BinaryFunction > |
hipError_t | segmented_scan_impl (void *temporary_storage, size_t &storage_size, InputIterator input, OutputIterator output, unsigned int segments, OffsetIterator begin_offsets, OffsetIterator end_offsets, const InitValueType initial_value, BinaryFunction scan_op, hipStream_t stream, bool debug_synchronous) |
|
template<class Config , class ResultType , class InputIterator , class OutputIterator , class UnaryFunction > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().kernel_config.block_size) void transform_kernel(InputIterator input |
|
template<typename TransformConfig > |
constexpr transform_config_params | wrap_transform_config () |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator > |
ROCPRIM_KERNEL | __launch_bounds__ (device_params< Config >().block_size) void radix_sort_block_sort_kernel(KeysInputIterator keys_input |
|
| sort_single< params.block_size, params.items_per_thread, Descending > (keys_input, keys_output, values_input, values_output, size, bit, current_radix_bits) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator > |
hipError_t | radix_sort_block_sort (KeysInputIterator keys_input, KeysOutputIterator keys_output, ValuesInputIterator values_input, ValuesOutputIterator values_output, unsigned int size, unsigned int &sort_items_per_block, unsigned int bit, unsigned int end_bit, hipStream_t stream, bool debug_synchronous) |
|
template<class Config , bool Descending, class KeysInputIterator , class KeysOutputIterator , class ValuesInputIterator , class ValuesOutputIterator > |
hipError_t | radix_sort_merge_impl (void *temporary_storage, size_t &storage_size, KeysInputIterator keys_input, typename std::iterator_traits< KeysInputIterator >::value_type *keys_buffer, KeysOutputIterator keys_output, ValuesInputIterator values_input, typename std::iterator_traits< ValuesInputIterator >::value_type *values_buffer, ValuesOutputIterator values_output, unsigned int size, unsigned int bit, unsigned int end_bit, hipStream_t stream, bool debug_synchronous) |
| In device_radix_sort, this device_radix_sort_merge_sort specialization is used only for small input sizes (fewer than 1M elements). More...
|
|
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | atomic_add (unsigned int *address, unsigned int value) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE int | atomic_add (int *address, int value) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE float | atomic_add (float *address, float value) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned long | atomic_add (unsigned long *address, unsigned long value) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned long long | atomic_add (unsigned long long *address, unsigned long long value) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | atomic_wrapinc (unsigned int *address, unsigned int value) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | atomic_exch (unsigned int *address, unsigned int value) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned long long | atomic_exch (unsigned long long *address, unsigned long long value) |
|
template<unsigned int Dim> |
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | block_thread_id () |
| Returns thread identifier in a multidimensional block (tile) by dimension.
|
|
template<unsigned int Dim> |
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | block_id () |
| Returns block identifier in a multidimensional grid by dimension.
|
|
template<unsigned int Dim> |
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | block_size () |
| Returns block size in a multidimensional grid by dimension.
|
|
template<unsigned int Dim> |
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | grid_size () |
| Returns grid size by dimension.
|
|
template<unsigned int LogicalWarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | logical_lane_id () -> typename std::enable_if< detail::is_power_of_two(LogicalWarpSize), unsigned int >::type |
|
template<> |
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | logical_lane_id< device_warp_size()> () |
|
template<unsigned int LogicalWarpSize> |
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | logical_warp_id () |
|
template<> |
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int | logical_warp_id< device_warp_size()> () |
|
ROCPRIM_DEVICE ROCPRIM_INLINE void | memory_fence_system () |
|
ROCPRIM_DEVICE ROCPRIM_INLINE void | memory_fence_block () |
|
ROCPRIM_DEVICE ROCPRIM_INLINE void | memory_fence_device () |
|
ROCPRIM_DEVICE ROCPRIM_INLINE int | warp_any (int predicate) |
|
ROCPRIM_DEVICE ROCPRIM_INLINE int | warp_all (int predicate) |
|
template<class T , class ShuffleOp > |
ROCPRIM_DEVICE ROCPRIM_INLINE std::enable_if< std::is_trivially_copyable< T >::value &&(sizeof(T) % sizeof(int)==0), T >::type | warp_shuffle_op (const T &input, ShuffleOp &&op) |
|
template<class T , class ShuffleOp > |
ROCPRIM_DEVICE ROCPRIM_INLINE std::enable_if<!(std::is_trivially_copyable< T >::value &&(sizeof(T) % sizeof(int)==0)), T >::type | warp_shuffle_op (const T &input, ShuffleOp &&op) |
|
template<class T , int dpp_ctrl, int row_mask = 0xf, int bank_mask = 0xf, bool bound_ctrl = false> |
ROCPRIM_DEVICE ROCPRIM_INLINE T | warp_move_dpp (const T &input) |
|
template<class T , int mask> |
ROCPRIM_DEVICE ROCPRIM_INLINE T | warp_swizzle (const T &input) |
| Swizzle for any data type. More...
|
|
| DEFINE_MAKE_TEXTURE_TYPE (char, 2) |
|
| DEFINE_MAKE_TEXTURE_TYPE (char, 4) |
|
| DEFINE_MAKE_TEXTURE_TYPE (int, 2) |
|
| DEFINE_MAKE_TEXTURE_TYPE (int, 4) |
|
| DEFINE_MAKE_TEXTURE_TYPE (short, 2) |
|
| DEFINE_MAKE_TEXTURE_TYPE (short, 4) |
|
template<class ReferenceTuple , class... Types, size_t... Indices> |
ROCPRIM_HOST_DEVICE ReferenceTuple | dereference_iterator_tuple_impl (const ::rocprim::tuple< Types... > &t, ::rocprim::index_sequence< Indices... >) |
|
template<class ReferenceTuple , class... Types> |
ROCPRIM_HOST_DEVICE ReferenceTuple | dereference_iterator_tuple (const ::rocprim::tuple< Types... > &t) |
|
template<cache_load_modifier MODIFIER = load_default, typename T > |
ROCPRIM_DEVICE __forceinline__ T | AsmThreadLoad (void *ptr) |
|
| ROCPRIM_ASM_THREAD_LOAD_GROUP (load_ca, "glc", "") |
|
| ROCPRIM_ASM_THREAD_LOAD_GROUP (load_cg, "glc slc", "") |
|
| ROCPRIM_ASM_THREAD_LOAD_GROUP (load_cv, "glc", "vmcnt") |
|
| ROCPRIM_ASM_THREAD_LOAD_GROUP (load_volatile, "glc", "vmcnt") |
|
| ROCPRIM_ASM_THREAD_LOAD_GROUP (load_ldg, "", "") |
|
| ROCPRIM_ASM_THREAD_LOAD_GROUP (load_cs, "", "") |
|
template<class InputIteratorT , class OutputIteratorT , class BinaryFunction > |
convert_result_type_wrapper< InputIteratorT, OutputIteratorT, BinaryFunction > | convert_result_type (BinaryFunction op) |
|
template<cache_store_modifier MODIFIER = store_default, typename T > |
ROCPRIM_DEVICE __forceinline__ void | AsmThreadStore (void *ptr, T val) |
|
| ROCPRIM_ASM_THREAD_STORE_GROUP (store_wb, "glc", "") |
|
| ROCPRIM_ASM_THREAD_STORE_GROUP (store_cg, "glc slc", "") |
|
| ROCPRIM_ASM_THREAD_STORE_GROUP (store_wt, "glc", "vmcnt") |
|
| ROCPRIM_ASM_THREAD_STORE_GROUP (store_volatile, "glc", "vmcnt") |
|
| ROCPRIM_ASM_THREAD_STORE_GROUP (store_cs, "", "") |
|
template<typename T > |
ROCPRIM_HOST_DEVICE T | get_input_value (const T value) |
| Used for unpacking a future_value; this is basically just a cast, but it's more explicit this way. More...
|
|
template<typename T , typename Iter > |
ROCPRIM_HOST_DEVICE T | get_input_value (::rocprim::future_value< T, Iter > future) |
|
template<class T > |
ROCPRIM_HOST_DEVICE T && | custom_forward (typename std::remove_reference< T >::type &t) noexcept |
|
template<class T > |
ROCPRIM_HOST_DEVICE T && | custom_forward (typename std::remove_reference< T >::type &&t) noexcept |
|
template<class... Types> |
ROCPRIM_HOST_DEVICE void | swallow (Types &&...) noexcept |
|
| DEFINE_VECTOR_TYPE (char, char) |
|
| DEFINE_VECTOR_TYPE (short, short) |
|
| DEFINE_VECTOR_TYPE (int, int) |
|
| DEFINE_VECTOR_TYPE (longlong, long long) |
|
| DEFINE_MAKE_VECTOR_TYPE (char, char) |
|
| DEFINE_MAKE_VECTOR_TYPE (short, short) |
|
| DEFINE_MAKE_VECTOR_TYPE (int, int) |
|
| DEFINE_MAKE_VECTOR_TYPE (longlong, long long) |
|
template<bool HeadSegmented, unsigned int WarpSize, class Flag > |
ROCPRIM_DEVICE ROCPRIM_INLINE auto | last_in_warp_segment (Flag flag) -> typename std::enable_if<(WarpSize<=__AMDGCN_WAVEFRONT_SIZE), unsigned int >::type |
|