21 #ifndef ROCPRIM_DEVICE_DETAIL_DEVICE_TRANSFORM_HPP_ 22 #define ROCPRIM_DEVICE_DETAIL_DEVICE_TRANSFORM_HPP_ 24 #include <type_traits> 27 #include "../../config.hpp" 28 #include "../../detail/various.hpp" 29 #include "../../detail/match_result_type.hpp" 31 #include "../../intrinsics.hpp" 32 #include "../../functional.hpp" 33 #include "../../types.hpp" 35 #include "../../block/block_load.hpp" 36 #include "../../block/block_store.hpp" 38 BEGIN_ROCPRIM_NAMESPACE
// Fragment of a function-object adapter. The defaulted destructor further down
// names the enclosing type `unpack_binary_op`; its `struct`/`class` header and
// opening brace (listing lines 46-47) are not present in this extract.
// Visible behaviour: a BinaryFunction is stored in `binary_op_` and invoked on
// the two elements of a ::rocprim::tuple<T1, T2>.
45 template<
class T1,
class T2,
class BinaryFunction>
// Return type of calling BinaryFunction with (T1, T2), as computed by
// ::rocprim::detail::invoke_result.
48 using result_type = typename ::rocprim::detail::invoke_result<BinaryFunction, T1, T2>::type;
// NOTE(review): listing lines 51-52 and 54-57 are missing here; presumably they
// hold the constructors these qualifiers belong to -- confirm against the
// original header.
50 ROCPRIM_HOST_DEVICE
inline 53 ROCPRIM_HOST_DEVICE
inline 58 ROCPRIM_HOST_DEVICE
// Defaulted destructor.
inline 59 ~unpack_binary_op() =
default;
// Call operator: unpacks element 0 and element 1 of the tuple `t` and forwards
// them, in that order, to the stored binary function.
61 ROCPRIM_HOST_DEVICE
inline 62 result_type operator()(const ::rocprim::tuple<T1, T2>& t)
64 return binary_op_(::rocprim::get<0>(t), ::rocprim::get<1>(t));
// The wrapped callable, held by value.
68 BinaryFunction binary_op_;
// Fragment of the per-block implementation of an element-wise transform:
// each block loads a tile of `input`, applies `transform_op` to every loaded
// value and stores the results to `output`.
// NOTE(review): the `template<` line, the remaining template parameters
// (listing lines 74-78; the body uses ResultType, InputIterator and
// OutputIterator) and the last function parameter (the body calls
// `transform_op`) are missing from this extract.
72 unsigned int BlockSize,
73 unsigned int ItemsPerThread,
79 ROCPRIM_DEVICE ROCPRIM_INLINE
80 void transform_kernel_impl(InputIterator input,
81 const size_t input_size,
82 OutputIterator output,
// Value types of the two iterators.
85 using input_type =
typename std::iterator_traits<InputIterator>::value_type;
86 using output_type =
typename std::iterator_traits<OutputIterator>::value_type;
// Selects ResultType when the output iterator's value_type is void, otherwise
// output_type. NOTE(review): the alias name on listing line 87 is missing;
// presumably `result_type`, which is used for `output_values` below.
88 typename std::conditional<
89 std::is_void<output_type>::value, ResultType, output_type
// Number of elements one block processes.
92 constexpr
unsigned int items_per_block = BlockSize * ItemsPerThread;
94 const unsigned int flat_id = ::rocprim::detail::block_thread_id<0>();
95 const unsigned int flat_block_id = ::rocprim::detail::block_id<0>();
// Index of this block's first element relative to `input` / `output`.
// NOTE(review): the product is computed in `unsigned int` although
// `input_size` is `size_t`; it wraps if flat_block_id * items_per_block
// exceeds the range of unsigned int -- confirm callers bound the input size
// or launch the kernel in chunks.
96 const unsigned int block_offset = flat_block_id * items_per_block;
97 const unsigned int number_of_blocks = ::rocprim::detail::grid_size<0>();
// Elements remaining from block_offset to the end of the input. Computed in
// every block and narrowed from size_t to unsigned int; the visible code only
// reads it inside the last-block branch.
98 const unsigned int valid_in_last_block = input_size - block_offset;
// Per-thread staging arrays for loaded inputs and computed results.
100 input_type input_values[ItemsPerThread];
101 result_type output_values[ItemsPerThread];
// Last block of the grid: the tile may be only partially filled.
103 if(flat_block_id == (number_of_blocks - 1))
// NOTE(review): most arguments of the load/store calls in this function are
// missing from the extract; only the offset iterator argument is visible.
105 block_load_direct_striped<BlockSize>(
107 input + block_offset,
113 for(
unsigned int i = 0; i < ItemsPerThread; i++)
// Item i of this thread is treated as block-local position
// BlockSize * i + flat_id; positions at or past valid_in_last_block are
// skipped, so transform_op is not applied to them.
115 if(BlockSize * i + flat_id < valid_in_last_block)
117 output_values[i] = transform_op(input_values[i]);
121 block_store_direct_striped<BlockSize>(
123 output + block_offset,
// Second load/transform/store sequence with no per-item bounds check.
// NOTE(review): presumably the `else` branch for completely filled blocks;
// the `else` line itself is not present in this extract.
130 block_load_direct_striped<BlockSize>(
132 input + block_offset,
137 for(
unsigned int i = 0; i < ItemsPerThread; i++)
139 output_values[i] = transform_op(input_values[i]);
142 block_store_direct_striped<BlockSize>(
144 output + block_offset,
152 END_ROCPRIM_NAMESPACE
154 #endif // ROCPRIM_DEVICE_DETAIL_DEVICE_TRANSFORM_HPP_ Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: device_transform.hpp:46
ROCPRIM_DEVICE ROCPRIM_INLINE unsigned int flat_block_id()
Returns flat (linear, 1D) block identifier in a multidimensional grid.
Definition: thread.hpp:178