21 #ifndef ROCPRIM_BLOCK_DETAIL_BLOCK_ADJACENT_DIFFERENCE_IMPL_HPP_    22 #define ROCPRIM_BLOCK_DETAIL_BLOCK_ADJACENT_DIFFERENCE_IMPL_HPP_    24 #include "../../config.hpp"    25 #include "../../detail/various.hpp"    26 #include "../../intrinsics/thread.hpp"    28 #include <type_traits>    32 BEGIN_ROCPRIM_NAMESPACE
    // apply() overload for a functor that is invoked with an index argument.
    // This overload's last tag parameter is bool_constant<false>, and it calls
    // op with the operands in (a, b) order.
    // NOTE(review): the parameter lines between `op` and the last tag (and the
    // braces) are missing from this listing; a, b and index are presumably
    // declared there -- confirm against the full header.
    43 template <
class T, 
class BinaryFunction>
    44 ROCPRIM_DEVICE ROCPRIM_INLINE 
auto apply(BinaryFunction op,
    // The trailing return type is spelled with op(b, a, index) even though the
    // call below is op(a, b, index).
    49                                          bool_constant<false> ) -> decltype(op(b, a, index))
    51     return op(a, b, index);
    // apply() overload for a functor that is invoked with an index argument;
    // this one passes b before a: op(b, a, index).
    // NOTE(review): the tag parameters that select this overload are on lines
    // missing from this listing; presumably this is the reversed variant --
    // confirm against the full header.
    54 template <
class T, 
class BinaryFunction>
    55 ROCPRIM_DEVICE ROCPRIM_INLINE 
auto apply(BinaryFunction op,
    61     -> decltype(op(b, a, index))
    63     return op(b, a, index);
    // apply() overload for a functor that is invoked with two operands only
    // (the return type is decltype(op(b, a)), with no index argument).
    // It accepts any AsFlags tag value; its last tag is bool_constant<false>.
    // NOTE(review): the operand/index parameters and the function body are not
    // visible in this listing, so the operand order of the actual call cannot
    // be confirmed from here.
    66 template <
typename T, 
typename BinaryFunction, 
bool AsFlags>
    67 ROCPRIM_DEVICE ROCPRIM_INLINE 
auto apply(BinaryFunction op,
    71                                          bool_constant<AsFlags> ,
    72                                          bool_constant<false> ) -> decltype(op(b, a))
    // apply() overload for a functor that is invoked with two operands only
    // (the return type is decltype(op(b, a)), with no index argument).
    // It accepts any AsFlags tag value; its last tag is bool_constant<true>.
    // NOTE(review): the operand/index parameters and the function body are not
    // visible in this listing, so the operand order of the actual call cannot
    // be confirmed from here.
    77 template <
typename T, 
typename BinaryFunction, 
bool AsFlags>
    78 ROCPRIM_DEVICE ROCPRIM_INLINE 
auto apply(BinaryFunction op,
    82                                          bool_constant<AsFlags> ,
    83                                          bool_constant<true> ) -> decltype(op(b, a))
    // Tail of a template parameter list: the three block dimensions, with the
    // Y and Z dimensions defaulting to 1.
    // NOTE(review): the first template parameter(s) and the class header line
    // are missing from this listing.
    89           unsigned int BlockSizeX,
    90           unsigned int BlockSizeY = 1,
    91           unsigned int BlockSizeZ = 1>
    // Total number of threads in the block: the product of the three dimensions.
    95     static constexpr 
unsigned int BlockSize = BlockSizeX * BlockSizeY * BlockSizeZ;
   // apply_left: fills output[] by combining every item with the item to its
   // left through detail::apply(op, left, current, index, ...).
   //   input                 - this thread's ItemsPerThread items.
   //   output                - receives one result per item.
   //   tile_predecessor_item - used as the left neighbour of item 0 when
   //                           WithTilePredecessor is true.
   // NOTE(review): `Reversed`, `Output`, `op` and `storage` are used below but
   // declared on lines missing from this listing; several conditions, braces
   // and any barrier calls are missing as well.
   101     template <
bool         AsFlags,
   103               bool         WithTilePredecessor,
   104               unsigned int ItemsPerThread,
   106               typename BinaryFunction>
   107     ROCPRIM_DEVICE 
void apply_left(
const T (&input)[ItemsPerThread],
   108                                    Output (&output)[ItemsPerThread],
   110                                    const T        tile_predecessor_item,
   // Tag objects used to pick the matching detail::apply / get_default_item overloads.
   113         static constexpr 
auto as_flags = bool_constant<AsFlags> {};
   114         static constexpr 
auto reversed = bool_constant<Reversed> {};
   116         const unsigned int flat_id
   117             = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
   // Publish this thread's last item; the code below reads slot flat_id - 1,
   // i.e. the last item of the previous thread, as the left neighbour of item 0.
   // NOTE(review): a barrier is expected between this write and those reads,
   // but it is not visible in this listing -- confirm.
   120         storage.items[flat_id] = input[ItemsPerThread - 1];
   // Items ItemsPerThread-1 down to 1: the left neighbour is in this thread's
   // own input. The index passed to op is flat_id * ItemsPerThread + i.
   123         for(
unsigned int i = ItemsPerThread - 1; i > 0; --i)
   125             output[i] = detail::apply(
   126                 op, input[i - 1], input[i], flat_id * ItemsPerThread + i, as_flags, reversed);
   // Item 0, variant with a tile predecessor: start from tile_predecessor_item
   // and replace it with the previous thread's last item. The guarding
   // condition is on a missing line (presumably flat_id != 0 -- confirm).
   130         if ROCPRIM_IF_CONSTEXPR (WithTilePredecessor)
   132             T predecessor_item = tile_predecessor_item;
   134                 predecessor_item = storage.items[flat_id - 1];
   137             output[0] = detail::apply(
   138                 op, predecessor_item, input[0], flat_id * ItemsPerThread, as_flags, reversed);
   // Item 0, variant without a tile predecessor: write the default item first,
   // then overwrite it with the op result that uses the previous thread's last
   // item (the condition guarding the overwrite is on a missing line).
   142             output[0] = get_default_item(input, 0, as_flags);
   144                 output[0] = detail::apply(op,
   145                                           storage.items[flat_id - 1],
   147                                           flat_id * ItemsPerThread,
   // apply_left_partial: like a left-neighbour pass, but op is applied only to
   // items whose index (flat_id * ItemsPerThread + i) is below valid_items;
   // every other output keeps the value from get_default_item.
   //   input                 - this thread's ItemsPerThread items.
   //   output                - receives one result per item.
   //   tile_predecessor_item - used as the left neighbour of item 0 when
   //                           WithTilePredecessor is true.
   //   valid_items           - exclusive upper bound on the indices op is applied to.
   // NOTE(review): `Reversed`, `Output`, `op` and `storage` are used below but
   // declared on lines missing from this listing; some conditions, braces and
   // any barrier calls are missing as well.
   154     template <
bool         AsFlags,
   156               bool         WithTilePredecessor,
   157               unsigned int ItemsPerThread,
   159               typename BinaryFunction>
   160     ROCPRIM_DEVICE 
void apply_left_partial(
const T (&input)[ItemsPerThread],
   161                                            Output (&output)[ItemsPerThread],
   163                                            const T            tile_predecessor_item,
   164                                            const unsigned int valid_items,
   // Tag objects used to pick the matching detail::apply / get_default_item overloads.
   167         static constexpr 
auto as_flags = bool_constant<AsFlags> {};
   168         static constexpr 
auto reversed = bool_constant<Reversed> {};
   170         const unsigned int flat_id
   171             = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
   // Publish this thread's last item; slot flat_id - 1 is read further down as
   // the left neighbour of item 0.
   // NOTE(review): a barrier is expected between this write and those reads,
   // but it is not visible in this listing -- confirm.
   174         storage.items[flat_id] = input[ItemsPerThread - 1];
   // Items ItemsPerThread-1 down to 1: write the default first, then overwrite
   // it with the op result only when the item's index is within valid_items.
   177         for(
unsigned int i = ItemsPerThread - 1; i > 0; --i)
   179             const unsigned int index = flat_id * ItemsPerThread + i;
   180             output[i] = get_default_item(input, i, as_flags);
   181             if(index < valid_items) {
   182                 output[i] = detail::apply(op, input[i - 1], input[i], index, as_flags, reversed);
   // Index of this thread's item 0.
   187         const unsigned int index = flat_id * ItemsPerThread;
   // Item 0, variant with a tile predecessor: start from tile_predecessor_item
   // and replace it with the previous thread's last item (the guarding
   // condition is on a missing line, presumably flat_id != 0 -- confirm).
   189         if ROCPRIM_IF_CONSTEXPR (WithTilePredecessor)
   191             T predecessor_item = tile_predecessor_item;
   193                 predecessor_item = storage.items[flat_id - 1];
   196             output[0] = get_default_item(input, 0, as_flags);
   197             if(index < valid_items)
   200                     = detail::apply(op, predecessor_item, input[0], index, as_flags, reversed);
   // Item 0, variant without a tile predecessor: thread 0 keeps the default;
   // other threads apply op against the previous thread's last item when the
   // index is within valid_items.
   205             output[0] = get_default_item(input, 0, as_flags);
   206             if(flat_id != 0 && index < valid_items)
   208                 output[0] = detail::apply(op,
   209                                           storage.items[flat_id - 1],
   211                                           flat_id * ItemsPerThread,
   // apply_right: fills output[] by combining every item with the item to its
   // right through detail::apply(op, current, right, index, ...).
   //   input               - this thread's ItemsPerThread items.
   //   output              - receives one result per item.
   //   tile_successor_item - used as the right neighbour of the last item of
   //                         the last thread when WithTileSuccessor is true.
   // NOTE(review): `Reversed`, `Output`, `op` and `storage` are used below but
   // declared on lines missing from this listing; some argument lines, braces
   // and any barrier calls are missing as well.
   218     template <
bool         AsFlags,
   220               bool         WithTileSuccessor,
   221               unsigned int ItemsPerThread,
   223               typename BinaryFunction>
   224     ROCPRIM_DEVICE 
void apply_right(
const T (&input)[ItemsPerThread],
   225                                     Output (&output)[ItemsPerThread],
   227                                     const T        tile_successor_item,
   // Tag objects used to pick the matching detail::apply / get_default_item overloads.
   230         static constexpr 
auto as_flags = bool_constant<AsFlags> {};
   231         static constexpr 
auto reversed = bool_constant<Reversed> {};
   233         const unsigned int flat_id
   234             = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
   // Publish this thread's first item; slot flat_id + 1 is read further down
   // as the right neighbour of this thread's last item.
   // NOTE(review): a barrier is expected between this write and those reads,
   // but it is not visible in this listing -- confirm.
   237         storage.items[flat_id] = input[0];
   // Items 0 .. ItemsPerThread-2: the right neighbour is in this thread's own
   // input. The index passed to op is that of the right-hand item (i + 1).
   240         for(
unsigned int i = 0; i < ItemsPerThread - 1; ++i)
   242             output[i] = detail::apply(
   243                 op, input[i], input[i + 1], flat_id * ItemsPerThread + i + 1, as_flags, reversed);
   // Last item, variant with a tile successor: every thread except the last
   // one in the block takes the next thread's first item; the last thread
   // keeps tile_successor_item.
   247         if ROCPRIM_IF_CONSTEXPR (WithTileSuccessor)
   249             T successor_item = tile_successor_item;
   250             if(flat_id != BlockSize - 1) {
   251                 successor_item = storage.items[flat_id + 1];
   254             output[ItemsPerThread - 1] = detail::apply(op,
   255                                                        input[ItemsPerThread - 1],
   257                                                        flat_id * ItemsPerThread + ItemsPerThread,
   // Last item, variant without a tile successor: the last thread in the block
   // keeps the default item; the others apply op against the next thread's
   // first item.
   263             output[ItemsPerThread - 1] = get_default_item(input, ItemsPerThread - 1, as_flags);
   264             if(flat_id != BlockSize - 1) {
   265                 output[ItemsPerThread - 1]
   267                                     input[ItemsPerThread - 1],
   268                                     storage.items[flat_id + 1],
   269                                     flat_id * ItemsPerThread + ItemsPerThread,
   // apply_right_partial: like a right-neighbour pass, but op is applied only
   // when the index of the right-hand item is below valid_items; every other
   // output keeps the value from get_default_item.
   //   input       - this thread's ItemsPerThread items.
   //   output      - receives one result per item.
   //   valid_items - exclusive upper bound on the right-hand item index.
   // NOTE(review): `Reversed`, `Output`, `op` and `storage` are used below but
   // declared on lines missing from this listing; some argument lines, braces
   // and any barrier calls are missing as well.
   275     template <
bool         AsFlags,
   277               unsigned int ItemsPerThread,
   279               typename BinaryFunction>
   280     ROCPRIM_DEVICE 
void apply_right_partial(
const T (&input)[ItemsPerThread],
   281                                             Output (&output)[ItemsPerThread],
   283                                             const unsigned int valid_items,
   // Tag objects used to pick the matching detail::apply / get_default_item overloads.
   286         static constexpr 
auto as_flags = bool_constant<AsFlags> {};
   287         static constexpr 
auto reversed = bool_constant<Reversed> {};
   289         const unsigned int flat_id
   290             = ::rocprim::flat_block_thread_id<BlockSizeX, BlockSizeY, BlockSizeZ>();
   // Publish this thread's first item; slot flat_id + 1 is read further down
   // as the right neighbour of this thread's last item.
   // NOTE(review): a barrier is expected between this write and that read,
   // but it is not visible in this listing -- confirm.
   293         storage.items[flat_id] = input[0];
   // Items 0 .. ItemsPerThread-2: write the default first, then overwrite it
   // only when the right-hand item (index i + 1) is within valid_items.
   296         for(
unsigned int i = 0; i < ItemsPerThread - 1; ++i)
   298             const unsigned int index = flat_id * ItemsPerThread + i + 1;
   299             output[i] = get_default_item(input, i, as_flags);
   300             if(index < valid_items)
   302                 output[i] = detail::apply(op, input[i], input[i + 1], index, as_flags, reversed);
   // Last item: default unless the next thread's first item is a valid item.
   307         output[ItemsPerThread - 1] = get_default_item(input, ItemsPerThread - 1, as_flags);
   // Index of the first item owned by the next thread.
   309         const unsigned int next_thread_index = flat_id * ItemsPerThread + ItemsPerThread;
   310         if(next_thread_index < valid_items)
   312             output[ItemsPerThread - 1] = detail::apply(op,
   313                                                        input[ItemsPerThread - 1],
   314                                                        storage.items[flat_id + 1],
   // get_default_item overload selected by a bool_constant<true> tag (callers
   // in this file pass their as_flags tag). The input array parameter is
   // unnamed, so its contents are not used, and the result type is int.
   // NOTE(review): the index parameter and the body are on lines missing from
   // this listing, so the returned value cannot be confirmed from here.
   322     template <
unsigned int ItemsPerThread>
   323     ROCPRIM_DEVICE 
int get_default_item(
const T (&)[ItemsPerThread],
   325                                         bool_constant<true> )
   // get_default_item overload selected by a bool_constant<false> tag (callers
   // in this file pass their as_flags tag). It takes the thread's input array
   // and an item index, and returns a T.
   // NOTE(review): the body is not visible in this listing; presumably it
   // returns input[index] -- confirm against the full header.
   330     template <
unsigned int ItemsPerThread>
   331     ROCPRIM_DEVICE T get_default_item(
const T (&input)[ItemsPerThread],
   332                                       const unsigned int index,
   333                                       bool_constant<false> )
   341 END_ROCPRIM_NAMESPACE
   343 #endif // ROCPRIM_BLOCK_DETAIL_BLOCK_ADJACENT_DIFFERENCE_IMPL_HPP_ Deprecated: Configuration of device-level scan primitives. 
Definition: block_histogram.hpp:62
ROCPRIM_DEVICE ROCPRIM_INLINE void syncthreads()
Synchronize all threads in a block (tile) 
Definition: thread.hpp:216
Definition: block_adjacent_difference_impl.hpp:96
Definition: block_adjacent_difference_impl.hpp:92