30 #ifndef ROCPRIM_BLOCK_BLOCK_ADJACENT_DIFFERENCE_HPP_ 31 #define ROCPRIM_BLOCK_BLOCK_ADJACENT_DIFFERENCE_HPP_ 34 #include "detail/block_adjacent_difference_impl.hpp" 36 #include "../config.hpp" 37 #include "../detail/various.hpp" 44 BEGIN_ROCPRIM_NAMESPACE
87 unsigned int BlockSizeX,
88 unsigned int BlockSizeY = 1,
89 unsigned int BlockSizeZ = 1
92 #ifndef DOXYGEN_SHOULD_SKIP_THIS
94 #endif // DOXYGEN_SHOULD_SKIP_THIS 98 static constexpr
unsigned BlockSize = base_type::BlockSize;
116 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen 164 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
165 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 166 "Use subtract_left or block_discontinuity.flag_heads instead.")]]
167 ROCPRIM_DEVICE ROCPRIM_INLINE
169 const T (&input)[ItemsPerThread],
173 static constexpr
auto as_flags =
true;
174 static constexpr
auto reversed =
true;
175 static constexpr
auto with_predecessor =
false;
176 base_type::template apply_left<as_flags, reversed, with_predecessor>(
177 input, head_flags, flag_op, input[0] , storage.get().left);
186 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
187 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 188 "Use subtract_left or block_discontinuity.flag_heads instead.")]]
189 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
191 const T (&input)[ItemsPerThread],
195 flag_heads(head_flags, input, flag_op, storage);
249 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
250 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 251 "Use subtract_left or block_discontinuity.flag_heads instead.")]]
252 ROCPRIM_DEVICE ROCPRIM_INLINE
254 T tile_predecessor_item,
255 const T (&input)[ItemsPerThread],
259 static constexpr
auto as_flags =
true;
260 static constexpr
auto reversed =
true;
261 static constexpr
auto with_predecessor =
true;
262 base_type::template apply_left<as_flags, reversed, with_predecessor>(
263 input, head_flags, flag_op, tile_predecessor_item, storage.get().left);
273 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
274 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 275 "Use subtract_left or block_discontinuity.flag_heads instead.")]]
276 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
278 T tile_predecessor_item,
279 const T (&input)[ItemsPerThread],
283 flag_heads(head_flags, tile_predecessor_item, input, flag_op, storage);
329 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
330 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 331 "Use subtract_right or block_discontinuity.flag_tails instead.")]]
332 ROCPRIM_DEVICE ROCPRIM_INLINE
334 const T (&input)[ItemsPerThread],
338 static constexpr
auto as_flags =
true;
339 static constexpr
auto reversed =
true;
340 static constexpr
auto with_successor =
false;
341 base_type::template apply_right<as_flags, reversed, with_successor>(
342 input, tail_flags, flag_op, input[0] , storage.get().right);
352 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
353 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 354 "Use subtract_right or block_discontinuity.flag_tails instead.")]]
355 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
357 const T (&input)[ItemsPerThread],
361 flag_tails(tail_flags, input, flag_op, storage);
415 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
416 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 417 "Use subtract_right or block_discontinuity.flag_tails instead.")]]
418 ROCPRIM_DEVICE ROCPRIM_INLINE
420 T tile_successor_item,
421 const T (&input)[ItemsPerThread],
425 static constexpr
auto as_flags =
true;
426 static constexpr
auto reversed =
true;
427 static constexpr
auto with_successor =
true;
428 base_type::template apply_right<as_flags, reversed, with_successor>(
429 input, tail_flags, flag_op, tile_successor_item, storage.get().right);
439 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
440 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 441 "Use subtract_right or block_discontinuity.flag_tails instead.")]]
442 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
444 T tile_successor_item,
445 const T (&input)[ItemsPerThread],
449 flag_tails(tail_flags, tile_successor_item, input, flag_op, storage);
495 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
496 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 497 "Use block_discontinuity.flag_heads_and_tails instead.")]]
498 ROCPRIM_DEVICE ROCPRIM_INLINE
500 Flag (&tail_flags)[ItemsPerThread],
501 const T (&input)[ItemsPerThread],
505 static constexpr
auto as_flags =
true;
506 static constexpr
auto reversed =
true;
507 static constexpr
auto with_predecessor =
false;
508 static constexpr
auto with_successor =
false;
511 T items[ItemsPerThread];
514 for(
unsigned int i = 0; i < ItemsPerThread; ++i) {
518 base_type::template apply_left<as_flags, reversed, with_predecessor>(
519 items, head_flags, flag_op, items[0] , storage.get().left);
521 base_type::template apply_right<as_flags, reversed, with_successor>(
522 items, tail_flags, flag_op, items[0] , storage.get().right);
532 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
533 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 534 "Use block_discontinuity.flag_heads_and_tails instead.")]]
535 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
537 Flag (&tail_flags)[ItemsPerThread],
538 const T (&input)[ItemsPerThread],
599 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
600 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 601 "Use block_discontinuity.flag_heads_and_tails instead.")]]
602 ROCPRIM_DEVICE ROCPRIM_INLINE
604 Flag (&tail_flags)[ItemsPerThread],
605 T tile_successor_item,
606 const T (&input)[ItemsPerThread],
610 static constexpr
auto as_flags =
true;
611 static constexpr
auto reversed =
true;
612 static constexpr
auto with_predecessor =
false;
613 static constexpr
auto with_successor =
true;
616 T items[ItemsPerThread];
619 for(
unsigned int i = 0; i < ItemsPerThread; ++i) {
623 base_type::template apply_left<as_flags, reversed, with_predecessor>(
624 items, head_flags, flag_op, items[0] , storage.get().left);
626 base_type::template apply_right<as_flags, reversed, with_successor>(
627 items, tail_flags, flag_op, tile_successor_item, storage.get().right);
637 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
638 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 639 "Use block_discontinuity.flag_heads_and_tails instead.")]]
640 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
642 Flag (&tail_flags)[ItemsPerThread],
643 T tile_successor_item,
644 const T (&input)[ItemsPerThread],
705 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
706 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 707 "Use block_discontinuity.flag_heads_and_tails instead.")]]
708 ROCPRIM_DEVICE ROCPRIM_INLINE
710 T tile_predecessor_item,
711 Flag (&tail_flags)[ItemsPerThread],
712 const T (&input)[ItemsPerThread],
716 static constexpr
auto as_flags =
true;
717 static constexpr
auto reversed =
true;
718 static constexpr
auto with_predecessor =
true;
719 static constexpr
auto with_successor =
false;
722 T items[ItemsPerThread];
725 for(
unsigned int i = 0; i < ItemsPerThread; ++i) {
729 base_type::template apply_left<as_flags, reversed, with_predecessor>(
730 items, head_flags, flag_op, tile_predecessor_item, storage.get().left);
732 base_type::template apply_right<as_flags, reversed, with_successor>(
733 items, tail_flags, flag_op, items[0] , storage.get().right);
743 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
744 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 745 "Use block_discontinuity.flag_heads_and_tails instead.")]]
746 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
748 T tile_predecessor_item,
749 Flag (&tail_flags)[ItemsPerThread],
750 const T (&input)[ItemsPerThread],
817 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
818 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 819 "Use block_discontinuity.flag_heads_and_tails instead.")]]
820 ROCPRIM_DEVICE ROCPRIM_INLINE
822 T tile_predecessor_item,
823 Flag (&tail_flags)[ItemsPerThread],
824 T tile_successor_item,
825 const T (&input)[ItemsPerThread],
829 static constexpr
auto as_flags =
true;
830 static constexpr
auto reversed =
true;
831 static constexpr
auto with_predecessor =
true;
832 static constexpr
auto with_successor =
true;
835 T items[ItemsPerThread];
838 for(
unsigned int i = 0; i < ItemsPerThread; ++i) {
842 base_type::template apply_left<as_flags, reversed, with_predecessor>(
843 items, head_flags, flag_op, tile_predecessor_item, storage.get().left);
845 base_type::template apply_right<as_flags, reversed, with_successor>(
846 items, tail_flags, flag_op, tile_successor_item, storage.get().right);
856 template<
unsigned int ItemsPerThread,
class Flag,
class FlagOp>
857 [[
deprecated(
"The flags API of block_adjacent_difference is deprecated." 858 "Use block_discontinuity.flag_heads_and_tails instead.")]]
859 ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
861 T tile_predecessor_item,
862 Flag (&tail_flags)[ItemsPerThread],
863 T tile_successor_item,
864 const T (&input)[ItemsPerThread],
869 head_flags, tile_predecessor_item, tail_flags, tile_successor_item,
870 input, flag_op, storage
897 template <
typename Output,
unsigned int ItemsPerThread,
typename BinaryFunction>
898 ROCPRIM_DEVICE ROCPRIM_INLINE
void subtract_left(
const T (&input)[ItemsPerThread],
899 Output (&output)[ItemsPerThread],
900 const BinaryFunction op,
903 static constexpr
auto as_flags =
false;
904 static constexpr
auto reversed =
true;
905 static constexpr
auto with_predecessor =
false;
907 base_type::template apply_left<as_flags, reversed, with_predecessor>(
908 input, output, op, input[0] , storage.get().left);
937 template <
typename Output,
unsigned int ItemsPerThread,
typename BinaryFunction>
938 ROCPRIM_DEVICE ROCPRIM_INLINE
void subtract_left(
const T (&input)[ItemsPerThread],
939 Output (&output)[ItemsPerThread],
940 const BinaryFunction op,
941 const T tile_predecessor,
944 static constexpr
auto as_flags =
false;
945 static constexpr
auto reversed =
true;
946 static constexpr
auto with_predecessor =
true;
948 base_type::template apply_left<as_flags, reversed, with_predecessor>(
949 input, output, op, tile_predecessor, storage.get().left);
978 template <
typename Output,
unsigned int ItemsPerThread,
typename BinaryFunction>
980 Output (&output)[ItemsPerThread],
981 const BinaryFunction op,
982 const unsigned int valid_items,
985 static constexpr
auto as_flags =
false;
986 static constexpr
auto reversed =
true;
987 static constexpr
auto with_predecessor =
false;
989 base_type::template apply_left_partial<as_flags, reversed, with_predecessor>(
990 input, output, op, input[0] , valid_items, storage.get().left);
1015 template <
typename Output,
unsigned int ItemsPerThread,
typename BinaryFunction>
1017 Output (&output)[ItemsPerThread],
1018 const BinaryFunction op,
1019 const T tile_predecessor,
1020 const unsigned int valid_items,
1023 static constexpr
auto as_flags =
false;
1024 static constexpr
auto reversed =
true;
1025 static constexpr
auto with_predecessor =
true;
1027 base_type::template apply_left_partial<as_flags, reversed, with_predecessor>(
1028 input, output, op, tile_predecessor, valid_items, storage.get().left);
1054 template <
typename Output,
unsigned int ItemsPerThread,
typename BinaryFunction>
1056 Output (&output)[ItemsPerThread],
1057 const BinaryFunction op,
1060 static constexpr
auto as_flags =
false;
1061 static constexpr
auto reversed =
false;
1062 static constexpr
auto with_successor =
false;
1064 base_type::template apply_right<as_flags, reversed, with_successor>(
1065 input, output, op, input[0] , storage.get().right);
1095 template <
typename Output,
unsigned int ItemsPerThread,
typename BinaryFunction>
1097 Output (&output)[ItemsPerThread],
1098 const BinaryFunction op,
1099 const T tile_successor,
1102 static constexpr
auto as_flags =
false;
1103 static constexpr
auto reversed =
false;
1104 static constexpr
auto with_successor =
true;
1106 base_type::template apply_right<as_flags, reversed, with_successor>(
1107 input, output, op, tile_successor, storage.get().right);
1135 template <
typename Output,
unsigned int ItemsPerThread,
typename BinaryFunction>
1137 Output (&output)[ItemsPerThread],
1138 const BinaryFunction op,
1139 const unsigned int valid_items,
1142 static constexpr
auto as_flags =
false;
1143 static constexpr
auto reversed =
false;
1145 base_type::template apply_right_partial<as_flags, reversed>(
1146 input, output, op, valid_items, storage.get().right);
1150 END_ROCPRIM_NAMESPACE
1155 #endif // ROCPRIM_BLOCK_BLOCK_ADJACENT_DIFFERENCE_HPP_ ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:168
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:709
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:356
The block_adjacent_difference class is a block level parallel primitive which provides methods for ap...
Definition: block_adjacent_difference.hpp:91
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right_partial(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const unsigned int valid_items, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1136
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const T tile_predecessor, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:938
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1055
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:190
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:499
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:860
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:603
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:253
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags tail_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:419
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags tail_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:333
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:277
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:641
Definition: block_adjacent_difference_impl.hpp:96
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const T tile_successor, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1096
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:536
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left_partial(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const unsigned int valid_items, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:979
Definition: block_adjacent_difference_impl.hpp:92
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:821
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:898
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:443
struct [[deprecated("use radix_sort_config_v2")]] radix_sort_config
Legacy configuration of device-level radix sort operation.
Definition: device_radix_sort_config.hpp:95
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left_partial(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const T tile_predecessor, const unsigned int valid_items, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1016
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:747