rocPRIM
block_adjacent_difference.hpp
1 /******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 * Modifications Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the NVIDIA CORPORATION nor the
14 * names of its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 ******************************************************************************/
29 
30 #ifndef ROCPRIM_BLOCK_BLOCK_ADJACENT_DIFFERENCE_HPP_
31 #define ROCPRIM_BLOCK_BLOCK_ADJACENT_DIFFERENCE_HPP_
32 
33 
34 #include "detail/block_adjacent_difference_impl.hpp"
35 
36 #include "../config.hpp"
37 #include "../detail/various.hpp"
38 
39 
40 
43 
44 BEGIN_ROCPRIM_NAMESPACE
45 
85 template<
86  class T,
87  unsigned int BlockSizeX,
88  unsigned int BlockSizeY = 1,
89  unsigned int BlockSizeZ = 1
90 >
92 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hide implementation detail from documentation
93  : private detail::block_adjacent_difference_impl<T, BlockSizeX, BlockSizeY, BlockSizeZ>
94 #endif // DOXYGEN_SHOULD_SKIP_THIS
95 {
97 
98  static constexpr unsigned BlockSize = base_type::BlockSize;
99  // Struct used for creating a raw_storage object for this primitive's temporary storage.
100  struct storage_type_
101  {
102  typename base_type::storage_type left;
103  typename base_type::storage_type right;
104  };
105 
106 public:
107 
116  #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen
118  #else
119  using storage_type = storage_type_;
120  #endif
121 
164  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
165  [[deprecated("The flags API of block_adjacent_difference is deprecated."
166  "Use subtract_left or block_discontinuity.flag_heads instead.")]]
167  ROCPRIM_DEVICE ROCPRIM_INLINE
168  void flag_heads(Flag (&head_flags)[ItemsPerThread],
169  const T (&input)[ItemsPerThread],
170  FlagOp flag_op,
171  storage_type& storage)
172  {
173  static constexpr auto as_flags = true;
174  static constexpr auto reversed = true;
175  static constexpr auto with_predecessor = false;
176  base_type::template apply_left<as_flags, reversed, with_predecessor>(
177  input, head_flags, flag_op, input[0] /* predecessor */, storage.get().left);
178  }
179 
186  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
187  [[deprecated("The flags API of block_adjacent_difference is deprecated."
188  "Use subtract_left or block_discontinuity.flag_heads instead.")]]
189  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
190  void flag_heads(Flag (&head_flags)[ItemsPerThread],
191  const T (&input)[ItemsPerThread],
192  FlagOp flag_op)
193  {
194  ROCPRIM_SHARED_MEMORY storage_type storage;
195  flag_heads(head_flags, input, flag_op, storage);
196  }
197 
249  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
250  [[deprecated("The flags API of block_adjacent_difference is deprecated."
251  "Use subtract_left or block_discontinuity.flag_heads instead.")]]
252  ROCPRIM_DEVICE ROCPRIM_INLINE
253  void flag_heads(Flag (&head_flags)[ItemsPerThread],
254  T tile_predecessor_item,
255  const T (&input)[ItemsPerThread],
256  FlagOp flag_op,
257  storage_type& storage)
258  {
259  static constexpr auto as_flags = true;
260  static constexpr auto reversed = true;
261  static constexpr auto with_predecessor = true;
262  base_type::template apply_left<as_flags, reversed, with_predecessor>(
263  input, head_flags, flag_op, tile_predecessor_item, storage.get().left);
264  }
265 
273  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
274  [[deprecated("The flags API of block_adjacent_difference is deprecated."
275  "Use subtract_left or block_discontinuity.flag_heads instead.")]]
276  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
277  void flag_heads(Flag (&head_flags)[ItemsPerThread],
278  T tile_predecessor_item,
279  const T (&input)[ItemsPerThread],
280  FlagOp flag_op)
281  {
282  ROCPRIM_SHARED_MEMORY storage_type storage;
283  flag_heads(head_flags, tile_predecessor_item, input, flag_op, storage);
284  }
285 
329  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
330  [[deprecated("The flags API of block_adjacent_difference is deprecated."
331  "Use subtract_right or block_discontinuity.flag_tails instead.")]]
332  ROCPRIM_DEVICE ROCPRIM_INLINE
333  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
334  const T (&input)[ItemsPerThread],
335  FlagOp flag_op,
336  storage_type& storage)
337  {
338  static constexpr auto as_flags = true;
339  static constexpr auto reversed = true;
340  static constexpr auto with_successor = false;
341  base_type::template apply_right<as_flags, reversed, with_successor>(
342  input, tail_flags, flag_op, input[0] /* successor */, storage.get().right);
343  }
344 
352  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
353  [[deprecated("The flags API of block_adjacent_difference is deprecated."
354  "Use subtract_right or block_discontinuity.flag_tails instead.")]]
355  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
356  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
357  const T (&input)[ItemsPerThread],
358  FlagOp flag_op)
359  {
360  ROCPRIM_SHARED_MEMORY storage_type storage;
361  flag_tails(tail_flags, input, flag_op, storage);
362  }
363 
415  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
416  [[deprecated("The flags API of block_adjacent_difference is deprecated."
417  "Use subtract_right or block_discontinuity.flag_tails instead.")]]
418  ROCPRIM_DEVICE ROCPRIM_INLINE
419  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
420  T tile_successor_item,
421  const T (&input)[ItemsPerThread],
422  FlagOp flag_op,
423  storage_type& storage)
424  {
425  static constexpr auto as_flags = true;
426  static constexpr auto reversed = true;
427  static constexpr auto with_successor = true;
428  base_type::template apply_right<as_flags, reversed, with_successor>(
429  input, tail_flags, flag_op, tile_successor_item, storage.get().right);
430  }
431 
439  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
440  [[deprecated("The flags API of block_adjacent_difference is deprecated."
441  "Use subtract_right or block_discontinuity.flag_tails instead.")]]
442  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
443  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
444  T tile_successor_item,
445  const T (&input)[ItemsPerThread],
446  FlagOp flag_op)
447  {
448  ROCPRIM_SHARED_MEMORY storage_type storage;
449  flag_tails(tail_flags, tile_successor_item, input, flag_op, storage);
450  }
451 
495  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
496  [[deprecated("The flags API of block_adjacent_difference is deprecated."
497  "Use block_discontinuity.flag_heads_and_tails instead.")]]
498  ROCPRIM_DEVICE ROCPRIM_INLINE
499  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
500  Flag (&tail_flags)[ItemsPerThread],
501  const T (&input)[ItemsPerThread],
502  FlagOp flag_op,
503  storage_type& storage)
504  {
505  static constexpr auto as_flags = true;
506  static constexpr auto reversed = true;
507  static constexpr auto with_predecessor = false;
508  static constexpr auto with_successor = false;
509 
510  // Copy items in case head_flags is aliased with input
511  T items[ItemsPerThread];
512 
513  ROCPRIM_UNROLL
514  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
515  items[i] = input[i];
516  }
517 
518  base_type::template apply_left<as_flags, reversed, with_predecessor>(
519  items, head_flags, flag_op, items[0] /*predecessor*/, storage.get().left);
520 
521  base_type::template apply_right<as_flags, reversed, with_successor>(
522  items, tail_flags, flag_op, items[0] /*successor*/, storage.get().right);
523  }
524 
532  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
533  [[deprecated("The flags API of block_adjacent_difference is deprecated."
534  "Use block_discontinuity.flag_heads_and_tails instead.")]]
535  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
536  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
537  Flag (&tail_flags)[ItemsPerThread],
538  const T (&input)[ItemsPerThread],
539  FlagOp flag_op)
540  {
541  ROCPRIM_SHARED_MEMORY storage_type storage;
542  flag_heads_and_tails(head_flags, tail_flags, input, flag_op, storage);
543  }
544 
599  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
600  [[deprecated("The flags API of block_adjacent_difference is deprecated."
601  "Use block_discontinuity.flag_heads_and_tails instead.")]]
602  ROCPRIM_DEVICE ROCPRIM_INLINE
603  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
604  Flag (&tail_flags)[ItemsPerThread],
605  T tile_successor_item,
606  const T (&input)[ItemsPerThread],
607  FlagOp flag_op,
608  storage_type& storage)
609  {
610  static constexpr auto as_flags = true;
611  static constexpr auto reversed = true;
612  static constexpr auto with_predecessor = false;
613  static constexpr auto with_successor = true;
614 
615  // Copy items in case head_flags is aliased with input
616  T items[ItemsPerThread];
617 
618  ROCPRIM_UNROLL
619  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
620  items[i] = input[i];
621  }
622 
623  base_type::template apply_left<as_flags, reversed, with_predecessor>(
624  items, head_flags, flag_op, items[0] /*predecessor*/, storage.get().left);
625 
626  base_type::template apply_right<as_flags, reversed, with_successor>(
627  items, tail_flags, flag_op, tile_successor_item, storage.get().right);
628  }
629 
637  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
638  [[deprecated("The flags API of block_adjacent_difference is deprecated."
639  "Use block_discontinuity.flag_heads_and_tails instead.")]]
640  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
641  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
642  Flag (&tail_flags)[ItemsPerThread],
643  T tile_successor_item,
644  const T (&input)[ItemsPerThread],
645  FlagOp flag_op)
646  {
647  ROCPRIM_SHARED_MEMORY storage_type storage;
648  flag_heads_and_tails(head_flags, tail_flags, tile_successor_item, input, flag_op, storage);
649  }
650 
705  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
706  [[deprecated("The flags API of block_adjacent_difference is deprecated."
707  "Use block_discontinuity.flag_heads_and_tails instead.")]]
708  ROCPRIM_DEVICE ROCPRIM_INLINE
709  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
710  T tile_predecessor_item,
711  Flag (&tail_flags)[ItemsPerThread],
712  const T (&input)[ItemsPerThread],
713  FlagOp flag_op,
714  storage_type& storage)
715  {
716  static constexpr auto as_flags = true;
717  static constexpr auto reversed = true;
718  static constexpr auto with_predecessor = true;
719  static constexpr auto with_successor = false;
720 
721  // Copy items in case head_flags is aliased with input
722  T items[ItemsPerThread];
723 
724  ROCPRIM_UNROLL
725  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
726  items[i] = input[i];
727  }
728 
729  base_type::template apply_left<as_flags, reversed, with_predecessor>(
730  items, head_flags, flag_op, tile_predecessor_item, storage.get().left);
731 
732  base_type::template apply_right<as_flags, reversed, with_successor>(
733  items, tail_flags, flag_op, items[0] /*successor*/, storage.get().right);
734  }
735 
743  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
744  [[deprecated("The flags API of block_adjacent_difference is deprecated."
745  "Use block_discontinuity.flag_heads_and_tails instead.")]]
746  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
747  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
748  T tile_predecessor_item,
749  Flag (&tail_flags)[ItemsPerThread],
750  const T (&input)[ItemsPerThread],
751  FlagOp flag_op)
752  {
753  ROCPRIM_SHARED_MEMORY storage_type storage;
754  flag_heads_and_tails(head_flags, tile_predecessor_item, tail_flags, input, flag_op, storage);
755  }
756 
817  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
818  [[deprecated("The flags API of block_adjacent_difference is deprecated."
819  "Use block_discontinuity.flag_heads_and_tails instead.")]]
820  ROCPRIM_DEVICE ROCPRIM_INLINE
821  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
822  T tile_predecessor_item,
823  Flag (&tail_flags)[ItemsPerThread],
824  T tile_successor_item,
825  const T (&input)[ItemsPerThread],
826  FlagOp flag_op,
827  storage_type& storage)
828  {
829  static constexpr auto as_flags = true;
830  static constexpr auto reversed = true;
831  static constexpr auto with_predecessor = true;
832  static constexpr auto with_successor = true;
833 
834  // Copy items in case head_flags is aliased with input
835  T items[ItemsPerThread];
836 
837  ROCPRIM_UNROLL
838  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
839  items[i] = input[i];
840  }
841 
842  base_type::template apply_left<as_flags, reversed, with_predecessor>(
843  items, head_flags, flag_op, tile_predecessor_item, storage.get().left);
844 
845  base_type::template apply_right<as_flags, reversed, with_successor>(
846  items, tail_flags, flag_op, tile_successor_item, storage.get().right);
847  }
848 
856  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
857  [[deprecated("The flags API of block_adjacent_difference is deprecated."
858  "Use block_discontinuity.flag_heads_and_tails instead.")]]
859  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
860  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
861  T tile_predecessor_item,
862  Flag (&tail_flags)[ItemsPerThread],
863  T tile_successor_item,
864  const T (&input)[ItemsPerThread],
865  FlagOp flag_op)
866  {
867  ROCPRIM_SHARED_MEMORY storage_type storage;
869  head_flags, tile_predecessor_item, tail_flags, tile_successor_item,
870  input, flag_op, storage
871  );
872  }
873 
897  template <typename Output, unsigned int ItemsPerThread, typename BinaryFunction>
898  ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left(const T (&input)[ItemsPerThread],
899  Output (&output)[ItemsPerThread],
900  const BinaryFunction op,
901  storage_type& storage)
902  {
903  static constexpr auto as_flags = false;
904  static constexpr auto reversed = true;
905  static constexpr auto with_predecessor = false;
906 
907  base_type::template apply_left<as_flags, reversed, with_predecessor>(
908  input, output, op, input[0] /* predecessor */, storage.get().left);
909  }
910 
937  template <typename Output, unsigned int ItemsPerThread, typename BinaryFunction>
938  ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left(const T (&input)[ItemsPerThread],
939  Output (&output)[ItemsPerThread],
940  const BinaryFunction op,
941  const T tile_predecessor,
942  storage_type& storage)
943  {
944  static constexpr auto as_flags = false;
945  static constexpr auto reversed = true;
946  static constexpr auto with_predecessor = true;
947 
948  base_type::template apply_left<as_flags, reversed, with_predecessor>(
949  input, output, op, tile_predecessor, storage.get().left);
950  }
951 
978  template <typename Output, unsigned int ItemsPerThread, typename BinaryFunction>
979  ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left_partial(const T (&input)[ItemsPerThread],
980  Output (&output)[ItemsPerThread],
981  const BinaryFunction op,
982  const unsigned int valid_items,
983  storage_type& storage)
984  {
985  static constexpr auto as_flags = false;
986  static constexpr auto reversed = true;
987  static constexpr auto with_predecessor = false;
988 
989  base_type::template apply_left_partial<as_flags, reversed, with_predecessor>(
990  input, output, op, input[0] /* predecessor */, valid_items, storage.get().left);
991  }
992 
1015  template <typename Output, unsigned int ItemsPerThread, typename BinaryFunction>
1016  ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left_partial(const T (&input)[ItemsPerThread],
1017  Output (&output)[ItemsPerThread],
1018  const BinaryFunction op,
1019  const T tile_predecessor,
1020  const unsigned int valid_items,
1021  storage_type& storage)
1022  {
1023  static constexpr auto as_flags = false;
1024  static constexpr auto reversed = true;
1025  static constexpr auto with_predecessor = true;
1026 
1027  base_type::template apply_left_partial<as_flags, reversed, with_predecessor>(
1028  input, output, op, tile_predecessor, valid_items, storage.get().left);
1029  }
1030 
1054  template <typename Output, unsigned int ItemsPerThread, typename BinaryFunction>
1055  ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right(const T (&input)[ItemsPerThread],
1056  Output (&output)[ItemsPerThread],
1057  const BinaryFunction op,
1058  storage_type& storage)
1059  {
1060  static constexpr auto as_flags = false;
1061  static constexpr auto reversed = false;
1062  static constexpr auto with_successor = false;
1063 
1064  base_type::template apply_right<as_flags, reversed, with_successor>(
1065  input, output, op, input[0] /* successor */, storage.get().right);
1066  }
1067 
1095  template <typename Output, unsigned int ItemsPerThread, typename BinaryFunction>
1096  ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right(const T (&input)[ItemsPerThread],
1097  Output (&output)[ItemsPerThread],
1098  const BinaryFunction op,
1099  const T tile_successor,
1100  storage_type& storage)
1101  {
1102  static constexpr auto as_flags = false;
1103  static constexpr auto reversed = false;
1104  static constexpr auto with_successor = true;
1105 
1106  base_type::template apply_right<as_flags, reversed, with_successor>(
1107  input, output, op, tile_successor, storage.get().right);
1108  }
1109 
1135  template <typename Output, unsigned int ItemsPerThread, typename BinaryFunction>
1136  ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right_partial(const T (&input)[ItemsPerThread],
1137  Output (&output)[ItemsPerThread],
1138  const BinaryFunction op,
1139  const unsigned int valid_items,
1140  storage_type& storage)
1141  {
1142  static constexpr auto as_flags = false;
1143  static constexpr auto reversed = false;
1144 
1145  base_type::template apply_right_partial<as_flags, reversed>(
1146  input, output, op, valid_items, storage.get().right);
1147  }
1148 };
1149 
1150 END_ROCPRIM_NAMESPACE
1151 
1153 // end of group blockmodule
1154 
1155 #endif // ROCPRIM_BLOCK_BLOCK_ADJACENT_DIFFERENCE_HPP_
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:168
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:709
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:356
The block_adjacent_difference class is a block level parallel primitive which provides methods for ap...
Definition: block_adjacent_difference.hpp:91
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right_partial(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const unsigned int valid_items, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1136
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const T tile_predecessor, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:938
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1055
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:190
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:499
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:860
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:603
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:253
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags tail_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:419
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags tail_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_adjacent_difference.hpp:333
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:277
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:641
Definition: block_adjacent_difference_impl.hpp:96
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_right(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const T tile_successor, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1096
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:536
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left_partial(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const unsigned int valid_items, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:979
Definition: block_adjacent_difference_impl.hpp:92
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_adjacent_difference.hpp:821
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:898
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:443
struct [[deprecated("use radix_sort_config_v2")]] radix_sort_config
Legacy configuration of device-level radix sort operation.
Definition: device_radix_sort_config.hpp:95
ROCPRIM_DEVICE ROCPRIM_INLINE void subtract_left_partial(const T(&input)[ItemsPerThread], Output(&output)[ItemsPerThread], const BinaryFunction op, const T tile_predecessor, const unsigned int valid_items, storage_type &storage)
Apply a function to each consecutive pair of elements partitioned across threads in the block and wri...
Definition: block_adjacent_difference.hpp:1016
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_adjacent_difference.hpp:747