rocPRIM
block_discontinuity.hpp
1 // Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCPRIM_BLOCK_BLOCK_DISCONTINUITY_HPP_
22 #define ROCPRIM_BLOCK_BLOCK_DISCONTINUITY_HPP_
23 
24 
25 #include "detail/block_adjacent_difference_impl.hpp"
26 
27 #include "../config.hpp"
28 #include "../detail/various.hpp"
29 
30 
31 
34 
35 BEGIN_ROCPRIM_NAMESPACE
36 
76 template<
77  class T,
78  unsigned int BlockSizeX,
79  unsigned int BlockSizeY = 1,
80  unsigned int BlockSizeZ = 1
81 >
83 #ifndef DOXYGEN_SHOULD_SKIP_THIS // hide implementation detail from documentation
84  : private detail::block_adjacent_difference_impl<T, BlockSizeX, BlockSizeY, BlockSizeZ>
85 #endif // DOXYGEN_SHOULD_SKIP_THIS
86 {
88 
89  static constexpr unsigned BlockSize = base_type::BlockSize;
90  // Struct used for creating a raw_storage object for this primitive's temporary storage.
91  struct storage_type_
92  {
93  typename base_type::storage_type left;
94  typename base_type::storage_type right;
95  };
96 
97 public:
98 
107  #ifndef DOXYGEN_SHOULD_SKIP_THIS // hides storage_type implementation for Doxygen
109  #else
110  using storage_type = storage_type_;
111  #endif
112 
154  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
155  ROCPRIM_DEVICE ROCPRIM_INLINE
156  void flag_heads(Flag (&head_flags)[ItemsPerThread],
157  const T (&input)[ItemsPerThread],
158  FlagOp flag_op,
159  storage_type& storage)
160  {
161  static constexpr auto as_flags = true;
162  static constexpr auto reversed = false;
163  static constexpr auto with_predecessor = false;
164  base_type::template apply_left<as_flags, reversed, with_predecessor>(
165  input, head_flags, flag_op, input[0] /* predecessor */, storage.get().left);
166  }
167 
172  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
173  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
174  void flag_heads(Flag (&head_flags)[ItemsPerThread],
175  const T (&input)[ItemsPerThread],
176  FlagOp flag_op)
177  {
178  ROCPRIM_SHARED_MEMORY storage_type storage;
179  flag_heads(head_flags, input, flag_op, storage);
180  }
181 
231  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
232  ROCPRIM_DEVICE ROCPRIM_INLINE
233  void flag_heads(Flag (&head_flags)[ItemsPerThread],
234  T tile_predecessor_item,
235  const T (&input)[ItemsPerThread],
236  FlagOp flag_op,
237  storage_type& storage)
238  {
239  static constexpr auto as_flags = true;
240  static constexpr auto reversed = false;
241  static constexpr auto with_predecessor = true;
242  base_type::template apply_left<as_flags, reversed, with_predecessor>(
243  input, head_flags, flag_op, tile_predecessor_item, storage.get().left);
244  }
245 
250  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
251  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
252  void flag_heads(Flag (&head_flags)[ItemsPerThread],
253  T tile_predecessor_item,
254  const T (&input)[ItemsPerThread],
255  FlagOp flag_op)
256  {
257  ROCPRIM_SHARED_MEMORY storage_type storage;
258  flag_heads(head_flags, tile_predecessor_item, input, flag_op, storage);
259  }
260 
302  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
303  ROCPRIM_DEVICE ROCPRIM_INLINE
304  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
305  const T (&input)[ItemsPerThread],
306  FlagOp flag_op,
307  storage_type& storage)
308  {
309  static constexpr auto as_flags = true;
310  static constexpr auto reversed = false;
311  static constexpr auto with_successor = false;
312  base_type::template apply_right<as_flags, reversed, with_successor>(
313  input, tail_flags, flag_op, input[0] /* successor */, storage.get().right);
314  }
315 
320  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
321  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
322  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
323  const T (&input)[ItemsPerThread],
324  FlagOp flag_op)
325  {
326  ROCPRIM_SHARED_MEMORY storage_type storage;
327  flag_tails(tail_flags, input, flag_op, storage);
328  }
329 
379  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
380  ROCPRIM_DEVICE ROCPRIM_INLINE
381  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
382  T tile_successor_item,
383  const T (&input)[ItemsPerThread],
384  FlagOp flag_op,
385  storage_type& storage)
386  {
387  static constexpr auto as_flags = true;
388  static constexpr auto reversed = false;
389  static constexpr auto with_successor = true;
390  base_type::template apply_right<as_flags, reversed, with_successor>(
391  input, tail_flags, flag_op, tile_successor_item, storage.get().right);
392  }
393 
398  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
399  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
400  void flag_tails(Flag (&tail_flags)[ItemsPerThread],
401  T tile_successor_item,
402  const T (&input)[ItemsPerThread],
403  FlagOp flag_op)
404  {
405  ROCPRIM_SHARED_MEMORY storage_type storage;
406  flag_tails(tail_flags, tile_successor_item, input, flag_op, storage);
407  }
408 
452  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
453  ROCPRIM_DEVICE ROCPRIM_INLINE
454  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
455  Flag (&tail_flags)[ItemsPerThread],
456  const T (&input)[ItemsPerThread],
457  FlagOp flag_op,
458  storage_type& storage)
459  {
460  static constexpr auto as_flags = true;
461  static constexpr auto reversed = false;
462  static constexpr auto with_predecessor = false;
463  static constexpr auto with_successor = false;
464 
465  // Copy items in case head_flags is aliased with input
466  T items[ItemsPerThread];
467 
468  ROCPRIM_UNROLL
469  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
470  items[i] = input[i];
471  }
472 
473  base_type::template apply_left<as_flags, reversed, with_predecessor>(
474  items, head_flags, flag_op, items[0] /*predecessor*/, storage.get().left);
475 
476  base_type::template apply_right<as_flags, reversed, with_successor>(
477  items, tail_flags, flag_op, items[0] /*successor*/, storage.get().right);
478  }
479 
484  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
485  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
486  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
487  Flag (&tail_flags)[ItemsPerThread],
488  const T (&input)[ItemsPerThread],
489  FlagOp flag_op)
490  {
491  ROCPRIM_SHARED_MEMORY storage_type storage;
492  flag_heads_and_tails(head_flags, tail_flags, input, flag_op, storage);
493  }
494 
547  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
548  ROCPRIM_DEVICE ROCPRIM_INLINE
549  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
550  Flag (&tail_flags)[ItemsPerThread],
551  T tile_successor_item,
552  const T (&input)[ItemsPerThread],
553  FlagOp flag_op,
554  storage_type& storage)
555  {
556  static constexpr auto as_flags = true;
557  static constexpr auto reversed = false;
558  static constexpr auto with_predecessor = false;
559  static constexpr auto with_successor = true;
560 
561  // Copy items in case head_flags is aliased with input
562  T items[ItemsPerThread];
563 
564  ROCPRIM_UNROLL
565  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
566  items[i] = input[i];
567  }
568 
569  base_type::template apply_left<as_flags, reversed, with_predecessor>(
570  items, head_flags, flag_op, items[0] /*predecessor*/, storage.get().left);
571 
572  base_type::template apply_right<as_flags, reversed, with_successor>(
573  items, tail_flags, flag_op, tile_successor_item, storage.get().right);
574  }
575 
580  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
581  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
582  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
583  Flag (&tail_flags)[ItemsPerThread],
584  T tile_successor_item,
585  const T (&input)[ItemsPerThread],
586  FlagOp flag_op)
587  {
588  ROCPRIM_SHARED_MEMORY storage_type storage;
589  flag_heads_and_tails(head_flags, tail_flags, tile_successor_item, input, flag_op, storage);
590  }
591 
644  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
645  ROCPRIM_DEVICE ROCPRIM_INLINE
646  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
647  T tile_predecessor_item,
648  Flag (&tail_flags)[ItemsPerThread],
649  const T (&input)[ItemsPerThread],
650  FlagOp flag_op,
651  storage_type& storage)
652  {
653  static constexpr auto as_flags = true;
654  static constexpr auto reversed = false;
655  static constexpr auto with_predecessor = true;
656  static constexpr auto with_successor = false;
657 
658  // Copy items in case head_flags is aliased with input
659  T items[ItemsPerThread];
660 
661  ROCPRIM_UNROLL
662  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
663  items[i] = input[i];
664  }
665 
666  base_type::template apply_left<as_flags, reversed, with_predecessor>(
667  items, head_flags, flag_op, tile_predecessor_item, storage.get().left);
668 
669  base_type::template apply_right<as_flags, reversed, with_successor>(
670  items, tail_flags, flag_op, items[0] /*successor*/, storage.get().right);
671  }
672 
677  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
678  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
679  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
680  T tile_predecessor_item,
681  Flag (&tail_flags)[ItemsPerThread],
682  const T (&input)[ItemsPerThread],
683  FlagOp flag_op)
684  {
685  ROCPRIM_SHARED_MEMORY storage_type storage;
686  flag_heads_and_tails(head_flags, tile_predecessor_item, tail_flags, input, flag_op, storage);
687  }
688 
747  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
748  ROCPRIM_DEVICE ROCPRIM_INLINE
749  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
750  T tile_predecessor_item,
751  Flag (&tail_flags)[ItemsPerThread],
752  T tile_successor_item,
753  const T (&input)[ItemsPerThread],
754  FlagOp flag_op,
755  storage_type& storage)
756  {
757  static constexpr auto as_flags = true;
758  static constexpr auto reversed = false;
759  static constexpr auto with_predecessor = true;
760  static constexpr auto with_successor = true;
761 
762  // Copy items in case head_flags is aliased with input
763  T items[ItemsPerThread];
764 
765  ROCPRIM_UNROLL
766  for(unsigned int i = 0; i < ItemsPerThread; ++i) {
767  items[i] = input[i];
768  }
769 
770  base_type::template apply_left<as_flags, reversed, with_predecessor>(
771  items, head_flags, flag_op, tile_predecessor_item, storage.get().left);
772 
773  base_type::template apply_right<as_flags, reversed, with_successor>(
774  items, tail_flags, flag_op, tile_successor_item, storage.get().right);
775  }
776 
781  template<unsigned int ItemsPerThread, class Flag, class FlagOp>
782  ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE
783  void flag_heads_and_tails(Flag (&head_flags)[ItemsPerThread],
784  T tile_predecessor_item,
785  Flag (&tail_flags)[ItemsPerThread],
786  T tile_successor_item,
787  const T (&input)[ItemsPerThread],
788  FlagOp flag_op)
789  {
790  ROCPRIM_SHARED_MEMORY storage_type storage;
792  head_flags, tile_predecessor_item, tail_flags, tile_successor_item,
793  input, flag_op, storage
794  );
795  }
796 };
797 
798 END_ROCPRIM_NAMESPACE
799 
801 // end of group blockmodule
802 
803 #endif // ROCPRIM_BLOCK_BLOCK_DISCONTINUITY_HPP_
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:252
The block_discontinuity class is a block level parallel primitive which provides methods for flagging...
Definition: block_discontinuity.hpp:82
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:783
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:679
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_discontinuity.hpp:233
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_discontinuity.hpp:646
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags head_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_discontinuity.hpp:156
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags tail_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_discontinuity.hpp:381
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], T tile_predecessor_item, Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_discontinuity.hpp:749
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:400
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:486
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads(Flag(&head_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:174
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:322
Definition: block_adjacent_difference_impl.hpp:96
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_discontinuity.hpp:454
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags both head_flags and tail_flags that indicate discontinuities between items partitioned across the...
Definition: block_discontinuity.hpp:549
Definition: block_adjacent_difference_impl.hpp:92
ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void flag_heads_and_tails(Flag(&head_flags)[ItemsPerThread], Flag(&tail_flags)[ItemsPerThread], T tile_successor_item, const T(&input)[ItemsPerThread], FlagOp flag_op)
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: block_discontinuity.hpp:582
ROCPRIM_DEVICE ROCPRIM_INLINE void flag_tails(Flag(&tail_flags)[ItemsPerThread], const T(&input)[ItemsPerThread], FlagOp flag_op, storage_type &storage)
Tags tail_flags that indicate discontinuities between items partitioned across the thread block...
Definition: block_discontinuity.hpp:304