rocPRIM
uint_fast_div.hpp
1 // Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCPRIM_DEVICE_DETAIL_UINT_FAST_DIV_HPP_
22 #define ROCPRIM_DEVICE_DETAIL_UINT_FAST_DIV_HPP_
23 
24 #include "../../config.hpp"
25 
26 BEGIN_ROCPRIM_NAMESPACE
27 
28 namespace detail
29 {
30 
32 {
33  unsigned int magic; // Magic number
34  unsigned int shift; // shift amount
35  unsigned int add; // "add" indicator
36 
37  ROCPRIM_HOST_DEVICE inline
38  uint_fast_div() = default;
39 
40  ROCPRIM_HOST_DEVICE inline
41  uint_fast_div(unsigned int d)
42  {
43  // Must have 1 <= d <= 2**32-1.
44 
45  if(d == 1)
46  {
47  magic = 0;
48  shift = 0;
49  add = 0;
50  return;
51  }
52 
53  int p;
54  unsigned int p32 = 1, q, r, delta;
55  add = 0; // Initialize "add" indicator.
56  p = 31; // Initialize p.
57  q = 0x7FFFFFFF/d; // Initialize q = (2**p - 1)/d.
58  r = 0x7FFFFFFF - q*d; // Init. r = rem(2**p - 1, d).
59  do {
60  p = p + 1;
61  if(p == 32) p32 = 1; // Set p32 = 2**(p-32).
62  else p32 = 2*p32;
63  if(r + 1 >= d - r)
64  {
65  if(q >= 0x7FFFFFFF) add = 1;
66  q = 2*q + 1;
67  r = 2*r + 1 - d;
68  }
69  else
70  {
71  if(q >= 0x80000000) add = 1;
72  q = 2*q;
73  r = 2*r + 1;
74  }
75  delta = d - 1 - r;
76  } while (p < 64 && p32 < delta);
77  magic = q + 1; // Magic number and
78  shift = p - 32; // shift amount
79 
80  if(add) shift--;
81  }
82 };
83 
84 ROCPRIM_HOST_DEVICE inline
85 unsigned int operator/(unsigned int n, const uint_fast_div& divisor)
86 {
87  if(divisor.magic == 0)
88  {
89  // Special case for 1
90  return n;
91  }
92 
93  // Higher 32-bit of 64-bit multiplication
94  unsigned int q = (static_cast<unsigned long long>(divisor.magic) * static_cast<unsigned long long>(n)) >> 32;
95  if(divisor.add)
96  {
97  q = ((n - q) >> 1) + q;
98  }
99  return q >> divisor.shift;
100 }
101 
102 } // end of detail namespace
103 
104 END_ROCPRIM_NAMESPACE
105 
106 #endif // ROCPRIM_DEVICE_DETAIL_UINT_FAST_DIV_HPP_
Deprecated: Configuration of device-level scan primitives.
Definition: block_histogram.hpp:62
Definition: uint_fast_div.hpp:31