26 #define MAX_KEY 524288 27 #define ARRAY_SIZE 8388608 34 #define group_oneword 8 36 #define NUM_PASSES (bits_init / group_oneword) 37 #define NUM_BUCKETS (1 << group_oneword) 39 typedef long long index_t;
42 dash::util::TimeMeasure::Clock
49 unsigned bits(
unsigned x,
int k,
int j)
51 return (x >> k) & ~(~0 << j);
61 std::map<
int, std::vector<key_type> > & buckets,
67 result.array =
nullptr;
74 auto rows = NUM_BUCKETS;
83 int l_B = NUM_BUCKETS / nunits;
85 std::vector< std::vector<key_type> > pre_sum;
87 for (
int lb = 0; lb < l_B; ++lb) {
88 std::vector<key_type> lb_pref_sum;
89 for (
int i = 0; i < nunits; ++i) {
90 lb_pref_sum.push_back(0);
92 pre_sum.push_back(lb_pref_sum);
100 for (
auto pass = 0; pass < NUM_PASSES; pass++) {
104 int * lit = count.
lbegin();
105 int * lend = count.
lend();
112 for (
size_t it = 0; lit != lend; ++it, ++lit) {
113 if (it < count.local_size()) {
120 for (
auto i = 0; i < arr_lsize; i++) {
121 unsigned int idx = bits(local_a[i],
122 pass * group_oneword,
124 count[idx][
myid] += 1;
126 cout <<
" Rank: " << std::setw(5) << myid
127 <<
" Pass: " << std::setw(5) << pass
128 <<
" idx: " << std::setw(5) << idx
129 <<
" Array Local[i]: " << std::setw(5) << local_a[i]
130 <<
" Count-idx-rank: " << std::setw(5) << (int)count[idx][myid]
136 buckets[idx].push_back(local_a[i]);
147 key_type new_size = 0;
148 for (
auto j = 0; j < l_B; j++) {
149 key_type idx = j + (myid * l_B);
150 for (
auto p = 0; p < nunits; p++) {
151 pre_sum[j][p] = new_size;
152 new_size += count[idx][p];
157 if (new_size > arr_lsize) {
158 int * temp =
new int[new_size];
159 for (
int la = 0; la < arr_lsize; ++la) {
160 temp[la] = local_a[la];
172 for (
auto j = 0; j < NUM_BUCKETS; j++) {
174 key_type p = j / l_B;
176 key_type p_j = j % l_B;
178 auto buckets_j = buckets[j];
180 if (p != myid && buckets_j.size() > 0) {
182 <<
" radix_sort--MPI_Send: send_count=" << buckets_j.size()
198 for (
auto j = 0; j < l_B; j++) {
199 key_type idx = j + myid * l_B;
201 for (
auto p = 0; p < nunits; p++) {
202 key_type b_count = count[idx][p];
205 key_type * dest = &local_a[pre_sum[j][p]];
209 <<
" radix_sort--MPI_Recv: recv_count=" << b_count
227 arr_lsize = new_size;
238 result.array = local_a;
239 result.size = arr_lsize;
242 catch (std::exception & excep)
244 cerr <<
"Exception: " << excep.what()
246 dash__print_stacktrace();
252 int main(
int argc,
char * argv[])
254 size_t array_size = ARRAY_SIZE;
255 size_t max_key = MAX_KEY;
256 size_t iteration = ITERATION;
257 size_t repeat = INIT_REPEAT;
260 double duration_avg_s;
261 double duration_min_s = std::numeric_limits<double>::max();
262 double duration_max_s = std::numeric_limits<double>::min();
273 array_size = atoi(argv[1]);
276 max_key = atoi(argv[2]);
279 repeat = atoi(argv[3]);
282 iteration = atoi(argv[4]);
286 cout <<
"min. array size: " << array_size << endl;
287 cout <<
"max. key value: " << max_key << endl;
288 cout <<
"num repeats: " << repeat << endl;
289 cout <<
"num iterations: " << iteration << endl;
292 if ((array_size % size) != 0) {
294 cout <<
"Please enter an array size which is divisible ny number of " 301 if ((size % 2) != 0) {
303 cout <<
"Please enter an even size of processes \n";
309 key_type rem_buckets = NUM_BUCKETS % size;
311 if (rem_buckets > 0) {
313 cout <<
"Number of buckets and array size must be divisible by number " 322 cout <<
"min. array size: " << array_size << endl;
323 cout <<
"max. key value: " << max_key << endl;
324 cout <<
"num repeats: " << repeat << endl;
325 cout <<
"num iterations: " << iteration << endl;
328 for (
size_t iter = 0; iter < iteration ; iter++) {
329 duration_min_s = std::numeric_limits<double>::max();
330 duration_max_s = std::numeric_limits<double>::min();
332 double duration_it_s = 0;
337 for (
size_t rep = 0; rep < repeat; rep++) {
340 srand(time(0) + myid);
343 for (
auto lit = arr.
lbegin(); lit != arr.
lend(); ++lit) {
345 *lit = (
dash::myid() * 100 + v + (v % 2) * 512) % max_key;
352 <<
"local array size: " << arr.
lend() - arr.
lbegin() <<
" " 356 #if __PRINT_ARRAY_VALUES__ 357 for (
auto unit_id = 0; unit_id <
dash::size(); ++unit_id) {
360 for (
auto lit = arr.
lbegin(); lit != arr.
lend(); ++lit) {
361 cout <<
"unit " << unit_id <<
": array[" << l++
362 <<
"] = " << *lit << endl;
370 key_type arr_lsize = arr.
lsize();
373 key_type * local_arr =
new key_type[arr_lsize];
375 key_type * local_arr =
new key_type[array_size];
378 for (
size_t lit = 0; lit < arr.
lsize(); lit++) {
379 local_arr[lit] = arr.
local[lit];
382 std::map<int, std::vector<key_type> > buckets;
386 auto ts_rep_start = Timer::Now();
387 auto result = radix_sort(local_arr, buckets, size, myid, arr_lsize);
388 local_arr = result.array;
389 arr_lsize = result.size;
390 double duration_rep_s = Timer::ElapsedSince(ts_rep_start) * 1.0e-6;
403 if (arr_lsize <= 0) {
405 "local result array at unit 0 has size 0");
407 for (
auto j = 0; j < arr_lsize; j++) {
408 cout << std::setw(5) << myid << std::setw(5) << local_arr[j]
414 if (duration_rep_s < duration_min_s) {
415 duration_min_s = duration_rep_s;
418 if (duration_rep_s > duration_max_s) {
419 duration_max_s = duration_rep_s;
421 duration_it_s += duration_rep_s;
425 if (local_arr !=
nullptr) {
430 duration_avg_s = duration_it_s / repeat;
434 cout << std::setw(12) <<
"nunits" 435 << std::setw(12) <<
"n" 436 << std::setw(12) <<
"repeats" 437 << std::setw(12) <<
"min.s" 438 << std::setw(12) <<
"avg.s" 439 << std::setw(12) <<
"max.s" 443 cout << std::setw(12) << size
444 << std::setw(12) << array_size
445 << std::setw(12) << repeat
446 << std::setw(12) << std::setprecision(3) << duration_min_s
447 << std::setw(12) << std::setprecision(3) << duration_avg_s
448 << std::setw(12) << std::setprecision(3) << duration_max_s
global_unit_t myid()
Shortcut to query the global unit ID of the calling unit.
Defines how a list of global indices is mapped to single units within a Team.
size_t size()
Return the number of units in the global team.
constexpr auto end(RangeType &&range) -> decltype(std::forward< RangeType >(range).end())
Specifies cartesian extents in a specific number of dimensions.
constexpr auto begin(RangeType &&range) -> decltype(std::forward< RangeType >(range).begin())
constexpr const ElementType * lbegin() const noexcept
Native pointer to the first local element in the array.
constexpr size_type lsize() const noexcept
The number of elements in the local part of the array.
void finalize()
Finalize the DASH library and the underlying runtime system.
constexpr const ElementType * lend() const noexcept
Native pointer to the end of the array.
constexpr SizeType local_size(team_unit_t unit=UNDEFINED_TEAM_UNIT_ID) const noexcept
The actual number of elements in this pattern that are local to the calling unit in total...
constexpr const PatternType & pattern() const noexcept
The pattern used to distribute array elements to units.
void barrier() const
Establish a barrier for all units operating on the array, publishing all changes to all units...
OutputIt copy(InputIt in_first, InputIt in_last, OutputIt out_first)
Copies the elements in the range, defined by [in_first, in_last), to another range beginning at out_f...
void barrier()
A global barrier involving all units.
ElementT * lbegin() noexcept
Pointer to first element in local range.
void init(int *argc, char ***argv)
Initialize the DASH library and the underlying runtime system.
DistributionSpec describes distribution patterns of all dimensions,.
ElementT * lend() noexcept
Pointer past final element in local range.
local_type local
Local proxy object, allows use in range-based for loops.