DASH  0.3.0
main.cpp
1 
6 #include <libdash.h>
7 
8 #include <vector>
9 #include <map>
10 #include <limits>
11 #include <iostream>
12 #include <iomanip>
13 #include <unistd.h>
14 #include <cstddef>
15 #include <ctime>
16 #include <cstdlib>
17 #include <math.h>
18 
19 #include <mpi.h>
20 
21 
22 using std::cout;
23 using std::cerr;
24 using std::endl;
25 
26 #define MAX_KEY 524288
27 #define ARRAY_SIZE 8388608
28 #define ITERATION 1
29 #define INIT_REPEAT 1
30 
31 // number of bits for integer
32 #define bits_init 32
33 // group of bits for each scan
34 #define group_oneword 8
35 // number of passes
36 #define NUM_PASSES (bits_init / group_oneword)
37 #define NUM_BUCKETS (1 << group_oneword)
38 
39 typedef long long index_t;
40 typedef int key_type;
41 typedef dash::util::Timer <
42 dash::util::TimeMeasure::Clock
43 > Timer;
44 
49 unsigned bits(unsigned x, int k, int j)
50 {
51  return (x >> k) & ~(~0 << j);
52 }
53 
54 typedef struct radix_sort_result_t {
55  key_type * array;
56  int size;
58 
59 radix_sort_result radix_sort(
60  int * local_a,
61  std::map<int, std::vector<key_type> > & buckets,
62  int nunits,
63  int myid,
64  int arr_lsize)
65 {
66  radix_sort_result result;
67  result.array = nullptr;
68  result.size = 0;
69 
70 // cout << "unit " << dash::myid() << " radix_sort--1" << endl;
71 
72  try {
73 
74  auto rows = NUM_BUCKETS;
75  auto cols = nunits;
76 
77  // mpirun -n 1 ./bin/ex.06.pattern-block-visualizer.mpi
78  // -n 100 32 -u 32 1 -t 0 1 -s block
79  typedef dash::Pattern<2> pattern_t;
81  dash::SizeSpec<2>(rows, cols),
82  dash::DistributionSpec<2>(dash::NONE, dash::CYCLIC));
83  int l_B = NUM_BUCKETS / nunits;
84 
85  std::vector< std::vector<key_type> > pre_sum;
86 
87  for (int lb = 0; lb < l_B; ++lb) {
88  std::vector<key_type> lb_pref_sum;
89  for (int i = 0; i < nunits; ++i) {
90  lb_pref_sum.push_back(0);
91  }
92  pre_sum.push_back(lb_pref_sum);
93  }
94 
95 // cout << "unit " << dash::myid() << " radix_sort--2" << endl;
96 
97  MPI_Request req;
98  MPI_Status stat;
99 
100  for (auto pass = 0; pass < NUM_PASSES; pass++) {
101 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
102 // << "begin" << endl;
103 
104  int * lit = count.lbegin();
105  int * lend = count.lend();
106 
107  dash::barrier();
108 
109 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
110 // << "step 1" << endl;
111 
112  for (size_t it = 0; lit != lend; ++it, ++lit) {
113  if (it < count.local_size()) {
114  *lit = 0;
115  }
116  }
117 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
118 // << "step 2 -- arr_lsize = " << arr_lsize << endl;
119 
120  for (auto i = 0; i < arr_lsize; i++) {
121  unsigned int idx = bits(local_a[i],
122  pass * group_oneword,
123  group_oneword);
124  count[idx][myid] += 1;
125 #if __COMMENT
126  cout << " Rank: " << std::setw(5) << myid
127  << " Pass: " << std::setw(5) << pass
128  << " idx: " << std::setw(5) << idx
129  << " Array Local[i]: " << std::setw(5) << local_a[i]
130  << " Count-idx-rank: " << std::setw(5) << (int)count[idx][myid]
131  << " \n";
132 #endif
133 // cout << "unit " << dash::myid() << " radix_sort--pass-" << pass << "-"
134 // << "add_item(idx:" << idx << " item:" << local_a[i] << ")"
135 // << endl;
136  buckets[idx].push_back(local_a[i]);
137  }
138 
139 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
140 // << "step 3 pre-barrier" << endl;
141 
142  dash::barrier();
143 
144 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
145 // << "step 3 post-barrier" << endl;
146 
147  key_type new_size = 0;
148  for (auto j = 0; j < l_B; j++) {
149  key_type idx = j + (myid * l_B);
150  for (auto p = 0; p < nunits; p++) {
151  pre_sum[j][p] = new_size;
152  new_size += count[idx][p];
153  }
154  }
155 
156 #if __ORIGINAL__IMPL
157  if (new_size > arr_lsize) {
158  int * temp = new int[new_size];
159  for (int la = 0; la < arr_lsize; ++la) {
160  temp[la] = local_a[la];
161  }
162  delete[] local_a;
163  local_a = temp;
164  }
165 #endif
166 
167  dash::barrier();
168 
169 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
170 // << "step 4" << endl;
171 
172  for (auto j = 0; j < NUM_BUCKETS; j++) {
173  // determine which process this buckets belongs to
174  key_type p = j / l_B;
175  // transpose to that process local bucket index
176  key_type p_j = j % l_B;
177 
178  auto buckets_j = buckets[j];
179 
180  if (p != myid && buckets_j.size() > 0) {
181  cout << "unit " << dash::myid()
182  << " radix_sort--MPI_Send: send_count=" << buckets_j.size()
183  << endl;
184  MPI_Isend(
185  &buckets_j[0],
186  buckets_j.size(),
187  MPI_INT,
188  p,
189  p_j,
190  MPI_COMM_WORLD,
191  &req);
192  }
193  }
194 
195 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
196 // << "step 5" << endl;
197 
198  for (auto j = 0; j < l_B; j++) {
199  key_type idx = j + myid * l_B;
200 
201  for (auto p = 0; p < nunits; p++) {
202  key_type b_count = count[idx][p];
203 
204  if (b_count > 0) {
205  key_type * dest = &local_a[pre_sum[j][p]];
206 
207  if (myid != p) {
208  cout << "unit " << dash::myid()
209  << " radix_sort--MPI_Recv: recv_count=" << b_count
210  << endl;
211  MPI_Recv(
212  dest,
213  b_count,
214  MPI_INT,
215  p,
216  j,
217  MPI_COMM_WORLD,
218  &stat);
219  } else {
220  std::copy(buckets[idx].begin(),
221  buckets[idx].end(),
222  dest);
223  }
224  }
225  }
226  }
227  arr_lsize = new_size;
228 
229  dash::barrier();
230 // cout << "unit " << dash::myid() << " radix_sort--pass--" << pass << "-"
231 // << "end -- arr_lsize = " << arr_lsize << endl;
232  } // pass ended
233 
234  dash::barrier();
235 
236 // cout << "unit " << dash::myid() << " radix_sort--3" << endl;
237 
238  result.array = local_a;
239  result.size = arr_lsize;
240  return result;
241  }
242  catch (std::exception & excep)
243  {
244  cerr << "Exception: " << excep.what()
245  << endl;
246  dash__print_stacktrace();
247  return result;
248  }
249 }
250 
251 
252 int main(int argc, char * argv[])
253 {
254  size_t array_size = ARRAY_SIZE;
255  size_t max_key = MAX_KEY;
256  size_t iteration = ITERATION;
257  size_t repeat = INIT_REPEAT;
258  int head = 0;
259 
260  double duration_avg_s;
261  double duration_min_s = std::numeric_limits<double>::max();
262  double duration_max_s = std::numeric_limits<double>::min();
263 
264  typedef dash::Pattern<2> pattern_t;
265 
266  dash::init(&argc, &argv);
267  Timer::Calibrate(0);
268 
269  auto myid = dash::myid();
270  auto size = dash::size();
271 
272  if (argc >= 2) {
273  array_size = atoi(argv[1]);
274  }
275  if (argc >= 3) {
276  max_key = atoi(argv[2]);
277  }
278  if (argc >= 4) {
279  repeat = atoi(argv[3]);
280  }
281  if (argc >= 5) {
282  iteration = atoi(argv[4]);
283  }
284 
285  if (dash::myid() == 0) {
286  cout << "min. array size: " << array_size << endl;
287  cout << "max. key value: " << max_key << endl;
288  cout << "num repeats: " << repeat << endl;
289  cout << "num iterations: " << iteration << endl;
290  }
291 
292  if ((array_size % size) != 0) {
293  if (myid == 0) {
294  cout << "Please enter an array size which is divisible ny number of "
295  << "units.\n";
296  }
297  dash::finalize();
298  return 0;
299  }
300 
301  if ((size % 2) != 0) {
302  if (myid == 0) {
303  cout << "Please enter an even size of processes \n";
304  }
305  dash::finalize();
306  return 0;
307  }
308 
309  key_type rem_buckets = NUM_BUCKETS % size;
310 
311  if (rem_buckets > 0) {
312  if (myid == 0) {
313  cout << "Number of buckets and array size must be divisible by number "
314  << "of processors"
315  << std::endl;
316  }
317  dash::finalize();
318  return 0;
319  }
320 
321  if (dash::myid() == 0) {
322  cout << "min. array size: " << array_size << endl;
323  cout << "max. key value: " << max_key << endl;
324  cout << "num repeats: " << repeat << endl;
325  cout << "num iterations: " << iteration << endl;
326  }
327 
328  for (size_t iter = 0; iter < iteration ; iter++) {
329  duration_min_s = std::numeric_limits<double>::max();
330  duration_max_s = std::numeric_limits<double>::min();
331 
332  double duration_it_s = 0;
333 
334  dash::Array<key_type> arr(array_size);
335  dash::barrier();
336 
337  for (size_t rep = 0; rep < repeat; rep++) {
338  // Initialization:
339  // Writing the data into the array.
340  srand(time(0) + myid);
341 
342  int v = 0;
343  for (auto lit = arr.lbegin(); lit != arr.lend(); ++lit) {
344 // *lit = (rand() % max_key) + 1;
345  *lit = (dash::myid() * 100 + v + (v % 2) * 512) % max_key;
346  ++v;
347  }
348 
349  arr.barrier();
350 
351  cout << "unit " << dash::myid() << " "
352  << "local array size: " << arr.lend() - arr.lbegin() << " "
353  << "local pattern size: " << arr.pattern().local_size()
354  << endl;
355 
356 #if __PRINT_ARRAY_VALUES__
357  for (auto unit_id = 0; unit_id < dash::size(); ++unit_id) {
358  if (unit_id == dash::myid()) {
359  int l = 0;
360  for (auto lit = arr.lbegin(); lit != arr.lend(); ++lit) {
361  cout << "unit " << unit_id << ": array[" << l++
362  << "] = " << *lit << endl;
363  }
364  sleep(1);
365  }
366  dash::barrier();
367  }
368 #endif
369 
370  key_type arr_lsize = arr.lsize();
371 
372 #if __ORIGINAL__IMPL
373  key_type * local_arr = new key_type[arr_lsize];
374 #else
375  key_type * local_arr = new key_type[array_size];
376 #endif
377 
378  for (size_t lit = 0; lit < arr.lsize(); lit++) {
379  local_arr[lit] = arr.local[lit];
380  }
381 
382  std::map<int, std::vector<key_type> > buckets;
383 
384  dash::barrier();
385 
386  auto ts_rep_start = Timer::Now();
387  auto result = radix_sort(local_arr, buckets, size, myid, arr_lsize);
388  local_arr = result.array;
389  arr_lsize = result.size;
390  double duration_rep_s = Timer::ElapsedSince(ts_rep_start) * 1.0e-6;
391 /*
392  //TODO: Fill all the values in global array
393  // and check if it is sorted or not.
394  proc_n[myid] = arr_lsize;
395  if(myid == 0){
396  auto iter_p = 0;
397  for(auto j=0; j<proc_n[iter_p];)
398  }
399 */
400  dash::barrier();
401 
402  if (dash::myid() == 0) {
403  if (arr_lsize <= 0) {
405  "local result array at unit 0 has size 0");
406  }
407  for (auto j = 0; j < arr_lsize; j++) {
408  cout << std::setw(5) << myid << std::setw(5) << local_arr[j]
409  << endl;
410  }
411  }
412  dash::barrier();
413 
414  if (duration_rep_s < duration_min_s) {
415  duration_min_s = duration_rep_s;
416  }
417 
418  if (duration_rep_s > duration_max_s) {
419  duration_max_s = duration_rep_s;
420  }
421  duration_it_s += duration_rep_s;
422 
423  dash::barrier();
424 
425  if (local_arr != nullptr) {
426  delete[] local_arr;
427  }
428  }
429 
430  duration_avg_s = duration_it_s / repeat;
431 
432  if (myid == 0) {
433  if (head != 1) {
434  cout << std::setw(12) << "nunits"
435  << std::setw(12) << "n"
436  << std::setw(12) << "repeats"
437  << std::setw(12) << "min.s"
438  << std::setw(12) << "avg.s"
439  << std::setw(12) << "max.s"
440  << std::endl;
441  head = 1;
442  }
443  cout << std::setw(12) << size
444  << std::setw(12) << array_size
445  << std::setw(12) << repeat
446  << std::setw(12) << std::setprecision(3) << duration_min_s
447  << std::setw(12) << std::setprecision(3) << duration_avg_s
448  << std::setw(12) << std::setprecision(3) << duration_max_s
449  << std::endl;
450  }
451 
452  repeat /= 4;
453  array_size *= 4;
454 
455  if (repeat <= 0) {
456  repeat = 1;
457  }
458 
459  }
460  dash::finalize();
461 
462  return EXIT_SUCCESS;
463 }
global_unit_t myid()
Shortcut to query the global unit ID of the calling unit.
Defines how a list of global indices is mapped to single units within a Team.
Definition: BlockPattern.h:42
size_t size()
Return the number of units in the global team.
constexpr auto end(RangeType &&range) -> decltype(std::forward< RangeType >(range).end())
Definition: Range.h:98
Specifies cartesian extents in a specific number of dimensions.
Definition: Cartesian.h:197
constexpr auto begin(RangeType &&range) -> decltype(std::forward< RangeType >(range).begin())
Definition: Range.h:89
constexpr const ElementType * lbegin() const noexcept
Native pointer to the first local element in the array.
Definition: Array.h:1073
constexpr size_type lsize() const noexcept
The number of elements in the local part of the array.
Definition: Array.h:1206
void finalize()
Finalize the DASH library and the underlying runtime system.
constexpr const ElementType * lend() const noexcept
Native pointer to the end of the array.
Definition: Array.h:1087
constexpr SizeType local_size(team_unit_t unit=UNDEFINED_TEAM_UNIT_ID) const noexcept
The actual number of elements in this pattern that are local to the calling unit in total...
constexpr const PatternType & pattern() const noexcept
The pattern used to distribute array elements to units.
Definition: Array.h:1310
A distributed array.
Definition: Array.h:89
void barrier() const
Establish a barrier for all units operating on the array, publishing all changes to all units...
Definition: Array.h:1254
OutputIt copy(InputIt in_first, InputIt in_last, OutputIt out_first)
Copies the elements in the range, defined by [in_first, in_last), to another range beginning at out_f...
void barrier()
A global barrier involving all units.
ElementT * lbegin() noexcept
Pointer to first element in local range.
void init(int *argc, char ***argv)
Initialize the DASH library and the underlying runtime system.
DistributionSpec describes distribution patterns of all dimensions,.
Definition: Dimensional.h:222
Forward-declaration.
ElementT * lend() noexcept
Pointer past final element in local range.
local_type local
Local proxy object, allows use in range-based for loops.
Definition: Array.h:732