DASH  0.3.0
MinMax.h
1 #ifndef DASH__ALGORITHM__MIN_MAX_H__
2 #define DASH__ALGORITHM__MIN_MAX_H__
3 
4 #include <dash/internal/Config.h>
5 
6 #include <dash/Allocator.h>
7 
8 #include <dash/algorithm/LocalRange.h>
9 
10 #include <dash/util/Config.h>
11 #include <dash/util/Trace.h>
12 #include <dash/util/UnitLocality.h>
13 
14 #include <dash/internal/Logging.h>
15 #include <dash/iterator/GlobIter.h>
16 
17 #include <algorithm>
18 #include <memory>
19 
20 #ifdef DASH_ENABLE_OPENMP
21 #include <omp.h>
22 #endif
23 
24 
25 namespace dash {
26 
44 template <
45  class ElementType,
46  class Compare = std::less<const ElementType &> >
47 const ElementType * min_element(
49  const ElementType * l_range_begin,
51  const ElementType * l_range_end,
53  Compare compare
54  = std::less<const ElementType &>())
55 {
56 #ifdef DASH_ENABLE_OPENMP
57 #define ROUNDUP(T, A) ((T + A - 1) & ~(A - 1))
58 
59  typedef typename std::decay<ElementType>::type value_t;
61  //TODO: ask dyloc for available memory spaces
62  auto n_threads = uloc.num_domain_threads();
63  DASH_LOG_DEBUG("dash::min_element", "thread capacity:", n_threads);
64 
65  // TODO: Should also restrict on elements/units > ~10240.
66  // Find a model for the minimum work laod.
67  if (n_threads > 1) {
68  auto l_size = l_range_end - l_range_begin;
69  int min_idx_l = 0;
70  ElementType min_val_l = *l_range_begin;
71 
72  typedef struct min_pos_t { value_t val; size_t idx; } min_pos;
73 
74  DASH_LOG_TRACE("dash::min_element", "sizeof(min_pos):", sizeof(min_pos));
75 
76  dash::HostSpace hostSpace;
77 
78  int align_bytes = uloc.cache_line_size(0);
79  int single_element_sz = ROUNDUP(sizeof(min_pos), align_bytes);
80 
81  size_t min_vals_t_bytes = single_element_sz * n_threads;
82 
83  auto min_vals_t_raw = static_cast<uint8_t *>(
84  hostSpace.allocate(min_vals_t_bytes, align_bytes));
85 
86  DASH_LOG_TRACE("dash::min_element", "min * alloc:", min_vals_t_raw);
87  //DASH_LOG_TRACE("dash::min_element", "min * aligned:", min_vals_t);
88  DASH_LOG_TRACE("dash::min_element", "min * size:", min_vals_t_bytes);
89  DASH_ASSERT_GE(min_vals_t_bytes, n_threads * sizeof(min_pos),
90  "Aligned buffer of min_pos has insufficient size");
91  DASH_ASSERT_MSG(nullptr != min_vals_t_raw,
92  "Aligned allocation of min_pos returned nullptr");
93 
94  // Cannot use user-defined reduction (OpenMP 4.0) as the compare
95  // predicate cannot be used in `omp declare reduction`.
96  // Avoid omp for + omp critical section by using array of
97  // thread-local minimum values, aligned to prevent false sharing:
98  int t_id;
99  #pragma omp parallel num_threads(n_threads) private(t_id)
100  {
101  // Documentation of Intel MIC intrinsics, see:
102  // https://software.intel.com/de-de/node/523533
103  // https://software.intel.com/de-de/node/523387
104  t_id = omp_get_thread_num();
105  DASH_LOG_TRACE("dash::min_element", "starting thread", t_id);
106  auto & min_val_t = *(reinterpret_cast<min_pos *>(min_vals_t_raw + t_id * single_element_sz));
107  min_val_t.idx = min_idx_l;
108  min_val_t.val = min_val_l;
109  // Cannot use explicit private(min_val_t) as ElementType might
110  // not be default-constructible:
111  #pragma omp for schedule(static)
112  for (int i = 0; i < l_size; i++) {
113  const ElementType & val_t = *(l_range_begin + i);
114  if (compare(val_t, min_val_t.val)) {
115  min_val_t.val = val_t;
116  min_val_t.idx = i;
117  }
118  }
119  DASH_LOG_TRACE("dash::min_element", "local minimum at thread", t_id,
120  "idx:", min_val_t.idx,
121  "val:", min_val_t.val);
122  }
123 
124  min_pos min_pos_l = * (reinterpret_cast<min_pos *>(min_vals_t_raw));
125 
126  for (int t = 1; t < n_threads; t++) {
127  const min_pos & mpt = *(reinterpret_cast<min_pos *>(min_vals_t_raw + t * single_element_sz));
128  if (compare(mpt.val, min_pos_l.val)) {
129  min_pos_l = mpt;
130  }
131  }
132 
133  hostSpace.deallocate(min_vals_t_raw, min_vals_t_bytes, align_bytes);
134 
135  return (l_range_begin + min_pos_l.idx);
136  }
137 #endif // DASH_ENABLE_OPENMP
138  return ::std::min_element(l_range_begin, l_range_end, compare);
139 }
140 
157 template <
158  typename GlobInputIt,
159  class Compare = std::less<
160  const typename dash::iterator_traits<GlobInputIt>::value_type &> >
161 GlobInputIt min_element(
163  const typename std::enable_if<
165  GlobInputIt>::type &first,
167  const GlobInputIt &last,
169  Compare compare = Compare())
170 {
171  typedef typename GlobInputIt::pattern_type pattern_t;
172  typedef typename pattern_t::index_type index_t;
173  typedef typename std::decay<
174  typename dash::iterator_traits<GlobInputIt>::value_type>::type value_t;
175 
176  // return last for empty array
177  if (first == last) {
178  DASH_LOG_DEBUG("dash::min_element >",
179  "empty range, returning last", last);
180  return last;
181  }
182 
183  dash::util::Trace trace("min_element");
184 
185  auto & pattern = first.pattern();
186  auto & team = pattern.team();
187  DASH_LOG_DEBUG("dash::min_element()",
188  "allocate minarr, size", team.size());
189  // Global position of end element in range:
190  auto gi_last = last.gpos();
191  // Find the local min. element in parallel
192  // Get local address range between global iterators:
193  auto local_idx_range = dash::local_index_range(first, last);
194  // Pointer to local minimum element:
195  const value_t * lmin = nullptr;
196  // Local offset of local minimum element, or -1 if no element found:
197  index_t l_idx_lmin = -1;
198  if (local_idx_range.begin == local_idx_range.end) {
199  // local range is empty
200  DASH_LOG_DEBUG("dash::min_element", "local range empty");
201  } else {
202  trace.enter_state("local");
203 
204  // Pointer to first element in local memory:
205  auto *lbegin = dash::local_begin(
206  static_cast<typename GlobInputIt::const_pointer>(first), team.myid());
207 
208  // Pointers to first / final element in local range:
209  const auto * l_range_begin = lbegin + local_idx_range.begin;
210  const auto * l_range_end = lbegin + local_idx_range.end;
211 
212  lmin = dash::min_element(l_range_begin, l_range_end, compare);
213 
214  if (lmin != l_range_end) {
215  DASH_LOG_TRACE_VAR("dash::min_element", *lmin);
216  // Offset of local minimum in local memory:
217  l_idx_lmin = lmin - lbegin;
218  }
219 
220  trace.exit_state("local");
221  }
222  DASH_LOG_TRACE("dash::min_element",
223  "local index of local minimum:", l_idx_lmin);
224  DASH_LOG_TRACE("dash::min_element",
225  "waiting for local min of other units");
226 
227  trace.enter_state("barrier");
228  team.barrier();
229  trace.exit_state("barrier");
230 
231  typedef struct {
232  value_t value;
233  index_t g_index;
234  } local_min_t;
235 
236  std::vector<local_min_t> local_min_values(team.size());
237 
238  // Set global index of local minimum to -1 if no local minimum has been
239  // found:
240  local_min_t local_min;
241  local_min.value = l_idx_lmin < 0
242  ? value_t()
243  : *lmin;
244  local_min.g_index = l_idx_lmin < 0
245  ? -1
246  : pattern.global(l_idx_lmin);
247 
248  DASH_LOG_TRACE("dash::min_element", "sending local minimum: {",
249  "value:", local_min.value,
250  "g.index:", local_min.g_index, "}");
251 
252  DASH_LOG_TRACE("dash::min_element", "dart_allgather()");
253  trace.enter_state("allgather");
254  DASH_ASSERT_RETURNS(
256  &local_min,
257  local_min_values.data(),
258  sizeof(local_min_t),
260  team.dart_id()),
261  DART_OK);
262  trace.exit_state("allgather");
263 
264 #ifdef DASH_ENABLE_LOGGING
265  for (int lmin_u = 0; lmin_u < local_min_values.size(); lmin_u++) {
266  auto lmin_entry = local_min_values[lmin_u];
267  DASH_LOG_TRACE("dash::min_element", "dart_allgather >",
268  "unit:", lmin_u,
269  "value:", lmin_entry.value,
270  "g_index:", lmin_entry.g_index);
271  }
272 #endif
273 
274  auto gmin_elem_it = ::std::min_element(
275  local_min_values.begin(),
276  local_min_values.end(),
277  [&](const local_min_t & a,
278  const local_min_t & b) {
279  // Ignore elements with global index -1 (no
280  // element found):
281  return (b.g_index < 0 ||
282  (a.g_index > 0 &&
283  compare(a.value, b.value)));
284  });
285 
286  if (gmin_elem_it == local_min_values.end()) {
287  DASH_LOG_DEBUG_VAR("dash::min_element >", last);
288  return last;
289  }
290 
291  auto gi_minimum = gmin_elem_it->g_index;
292 
293  DASH_LOG_TRACE("dash::min_element",
294  "min. value:", gmin_elem_it->value,
295  "at unit:", (gmin_elem_it - local_min_values.begin()),
296  "global idx:", gi_minimum);
297 
298  DASH_LOG_TRACE_VAR("dash::min_element", gi_minimum);
299  if (gi_minimum < 0 || gi_minimum == gi_last) {
300  DASH_LOG_DEBUG_VAR("dash::min_element >", last);
301  return last;
302  }
303  // iterator 'first' is relative to start of input range, convert to start
304  // of its referenced container (= container.begin()), then apply global
305  // offset of minimum element:
306  auto minimum = (first - first.gpos()) + gi_minimum;
307  DASH_LOG_DEBUG("dash::min_element >", minimum,
308  "=", static_cast<value_t>(*minimum));
309 
310  return minimum;
311 }
312 
329 template <
330  class GlobIter,
331  class Compare = std::greater<const typename GlobIter::value_type &> >
334  const GlobIter &first,
336  const GlobIter &last,
338  Compare compare = Compare())
339 {
340  // Same as min_element with different compare function
341  return dash::min_element(first, last, compare);
342 }
343 
361 template <class ElementType, class Compare = std::greater<ElementType &> >
362 const ElementType *max_element(
364  const ElementType *first,
366  const ElementType *last,
368  Compare compare = Compare())
369 {
370  // Same as min_element with different compare function
371  return dash::min_element(first, last, compare);
372 }
373 
374 } // namespace dash
375 
376 #endif // DASH__ALGORITHM__MIN_MAX_H__
DASH_CONSTEXPR std::enable_if< std::is_same< typename dash::memory_space_traits< MemSpaceT >::memory_space_layout_tag, memory_space_contiguous >::value, T >::type * local_begin(GlobPtr< T, MemSpaceT > global_begin, dash::team_unit_t unit)
Returns the begin of the local memory portion within a global memory segment.
Definition: GlobPtr.h:593
This class is a simple memory pool which holds allocates elements of size ValueType.
Definition: AllOf.h:8
Signals success.
Definition: dart_types.h:33
dart_ret_t dart_allgather(const void *sendbuf, void *recvbuf, size_t nelem, dart_datatype_t dtype, dart_team_t team)
DART Equivalent to MPI allgather.
Returns second operand.
Definition: Operation.h:201
int num_domain_threads()
Number of threads currently available to the active unit.
Definition: UnitLocality.h:306
GlobInputIt min_element(const typename std::enable_if< dash::iterator_traits< GlobInputIt >::is_global_iterator::value, GlobInputIt >::type &first, const GlobInputIt &last, Compare compare=Compare())
Finds an iterator pointing to the element with the smallest value in the range [first,last).
Definition: MinMax.h:161
const ElementType * min_element(const ElementType *l_range_begin, const ElementType *l_range_end, Compare compare=std::less< const ElementType &>())
Finds an iterator pointing to the element with the smallest value in the range [first,last).
Definition: MinMax.h:47
Iterator on Partitioned Global Address Space.
Definition: GlobIter.h:45
#define DART_TYPE_BYTE
integral data types
Definition: dart_types.h:125
GlobIter max_element(const GlobIter &first, const GlobIter &last, Compare compare=Compare())
Finds an iterator pointing to the element with the greatest value in the range [first,last).
Definition: MinMax.h:332
std::enable_if< !GlobInputIter::has_view::value, LocalIndexRange< typename GlobInputIter::pattern_type::index_type >>::type local_index_range(const GlobInputIter &first, const GlobInputIter &last)
Resolves the local index range between global iterators.
Definition: LocalRange.h:101
Wrapper of a single dart_unit_locality_t object.
Definition: UnitLocality.h:30