1 #ifndef DASH__HALO_HALOMEMORY_H 2 #define DASH__HALO_HALOMEMORY_H 4 #include <dash/halo/Types.h> 5 #include <dash/halo/Region.h> 6 #include <dash/halo/Halo.h> 7 #include <dash/Array.h> 13 using namespace internal;
19 template <
typename HaloBlockT>
23 static constexpr
auto RegionsMax = NumRegionsMax<NumDimensions>;
26 using Pattern_t =
typename HaloBlockT::Pattern_t;
27 using ViewSpec_t =
typename HaloBlockT::ViewSpec_t;
28 using extent_t =
typename ViewSpec_t::size_type;
32 using Element_t =
typename HaloBlockT::Element_t;
33 using ElementCoords_t =
34 std::array<typename Pattern_t::index_type, NumDimensions>;
35 using HaloBuffer_t = std::vector<Element_t>;
36 using pattern_size_t =
typename Pattern_t::size_type;
38 using iterator =
typename HaloBuffer_t::iterator;
39 using const_iterator =
const iterator;
41 using MemRange_t = std::pair<iterator, iterator>;
47 HaloMemory(
const HaloBlockT& haloblock) : _haloblock(haloblock) {
48 _halobuffer.resize(haloblock.halo_size());
49 auto it = _halobuffer.begin();
50 std::fill(_halo_offsets.begin(), _halo_offsets.end(), _halobuffer.end());
51 for(
const auto& region : haloblock.halo_regions()) {
52 _halo_offsets[region.index()] = it;
64 return _halo_offsets[
index];
74 auto it = _halo_offsets[
index];
75 if(it == _halobuffer.end())
76 return std::make_pair(it, it);
78 auto* region = _haloblock.halo_region(index);
82 "HaloMemory manages memory for a region that seemed to be empty.");
84 return std::make_pair(it, it + region->size());
90 iterator
begin() {
return _halobuffer.begin(); }
95 const_iterator
begin()
const {
return _halobuffer.begin(); }
100 iterator
end() {
return _halobuffer.end(); }
105 const_iterator
end()
const {
return _halobuffer.end(); }
112 const HaloBuffer_t&
buffer()
const {
return _halobuffer; }
120 ElementCoords_t& coords)
const {
121 const auto& extents =
122 _haloblock.halo_region(region_index)->view().extents();
123 for(
auto d = 0; d < NumDimensions; ++d) {
125 coords[d] += extents[d];
126 else if(static_cast<extent_t>(coords[d]) >= _haloblock.view().extent(d))
127 coords[d] -= _haloblock.view().extent(d);
129 if(static_cast<extent_t>(coords[d]) >= extents[d] || coords[d] < 0)
140 ElementCoords_t& coords)
const {
141 const auto& extents =
142 _haloblock.halo_region(region_index)->view().extents();
143 for(
dim_t d = 0; d < NumDimensions; ++d) {
145 coords[d] += extents[d];
149 if(static_cast<extent_t>(coords[d]) >= _haloblock.view().extent(d))
150 coords[d] -= _haloblock.view().extent(d);
158 pattern_size_t offset(
const region_index_t region_index,
159 const ElementCoords_t& coords)
const {
160 const auto& extents =
161 _haloblock.halo_region(region_index)->view().extents();
162 pattern_size_t off = 0;
163 if(MemoryArrange == ROW_MAJOR) {
165 for(
dim_t d = 1; d < NumDimensions; ++d)
166 off = off * extents[d] + coords[d];
168 off = coords[NumDimensions - 1];
169 for(
dim_t d = NumDimensions - 1; d > 0;) {
171 off = off * extents[d] + coords[d];
179 const HaloBlockT& _haloblock;
180 HaloBuffer_t _halobuffer;
181 std::array<iterator, RegionsMax> _halo_offsets{};
184 template<
typename HaloBlockT>
188 bool signal_used{
false};
193 static constexpr
auto RegionsMax = NumRegionsMax<NumDimensions>;
195 using signal_t = bool;
197 using SignalDataSet_t = std::array<SignalData,RegionsMax>;
198 using SignalHandles_t = std::vector<dart_handle_t>;
199 using Pattern_t =
typename HaloBlockT::Pattern_t;
206 : _signal_buffer(RegionsMax * team.
size(), team),
207 _signal_ready_buffer(RegionsMax * team.
size(), team) {
209 for(region_index_t r = 0; r < RegionsMax; ++r) {
210 _signal_buffer.local[r] = 0;
211 _signal_ready_buffer.local[r] = 1;
214 init_signal_env(halo_block);
217 void put_signal_async(region_index_t region_index) {
218 auto& put_sig = _put_signals[region_index];
219 if(!put_sig.signal_used) {
224 dash::internal::put_handle(put_sig.gptr, &_signal, 1, &handle);
225 _signal_handles.push_back(std::move(handle));
228 void put_signal_blocking(region_index_t region_index) {
229 auto& put_sig = _put_signals[region_index];
230 if(!put_sig.signal_used) {
234 dash::internal::put_blocking(put_sig.gptr, &_signal, 1);
237 void put_ready_signal_async(region_index_t region_index) {
238 auto& put_sig = _put_ready_signals[region_index];
239 if(!put_sig.signal_used) {
244 dash::internal::put_handle(put_sig.gptr, &_signal, 1, &handle);
245 _signal_ready_handles.push_back(std::move(handle));
248 void put_ready_signal_blocking(region_index_t region_index) {
249 auto& put_sig = _put_ready_signals[region_index];
250 if(!put_sig.signal_used) {
254 dash::internal::put_blocking(put_sig.gptr, &_signal, 1);
257 void ready_to_update(region_index_t region_index) {
258 auto& get_data = _get_ready_signals[region_index];
259 if(!get_data.signal_used) {
263 signal_t signal =
false;
265 dash::internal::get_blocking(get_data.gptr, &signal, 1);
267 _signal_ready_buffer.lbegin()[region_index] = 0;
270 void wait_put_signals() {
272 _signal_handles.clear();
275 void wait_put_ready_signals() {
277 _signal_ready_handles.clear();
280 void wait_signal(region_index_t region_index) {
281 auto& get_data = _get_signals[region_index];
282 if(!get_data.signal_used) {
286 signal_t signal =
false;
288 dash::internal::get_blocking(get_data.gptr, &signal, 1);
290 _signal_buffer.lbegin()[region_index] = 0;
294 void init_signal_env(HaloBlockT halo_block) {
295 const auto& env_info_md = halo_block.block_env();
297 long count_put_signals = 0;
298 long count_put_ready_signals = 0;
299 auto my_team_id = halo_block.pattern().team().myid();
300 auto signal_gptr = _signal_buffer.begin().dart_gptr();
301 auto signal_ready_gptr = _signal_ready_buffer.begin().dart_gptr();
303 for(
auto r = 0; r < RegionsMax; ++r) {
304 auto signal_offset = r *
sizeof(bool);
306 const auto& env_md = env_info_md.info(r);
308 if(env_md.neighbor_id_to >= 0) {
309 auto& put_signal = _put_signals[r];
310 put_signal.signal_used =
true;
311 put_signal.gptr = signal_gptr;
312 put_signal.gptr.unitid = env_md.neighbor_id_to;
313 put_signal.gptr.addr_or_offs.offset = signal_offset;
315 auto& get_ready_signal = _get_ready_signals[r];
316 get_ready_signal.signal_used =
true;
317 get_ready_signal.gptr = signal_ready_gptr;
318 get_ready_signal.gptr.unitid = my_team_id;
319 get_ready_signal.gptr.addr_or_offs.offset = signal_offset;
324 auto region = halo_block.halo_region(r);
325 if(region !=
nullptr && region->size() > 0 && env_md.neighbor_id_from >= 0) {
326 auto& get_signal = _get_signals[r];
328 get_signal.signal_used =
true;
329 get_signal.gptr = signal_gptr;
330 get_signal.gptr.unitid = my_team_id;
331 get_signal.gptr.addr_or_offs.offset = signal_offset;
333 auto& put_ready_signal = _put_ready_signals[r];
334 put_ready_signal.signal_used =
true;
335 put_ready_signal.gptr = signal_ready_gptr;
336 put_ready_signal.gptr.unitid = env_md.neighbor_id_from;
337 put_ready_signal.gptr.addr_or_offs.offset = signal_offset;
339 ++count_put_ready_signals;
342 _signal_handles.reserve(count_put_signals);
343 _signal_ready_handles.reserve(count_put_ready_signals);
349 signal_t _signal =
true;
350 SignalDataSet_t _get_signals{};
351 SignalDataSet_t _put_signals{};
352 SignalDataSet_t _get_ready_signals{};
353 SignalDataSet_t _put_ready_signals{};
354 SignalHandles_t _signal_handles;
355 SignalHandles_t _signal_ready_handles;
358 template<
typename ElementT,
typename LengthSizeT>
360 bool needs_packing{
false};
361 std::vector<ElementT*> block_pos{};
362 LengthSizeT block_len{0};
363 ElementT* buffer_pos{
nullptr};
364 std::function<void()> pack_func = [](){};
367 template<
typename ElementT,
typename LengthSizeT>
368 std::ostream& operator<<(std::ostream& os, const PackMetaData<ElementT, LengthSizeT>& pack) {
369 os <<
"packing:" << std::boolalpha << pack.needs_packing
370 <<
", block_len " << pack.block_len
371 <<
", buffer_pos" << pack.buffer_pos;
376 template<
typename HaloBlockT>
379 static constexpr
auto RegionsMax = NumRegionsMax<NumDimensions>;
381 using Pattern_t =
typename HaloBlockT::Pattern_t;
382 using pattern_size_t = std::make_signed_t<typename Pattern_t::size_type>;
383 using upattern_size_t = std::make_unsigned_t<pattern_size_t>;
384 using Element_t =
typename HaloBlockT::Element_t;
388 static constexpr
auto FastestDim =
389 MemoryArrange == ROW_MAJOR ? NumDimensions - 1 : 0;
390 static constexpr
auto ContiguousDim =
391 MemoryArrange == ROW_MAJOR ? 1 : NumDimensions;
394 using HaloPosAll_t = std::array<dart_gptr_t, RegionsMax>;
396 using PackMDataAll_t = std::array<PackMData_t, RegionsMax>;
397 using PackOffs_t = std::array<pattern_size_t, RegionsMax>;
403 PackEnv(
const HaloBlockT& halo_block, Element_t* local_memory,
Team_t& team)
404 : _local_memory(local_memory),
406 auto pack_info = info_pack_buffer(halo_block);
407 _pack_buffer.allocate(pack_info.first * team.
size(), team);
408 init_block_data(halo_block, pack_info.second);
411 void pack(region_index_t region) {
412 _pack_md_all[region].pack_func();
415 dart_gptr_t halo_gptr(region_index_t region_index) {
416 return _get_halos[region_index];
419 const dart_gptr_t& halo_gptr(region_index_t region_index)
const {
420 return _get_halos[region_index];
424 auto info_pack_buffer(
const HaloBlockT& halo_block) {
425 const auto& halo_spec = halo_block.halo_spec();
427 auto max_local_extents = halo_block.pattern().local_extents(rank_0);
428 PackOffs_t packed_offs;
430 pattern_size_t num_pack_elems = 0;
431 pattern_size_t current_offset = 0;
432 for(
auto r = 0; r < RegionsMax; ++r) {
433 const auto& region_spec = halo_spec.spec(r);
434 if(region_spec.extent() == 0 ||
435 (region_spec.level() == 1 && region_spec.relevant_dim() == ContiguousDim)) {
439 pattern_size_t reg_size = 1;
440 for(
auto d = 0; d < NumDimensions; ++d) {
441 if(region_spec[d] != 1) {
442 reg_size *= region_spec.extent();
444 reg_size *= max_local_extents[d];
447 num_pack_elems += reg_size;
448 packed_offs[r] = current_offset;
449 current_offset += reg_size;
452 return std::make_pair(num_pack_elems, packed_offs);
456 void init_block_data(
const HaloBlockT& halo_block,
const PackOffs_t& packed_offs) {
459 const auto& env_info_md = halo_block.block_env();
460 for(
auto r = 0; r < RegionsMax; ++r) {
461 const auto& env_md = env_info_md.info(r);
463 auto region = halo_block.halo_region(r);
464 auto& halo_gptr = _get_halos[r];
465 if(region !=
nullptr && region->size() > 0) {
467 if(region->spec().relevant_dim() == ContiguousDim && region->spec().level() == 1) {
468 halo_gptr = region->begin().dart_gptr();
470 halo_gptr = _pack_buffer.begin().dart_gptr();
471 halo_gptr.unitid = region->begin().dart_gptr().unitid;
472 halo_gptr.addr_or_offs.offset = packed_offs[r] *
sizeof(Element_t);
478 if(env_md.neighbor_id_to < 0) {
484 auto& pack_md = _pack_md_all[r];
485 const auto& reg_spec = halo_block.halo_spec().spec(r);
486 if(reg_spec.relevant_dim() == ContiguousDim && reg_spec.level() == 1) {
490 pack_md.needs_packing =
true;
491 pack_md.buffer_pos = _pack_buffer.lbegin() + packed_offs[r];
493 const auto& view_glob = halo_block.view();
494 auto reg_offsets = view_glob.offsets();
496 const auto& region_extents = env_md.halo_reg_data.view.extents();
497 for(
dim_t d = 0; d < NumDimensions; ++d) {
498 if(reg_spec[d] == 1) {
502 if(reg_spec[d] == 0) {
503 reg_offsets[d] += view_glob.extent(d) - region_extents[d];
505 reg_offsets[d] = view_glob.offset(d);
508 ViewSpec_t view_pack(reg_offsets, region_extents);
509 pattern_size_t num_elems_block = region_extents[FastestDim];
510 pattern_size_t num_blocks = view_pack.size() / num_elems_block;
512 pack_md.block_len = num_elems_block;
513 pack_md.block_pos.resize(num_blocks);
515 auto it_region = region->begin();
516 decltype(it_region) it_pack_data(&(it_region.globmem()), it_region.pattern(), view_pack);
517 for(
auto& pos : pack_md.block_pos) {
518 pos = _local_memory + it_pack_data.lpos().index;
519 it_pack_data += num_elems_block;
521 auto pack = &pack_md;
522 pack_md.pack_func = [pack](){
523 auto buffer_offset = pack->buffer_pos;
524 for(
auto& pos : pack->block_pos) {
525 std::copy(pos, pos + pack->block_len, buffer_offset);
526 buffer_offset += pack->block_len;
533 Element_t* _local_memory;
535 HaloPosAll_t _get_halos;
536 PackMDataAll_t _pack_md_all;
539 template <
typename HaloBlockT, SignalReady SigReady>
542 std::function<void(dart_handle_t&)> get_halos;
547 static constexpr
auto RegionsMax = NumRegionsMax<NumDimensions>;
550 using HaloSpec_t =
typename HaloBlockT::HaloSpec_t;
551 using ViewSpec_t =
typename HaloBlockT::ViewSpec_t;
552 using Pattern_t =
typename HaloBlockT::Pattern_t;
561 using Element_t =
typename HaloBlockT::Element_t;
566 : _halo_block(halo_block),
568 _halo_memory(halo_block),
569 _signal_env(halo_block, team),
570 _pack_env(_halo_block, local_memory, team) {
579 for(
auto& data : _region_data) {
580 update_halo_intern(data.first, data.second);
592 auto it_find = _region_data.find(index);
593 if(it_find != _region_data.end()) {
594 update_halo_intern(it_find->first, it_find->second);
596 if(SigReady == SignalReady::ON) {
597 _signal_env.put_ready_signal_blocking(it_find->first);
607 for(
auto& data : _region_data) {
608 update_halo_intern(data.first, data.second);
619 auto it_find = _region_data.find(index);
620 if(it_find != _region_data.end()) {
621 update_halo_intern(it_find->first, it_find->second);
630 for(
auto& region : _region_data) {
632 if(SigReady == SignalReady::ON) {
633 _signal_env.put_ready_signal_async(region.first);
636 if(SigReady == SignalReady::ON) {
637 _signal_env.wait_put_ready_signals();
646 auto it_find = _region_data.find(index);
647 if(it_find == _region_data.end()) {
652 if(SigReady == SignalReady::ON) {
653 _signal_env.put_ready_signal_blocking(it_find->first);
658 void prepare_update() {
659 for(region_index_t r = 0; r < RegionsMax; ++r) {
660 if(SigReady == SignalReady::ON) {
661 _signal_env.ready_to_update(r);
664 _signal_env.put_signal_async(r);
666 _signal_env.wait_put_signals();
690 void init_update_data() {
691 for(
const auto& region : _halo_block.halo_regions()) {
692 size_t region_size = region.size();
693 if(region_size == 0) {
697 auto gptr = _pack_env.halo_gptr(region.index());
698 if(region.is_custom_region()) {
699 _region_data.insert(std::make_pair(
703 auto* pos = &*(_halo_memory.first_element_at(region.index()));
704 const auto& gptr = _pack_env.halo_gptr(region.index());
705 _region_data.insert(std::make_pair(
706 region.index(), UpdateData{ [pos, gptr, region_size](
dart_handle_t& handle) {
707 dash::internal::get_handle(gptr, pos, region_size, &handle);
714 void update_halo_intern(region_index_t region_index, UpdateData& data) {
715 _signal_env.wait_signal(region_index);
716 data.get_halos(data.handle);
720 const HaloBlockT& _halo_block;
724 std::map<region_index_t, UpdateData> _region_data;
731 #endif // DASH__HALO_HALOMEMORY_H
constexpr std::enable_if< std::is_integral< IndexType >::value, IndexType >::type index(IndexType idx)
void update_at(region_index_t index)
Initiates a blocking halo region update for all halo elements within the given region...
This class is a simple memory pool which holds allocated elements of size ValueType.
MemRange_t range_at(region_index_t index)
Returns the range of all halo elements for the given region index.
const_iterator begin() const
Returns a const iterator to the first halo element.
void to_halo_mem_coords(const region_index_t region_index, ElementCoords_t &coords) const
Converts coordinates to halo memory coordinates for a given region index.
iterator first_element_at(region_index_t index)
Iterator to the first halo element for the given region index.
const_iterator end() const
Returns a const iterator to the end of the halo elements.
Specifies view parameters for implementing submat, rows and cols.
void wait()
Waits until all halo updates are finished.
void wait(region_index_t index)
Waits until the halo update for the given halo region is finished.
int dim_t
Scalar type for a dimension value, with 0 indicating the first dimension.
dart_ret_t dart_waitall_local(dart_handle_t handles[], size_t n)
Wait for the local completion of operations.
void fill(GlobIterType first, GlobIterType last, const typename GlobIterType::value_type &value)
Assigns the given value to the elements in the range [first, last)
bool to_halo_mem_coords_check(const region_index_t region_index, ElementCoords_t &coords) const
Converts coordinates to halo memory coordinates for a given region index and returns true if the coor...
size_t size() const
The number of units in this team.
iterator begin()
Returns an iterator to the first halo element.
Manages the memory for all halo regions provided by the given HaloBlock.
N-Dimensional region coordinates and associated indices for all possible Halo/Boundary regions of a H...
const HaloMemory_t & halo_memory() const
Returns the halo memory management object HaloMemory.
#define DART_GPTR_NULL
A NULL global pointer.
A Team instance specifies a subset of all available units.
void update_async_at(region_index_t index)
Initiates an asynchronous halo region update for all halo elements within the given region...
void update()
Initiates a blocking halo region update for all halo elements.
HaloMemory(const HaloBlockT &haloblock)
Constructor.
constexpr dim_t ndim(const DimensionalType &d)
Specifies the arrangement of team units in a specified number of dimensions.
DART Global pointer type.
BlockEnv_t block_env()
Returns the halo environment information object BlockEnvironment.
struct dash::unit_id< dash::local_unit, dart_team_unit_t > team_unit_t
Unit ID to use for team-local IDs.
struct dart_handle_struct * dart_handle_t
Handle returned by dart_get_handle and the like used to wait for a specific operation to complete usi...
#define DART_HANDLE_NULL
Handle returned by dart_get_handle and the like used to wait for a specific operation to complete usi...
const HaloBuffer_t & buffer() const
Container storing all halo elements.
OutputIt copy(InputIt in_first, InputIt in_last, OutputIt out_first)
Copies the elements in the range, defined by [in_first, in_last), to another range beginning at out_f...
HaloMemory_t & halo_memory()
Returns the halo memory management object HaloMemory.
void update_async()
Initiates an asynchronous halo region update for all halo elements.
dart_ret_t dart_wait_local(dart_handle_t *handle)
Wait for the local completion of an operation.
iterator end()
Returns an iterator to the end of the halo elements.
const BlockEnv_t & block_env() const
Returns the halo environment information object BlockEnvironment.
static constexpr MemArrange memory_order() noexcept
Memory order followed by the pattern.