// BRE12
// parallel_reduce.h
/*
    Copyright 2005-2016 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks. Threading Building Blocks is free software;
    you can redistribute it and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.  Threading Building Blocks is
    distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
    implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU General Public License for more details.  You should have received a copy of
    the GNU General Public License along with Threading Building Blocks; if not, write to the
    Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    As a special exception, you may use this file as part of a free software library without
    restriction.  Specifically, if other files instantiate templates or use macros or inline
    functions from this file, or you compile this file and link it with other files to produce
    an executable, this file does not by itself cause the resulting executable to be covered
    by the GNU General Public License.  This exception does not however invalidate any other
    reasons why the executable file might be covered by the GNU General Public License.
*/

21 #ifndef __TBB_parallel_reduce_H
22 #define __TBB_parallel_reduce_H
23 
24 #include <new>
25 #include "task.h"
26 #include "aligned_space.h"
27 #include "partitioner.h"
28 #include "tbb_profiling.h"
29 
30 namespace tbb {
31 
32 namespace interface9 {
34 namespace internal {
35 
36  using namespace tbb::internal;
37 
39  enum {
40  root_task, left_child, right_child
41  };
42 
44  typedef char reduction_context;
45 
47 
48  template<typename Body>
49  class finish_reduce: public flag_task {
51  bool has_right_zombie;
52  const reduction_context my_context;
53  Body* my_body;
54  aligned_space<Body> zombie_space;
55  finish_reduce( reduction_context context_ ) :
56  has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
57  my_context(context_),
58  my_body(NULL)
59  {
60  }
61  ~finish_reduce() {
62  if( has_right_zombie )
63  zombie_space.begin()->~Body();
64  }
65  task* execute() {
66  if( has_right_zombie ) {
67  // Right child was stolen.
68  Body* s = zombie_space.begin();
69  my_body->join( *s );
70  // Body::join() won't be called if canceled. Defer destruction to destructor
71  }
72  if( my_context==left_child )
73  itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
74  return NULL;
75  }
76  template<typename Range,typename Body_, typename Partitioner>
77  friend class start_reduce;
78  };
79 
81  void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes);
82 
84 
85  template<typename Range, typename Body, typename Partitioner>
86  class start_reduce: public task {
87  typedef finish_reduce<Body> finish_type;
88  Body* my_body;
89  Range my_range;
90  typename Partitioner::task_partition_type my_partition;
91  reduction_context my_context;
92  /*override*/ task* execute();
94  /*override*/ void note_affinity( affinity_id id ) {
95  my_partition.note_affinity( id );
96  }
97  template<typename Body_>
98  friend class finish_reduce;
99 
100 public:
102  start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
103  my_body(body),
104  my_range(range),
105  my_partition(partitioner),
106  my_context(root_task)
107  {
108  }
110 
111  start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj ) :
112  my_body(parent_.my_body),
113  my_range(parent_.my_range, split_obj),
114  my_partition(parent_.my_partition, split_obj),
115  my_context(right_child)
116  {
117  my_partition.set_affinity(*this);
118  parent_.my_context = left_child;
119  }
121 
122  start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
123  my_body(parent_.my_body),
124  my_range(r),
125  my_partition(parent_.my_partition, split()),
126  my_context(right_child)
127  {
128  my_partition.set_affinity(*this);
129  my_partition.align_depth( d ); // TODO: move into constructor of partitioner
130  parent_.my_context = left_child;
131  }
132  static void run( const Range& range, Body& body, Partitioner& partitioner ) {
133  if( !range.empty() ) {
134 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
135  task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
136 #else
137  // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
138  // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
139  task_group_context context;
140  task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
141 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
142  }
143  }
144 #if __TBB_TASK_GROUP_CONTEXT
145  static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
146  if( !range.empty() )
147  task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
148  }
149 #endif /* __TBB_TASK_GROUP_CONTEXT */
150  void run_body( Range &r ) { (*my_body)( r ); }
152 
154  // TODO: remove code duplication from 'offer_work' methods
155  void offer_work(typename Partitioner::split_type& split_obj) {
156  task *tasks[2];
157  allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
158  new((void*)tasks[0]) finish_type(my_context);
159  new((void*)tasks[1]) start_reduce(*this, split_obj);
160  spawn(*tasks[1]);
161  }
163  void offer_work(const Range& r, depth_t d = 0) {
164  task *tasks[2];
165  allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
166  new((void*)tasks[0]) finish_type(my_context);
167  new((void*)tasks[1]) start_reduce(*this, r, d);
168  spawn(*tasks[1]);
169  }
170  };
171 
173  // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
174  inline void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes) {
175  tasks[0] = &start_reduce_task->allocate_continuation().allocate(finish_bytes);
176  start_reduce_task->set_parent(tasks[0]);
177  tasks[0]->set_ref_count(2);
178  tasks[1] = &tasks[0]->allocate_child().allocate(start_bytes);
179  }
180 
181  template<typename Range, typename Body, typename Partitioner>
182  task* start_reduce<Range,Body,Partitioner>::execute() {
183  my_partition.check_being_stolen( *this );
184  if( my_context==right_child ) {
185  finish_type* parent_ptr = static_cast<finish_type*>(parent());
186  if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
187  my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
188  parent_ptr->has_right_zombie = true;
189  }
190  } else __TBB_ASSERT(my_context==root_task,NULL);// because left leaf spawns right leafs without recycling
191  my_partition.execute(*this, my_range);
192  if( my_context==left_child ) {
193  finish_type* parent_ptr = static_cast<finish_type*>(parent());
194  __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
195  itt_store_word_with_release(parent_ptr->my_body, my_body );
196  }
197  return NULL;
198  }
199 
201 
202  template<typename Body>
203  class finish_deterministic_reduce: public task {
204  Body &my_left_body;
205  Body my_right_body;
206 
207  finish_deterministic_reduce( Body &body ) :
208  my_left_body( body ),
209  my_right_body( body, split() )
210  {
211  }
212  task* execute() {
213  my_left_body.join( my_right_body );
214  return NULL;
215  }
216  template<typename Range,typename Body_>
217  friend class start_deterministic_reduce;
218  };
219 
221 
222  template<typename Range, typename Body>
223  class start_deterministic_reduce: public task {
224  typedef finish_deterministic_reduce<Body> finish_type;
225  Body &my_body;
226  Range my_range;
227  /*override*/ task* execute();
228 
230  start_deterministic_reduce( const Range& range, Body& body ) :
231  my_body( body ),
232  my_range( range )
233  {
234  }
236 
237  start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
238  my_body( c.my_right_body ),
239  my_range( parent_.my_range, split() )
240  {
241  }
242 
243 public:
244  static void run( const Range& range, Body& body ) {
245  if( !range.empty() ) {
246 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
247  task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body) );
248 #else
249  // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
250  // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
251  task_group_context context;
252  task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
253 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
254  }
255  }
256 #if __TBB_TASK_GROUP_CONTEXT
257  static void run( const Range& range, Body& body, task_group_context& context ) {
258  if( !range.empty() )
259  task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
260  }
261 #endif /* __TBB_TASK_GROUP_CONTEXT */
262  };
263 
264  template<typename Range, typename Body>
265  task* start_deterministic_reduce<Range,Body>::execute() {
266  if( !my_range.is_divisible() ) {
267  my_body( my_range );
268  return NULL;
269  } else {
270  finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
271  recycle_as_child_of(c);
272  c.set_ref_count(2);
273  start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
274  task::spawn(b);
275  return this;
276  }
277  }
278 } // namespace internal
280 } //namespace interfaceX
281 
283 namespace internal {
284  using interface9::internal::start_reduce;
285  using interface9::internal::start_deterministic_reduce;
287 
291  template<typename Range, typename Value, typename RealBody, typename Reduction>
292  class lambda_reduce_body {
293 
294 //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
295 // (might require some performance measurements)
296 
297  const Value& identity_element;
298  const RealBody& my_real_body;
299  const Reduction& my_reduction;
300  Value my_value;
301  lambda_reduce_body& operator= ( const lambda_reduce_body& other );
302  public:
303  lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
304  : identity_element(identity)
305  , my_real_body(body)
306  , my_reduction(reduction)
307  , my_value(identity)
308  { }
309  lambda_reduce_body( const lambda_reduce_body& other )
310  : identity_element(other.identity_element)
311  , my_real_body(other.my_real_body)
312  , my_reduction(other.my_reduction)
313  , my_value(other.my_value)
314  { }
315  lambda_reduce_body( lambda_reduce_body& other, tbb::split )
316  : identity_element(other.identity_element)
317  , my_real_body(other.my_real_body)
318  , my_reduction(other.my_reduction)
319  , my_value(other.identity_element)
320  { }
321  void operator()(Range& range) {
322  my_value = my_real_body(range, const_cast<const Value&>(my_value));
323  }
324  void join( lambda_reduce_body& rhs ) {
325  my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
326  }
327  Value result() const {
328  return my_value;
329  }
330  };
331 
332 } // namespace internal
334 
335 // Requirements on Range concept are documented in blocked_range.h
336 
355 
357 
358 template<typename Range, typename Body>
359 void parallel_reduce( const Range& range, Body& body ) {
360  internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
361 }
362 
364 
365 template<typename Range, typename Body>
366 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
367  internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
368 }
369 
371 
372 template<typename Range, typename Body>
373 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
374  internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
375 }
376 
377 #if TBB_PREVIEW_STATIC_PARTITIONER
378 
380 template<typename Range, typename Body>
381 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
382  internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
383 }
384 #endif
385 
387 
388 template<typename Range, typename Body>
389 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
390  internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
391 }
392 
393 #if __TBB_TASK_GROUP_CONTEXT
394 
396 template<typename Range, typename Body>
397 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
398  internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
399 }
400 
402 
403 template<typename Range, typename Body>
404 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
405  internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
406 }
407 
408 #if TBB_PREVIEW_STATIC_PARTITIONER
409 
411 template<typename Range, typename Body>
412 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
413  internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
414 }
415 #endif
416 
418 
419 template<typename Range, typename Body>
420 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
421  internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
422 }
423 #endif /* __TBB_TASK_GROUP_CONTEXT */
424 
428 
430 template<typename Range, typename Value, typename RealBody, typename Reduction>
431 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
432  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
433  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
434  ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
435  return body.result();
436 }
437 
439 
440 template<typename Range, typename Value, typename RealBody, typename Reduction>
441 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
442  const simple_partitioner& partitioner ) {
443  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
444  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
445  ::run(range, body, partitioner );
446  return body.result();
447 }
448 
450 
451 template<typename Range, typename Value, typename RealBody, typename Reduction>
452 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
453  const auto_partitioner& partitioner ) {
454  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
455  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
456  ::run( range, body, partitioner );
457  return body.result();
458 }
459 
460 #if TBB_PREVIEW_STATIC_PARTITIONER
461 
463 template<typename Range, typename Value, typename RealBody, typename Reduction>
464 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
465  const static_partitioner& partitioner ) {
466  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
467  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
468  ::run( range, body, partitioner );
469  return body.result();
470 }
471 #endif
472 
474 
475 template<typename Range, typename Value, typename RealBody, typename Reduction>
476 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
477  affinity_partitioner& partitioner ) {
478  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
479  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
480  ::run( range, body, partitioner );
481  return body.result();
482 }
483 
484 #if __TBB_TASK_GROUP_CONTEXT
485 
487 template<typename Range, typename Value, typename RealBody, typename Reduction>
488 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
489  const simple_partitioner& partitioner, task_group_context& context ) {
490  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
491  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
492  ::run( range, body, partitioner, context );
493  return body.result();
494 }
495 
497 
498 template<typename Range, typename Value, typename RealBody, typename Reduction>
499 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
500  const auto_partitioner& partitioner, task_group_context& context ) {
501  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
502  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
503  ::run( range, body, partitioner, context );
504  return body.result();
505 }
506 
507 #if TBB_PREVIEW_STATIC_PARTITIONER
508 
510 template<typename Range, typename Value, typename RealBody, typename Reduction>
511 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
512  const static_partitioner& partitioner, task_group_context& context ) {
513  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
514  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
515  ::run( range, body, partitioner, context );
516  return body.result();
517 }
518 #endif
519 
521 
522 template<typename Range, typename Value, typename RealBody, typename Reduction>
523 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
524  affinity_partitioner& partitioner, task_group_context& context ) {
525  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
526  internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
527  ::run( range, body, partitioner, context );
528  return body.result();
529 }
530 #endif /* __TBB_TASK_GROUP_CONTEXT */
531 
533 
534 template<typename Range, typename Body>
535 void parallel_deterministic_reduce( const Range& range, Body& body ) {
536  internal::start_deterministic_reduce<Range,Body>::run( range, body );
537 }
538 
539 #if __TBB_TASK_GROUP_CONTEXT
540 
542 template<typename Range, typename Body>
543 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
544  internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
545 }
546 #endif /* __TBB_TASK_GROUP_CONTEXT */
547 
551 
553 template<typename Range, typename Value, typename RealBody, typename Reduction>
554 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
555  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
556  internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
557  ::run(range, body);
558  return body.result();
559 }
560 
561 #if __TBB_TASK_GROUP_CONTEXT
562 
564 template<typename Range, typename Value, typename RealBody, typename Reduction>
565 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
566  task_group_context& context ) {
567  internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
568  internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
569  ::run( range, body, context );
570  return body.result();
571 }
572 #endif /* __TBB_TASK_GROUP_CONTEXT */
573 
574 
575 } // namespace tbb
576 
577 #endif /* __TBB_parallel_reduce_H */
// NOTE: The lines below are documentation-generator cross-reference residue,
// not source code; kept as comments so the header remains valid C++.
// Definition: atomic.h:535
// void parallel_deterministic_reduce(const Range &range, Body &body)
// Parallel iteration with deterministic reduction and default partitioner.
// Definition: parallel_reduce.h:535
// void parallel_reduce(const Range &range, Body &body)
// Parallel iteration with reduction and default partitioner.
// Definition: parallel_reduce.h:359
// Definition: _flow_graph_async_msg_impl.h:32
// The namespace tbb contains all components of the library.
// Definition: parallel_for.h:44