21 #ifndef __TBB_parallel_reduce_H 22 #define __TBB_parallel_reduce_H 26 #include "aligned_space.h" 27 #include "partitioner.h" 28 #include "tbb_profiling.h" 32 namespace interface9 {
40 root_task, left_child, right_child
// Storage type for a task's position in the reduction tree. The my_context
// members declared with this type are compared against root_task, left_child
// and right_child by the reduction tasks in this file.
44 typedef char reduction_context;
// finish_reduce<Body>: task (derived from flag_task) that start_reduce::offer_work
// constructs in storage obtained from allocate_continuation().
// NOTE(review): this listing omits several original lines (for example the
// declaration of the my_body member used below), so the class is only partly shown.
48 template<
typename Body>
49 class finish_reduce:
public flag_task {
// True once start_reduce::execute has constructed a split Body in zombie_space.
51 bool has_right_zombie;
// Reduction-tree position of this task; compared with left_child below.
52 const reduction_context my_context;
// Raw aligned storage for one Body; start_reduce::execute placement-constructs
// the right child's split body here.
54 aligned_space<Body> zombie_space;
55 finish_reduce( reduction_context context_ ) :
56 has_right_zombie(false),
// Destroy the Body held in zombie_space, but only if one was actually constructed.
62 if( has_right_zombie )
63 zombie_space.begin()->~Body();
// When a right zombie exists, take a pointer to it (the statements that use
// `s` are not part of this listing).
66 if( has_right_zombie ) {
68 Body* s = zombie_space.begin();
// A finish task that is itself a left child hands its my_body pointer to the
// parent finish_reduce via itt_store_word_with_release.
72 if( my_context==left_child )
73 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
// start_reduce needs access to the members above (zombie_space, has_right_zombie, my_body).
76 template<
typename Range,
typename Body_,
typename Partitioner>
77 friend class start_reduce;
// Forward declaration of the helper used by start_reduce::offer_work; the inline
// definition appears later in this file.
//   start_reduce_task  task that is offering work
//   tasks              output array: [0] receives the finish task storage,
//                      [1] the sibling start task storage
//   start_bytes        size passed when allocating tasks[1]
//   finish_bytes       size passed when allocating tasks[0]
81 void allocate_sibling(task* start_reduce_task, task *tasks[],
size_t start_bytes,
size_t finish_bytes);
// start_reduce<Range,Body,Partitioner>: task that applies a Body to a Range and
// hands off part of its work to a sibling task through offer_work().
// NOTE(review): this listing omits several original lines (e.g. the my_body and
// my_range member declarations and most closing braces).
85 template<
typename Range,
typename Body,
typename Partitioner>
86 class start_reduce:
public task {
87 typedef finish_reduce<Body> finish_type;
90 typename Partitioner::task_partition_type my_partition;
// root_task for the initial task, right_child for a split-off sibling; a task is
// switched to left_child when a sibling is constructed from it (see constructors).
91 reduction_context my_context;
// Forwards the affinity id to the partition object.
94 void note_affinity( affinity_id
id ) {
95 my_partition.note_affinity(
id );
97 template<
typename Body_>
98 friend class finish_reduce;
// Root constructor: used by run(); marks the task as root_task.
102 start_reduce(
const Range& range, Body* body, Partitioner& partitioner ) :
105 my_partition(partitioner),
106 my_context(root_task)
// Splitting constructor: builds the right child from parent_, splitting the
// parent's range and partition with split_obj, and re-labels parent_ as left_child.
111 start_reduce( start_reduce& parent_,
typename Partitioner::split_type& split_obj ) :
112 my_body(parent_.my_body),
113 my_range(parent_.my_range, split_obj),
114 my_partition(parent_.my_partition, split_obj),
115 my_context(right_child)
117 my_partition.set_affinity(*
this);
118 parent_.my_context = left_child;
// Constructor for a right child given a range r and depth d: shares the parent's
// body pointer, splits the partition with split(), aligns its depth to d, and
// re-labels parent_ as left_child.
122 start_reduce( start_reduce& parent_,
const Range& r, depth_t d ) :
123 my_body(parent_.my_body),
125 my_partition(parent_.my_partition, split()),
126 my_context(right_child)
128 my_partition.set_affinity(*
this);
129 my_partition.align_depth( d );
130 parent_.my_context = left_child;
// Entry point without a caller-supplied context: does nothing for an empty range;
// otherwise spawns a root start_reduce and waits for it. Depending on the
// preprocessor condition the root is allocated with or without a local
// task_group_context.
132 static void run(
const Range& range, Body& body, Partitioner& partitioner ) {
133 if( !range.empty() ) {
134 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP 135 task::spawn_root_and_wait( *
new(task::allocate_root()) start_reduce(range,&body,partitioner) );
139 task_group_context context;
140 task::spawn_root_and_wait( *
new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
// Entry point that allocates the root task in the caller-supplied context.
144 #if __TBB_TASK_GROUP_CONTEXT 145 static void run(
const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
147 task::spawn_root_and_wait( *
new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
// Applies the body to the sub-range r.
150 void run_body( Range &r ) { (*my_body)( r ); }
// Splits off work using split_obj: allocate_sibling provides raw storage for a
// finish task (tasks[0]) and a sibling start task (tasks[1]), which are then
// placement-constructed. The declaration of `tasks` is not in this listing.
155 void offer_work(
typename Partitioner::split_type& split_obj) {
157 allocate_sibling(static_cast<task*>(
this), tasks,
sizeof(start_reduce),
sizeof(finish_type));
158 new((
void*)tasks[0]) finish_type(my_context);
159 new((
void*)tasks[1]) start_reduce(*
this, split_obj);
// Same as above, but the sibling is constructed from an explicit range r and
// depth d (default 0).
163 void offer_work(
const Range& r, depth_t d = 0) {
165 allocate_sibling(static_cast<task*>(
this), tasks,
sizeof(start_reduce),
sizeof(finish_type));
166 new((
void*)tasks[0]) finish_type(my_context);
167 new((
void*)tasks[1]) start_reduce(*
this, r, d);
// Allocates raw storage for the two tasks needed when start_reduce_task offers work.
//   tasks[0]: finish_bytes bytes allocated as the continuation of start_reduce_task;
//             it becomes start_reduce_task's parent and gets a reference count of 2.
//   tasks[1]: start_bytes bytes allocated as a child of tasks[0].
// Nothing is constructed here; callers placement-new into the returned storage.
174 inline void allocate_sibling(task* start_reduce_task, task *tasks[],
size_t start_bytes,
size_t finish_bytes) {
175 tasks[0] = &start_reduce_task->allocate_continuation().allocate(finish_bytes);
176 start_reduce_task->set_parent(tasks[0]);
177 tasks[0]->set_ref_count(2);
178 tasks[1] = &tasks[0]->allocate_child().allocate(start_bytes);
// start_reduce::execute: work performed by one reduction task.
181 template<
typename Range,
typename Body,
typename Partitioner>
182 task* start_reduce<Range,Body,Partitioner>::execute() {
183 my_partition.check_being_stolen( *
this );
// A right child checks whether the parent finish task already has a body pointer
// (read with itt_load_word_with_acquire). If not, it split-constructs its own
// Body inside the parent's zombie_space and raises has_right_zombie, the flag
// finish_reduce tests before touching that storage.
184 if( my_context==right_child ) {
185 finish_type* parent_ptr =
static_cast<finish_type*
>(parent());
186 if( !itt_load_word_with_acquire(parent_ptr->my_body) ) {
187 my_body =
new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
188 parent_ptr->has_right_zombie =
true;
// Any task that is not a right child at this point is asserted to be the root.
190 }
else __TBB_ASSERT(my_context==root_task,NULL);
191 my_partition.execute(*
this, my_range);
// After the partition has executed, a task labelled left_child stores its body
// pointer into the parent finish task via itt_store_word_with_release; the
// assertion checks that this body is not the one living in zombie_space.
192 if( my_context==left_child ) {
193 finish_type* parent_ptr =
static_cast<finish_type*
>(parent());
194 __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
195 itt_store_word_with_release(parent_ptr->my_body, my_body );
// finish_deterministic_reduce<Body>: task holding a left and a right body and
// joining the right one into the left one.
// NOTE(review): the member declarations and the enclosing function of the join
// call are not part of this listing.
202 template<
typename Body>
203 class finish_deterministic_reduce:
public task {
// my_left_body is initialised from `body`; my_right_body is split-constructed
// from the same `body`.
207 finish_deterministic_reduce( Body &body ) :
208 my_left_body( body ),
209 my_right_body( body, split() )
// Merge the right partial result into the left one.
213 my_left_body.join( my_right_body );
// start_deterministic_reduce reads my_right_body when constructing a sibling.
216 template<
typename Range,
typename Body_>
217 friend class start_deterministic_reduce;
// start_deterministic_reduce<Range,Body>: task type behind the deterministic
// reduction entry points.
// NOTE(review): this listing omits several original lines (member declarations,
// constructor bodies, #else/#endif and closing braces).
222 template<
typename Range,
typename Body>
223 class start_deterministic_reduce:
public task {
224 typedef finish_deterministic_reduce<Body> finish_type;
// Root constructor: takes the body BY REFERENCE (unlike start_reduce, whose root
// constructor takes a Body*).
230 start_deterministic_reduce(
const Range& range, Body& body ) :
// Sibling constructor: works on the continuation's right body and on a range
// split off from the parent's range.
237 start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
238 my_body( c.my_right_body ),
239 my_range( parent_.my_range, split() )
// Entry point without a caller-supplied context: does nothing for an empty
// range; otherwise spawns a root task and waits for it.
// Fix: the first branch used to pass `&body` (a Body*) to the constructor above,
// which expects Body&; it now passes `body`, matching the other two call sites.
244 static void run(
const Range& range, Body& body ) {
245 if( !range.empty() ) {
246 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP 247 task::spawn_root_and_wait( *
new(task::allocate_root()) start_deterministic_reduce(range,body) );
251 task_group_context context;
252 task::spawn_root_and_wait( *
new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
// Entry point that allocates the root task in the caller-supplied context.
256 #if __TBB_TASK_GROUP_CONTEXT 257 static void run(
const Range& range, Body& body, task_group_context& context ) {
259 task::spawn_root_and_wait( *
new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
// start_deterministic_reduce::execute.
// NOTE(review): the lines between the divisibility test and the statements that
// follow it are missing from this listing, so the branch structure is not visible.
264 template<
typename Range,
typename Body>
265 task* start_deterministic_reduce<Range,Body>::execute() {
266 if( !my_range.is_divisible() ) {
// Visible splitting steps: create a finish_type continuation from my_body,
// recycle this task as a child of that continuation, and allocate a second
// child constructed from (*this, c).
270 finish_type& c = *
new( allocate_continuation() ) finish_type( my_body );
271 recycle_as_child_of(c);
273 start_deterministic_reduce& b = *
new( c.allocate_child() ) start_deterministic_reduce( *
this, c );
284 using interface9::internal::start_reduce;
285 using interface9::internal::start_deterministic_reduce;
// lambda_reduce_body: adapter that packages an identity value, a range functor
// (RealBody) and a reduction functor into an object with operator()(Range&),
// join() and result(), as used by the functional parallel_reduce overloads.
// NOTE(review): the my_value member is used below but its declaration is not
// part of this listing; one constructor initialiser line is missing as well.
291 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
292 class lambda_reduce_body {
// All three collaborators are held by const reference, not copied.
297 const Value& identity_element;
298 const RealBody& my_real_body;
299 const Reduction& my_reduction;
// Copy assignment is declared here; no definition is visible in this listing.
301 lambda_reduce_body& operator= (
const lambda_reduce_body& other );
303 lambda_reduce_body(
const Value& identity,
const RealBody& body,
const Reduction& reduction )
304 : identity_element(identity)
306 , my_reduction(reduction)
// Copy constructor: shares the referenced collaborators and copies the
// accumulated value.
309 lambda_reduce_body(
const lambda_reduce_body& other )
310 : identity_element(other.identity_element)
311 , my_real_body(other.my_real_body)
312 , my_reduction(other.my_reduction)
313 , my_value(other.my_value)
// Splitting constructor: shares the collaborators but restarts the accumulated
// value from the identity element.
315 lambda_reduce_body( lambda_reduce_body& other, tbb::split )
316 : identity_element(other.identity_element)
317 , my_real_body(other.my_real_body)
318 , my_reduction(other.my_reduction)
319 , my_value(other.identity_element)
// Fold `range` into the running value using the real body.
321 void operator()(Range& range) {
322 my_value = my_real_body(range, const_cast<const Value&>(my_value));
// Combine this partial value with rhs's partial value using the reduction functor.
324 void join( lambda_reduce_body& rhs ) {
325 my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
327 Value result()
const {
// parallel_reduce(const Range&, Body&) with the default partitioner; the
// signature line itself is missing from this listing. Delegates to
// start_reduce::run with a temporary __TBB_DEFAULT_PARTITIONER.
358 template<
typename Range,
typename Body>
360 internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
// Imperative-form reduction with a simple_partitioner: delegates to
// start_reduce::run; partial results are accumulated in `body`.
365 template<
typename Range,
typename Body>
366 void parallel_reduce(
const Range& range, Body& body,
const simple_partitioner& partitioner ) {
367 internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
// Imperative-form reduction with an auto_partitioner: delegates to
// start_reduce::run; partial results are accumulated in `body`.
372 template<
typename Range,
typename Body>
373 void parallel_reduce(
const Range& range, Body& body,
const auto_partitioner& partitioner ) {
374 internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
// Imperative-form reduction with a static_partitioner; only available when
// TBB_PREVIEW_STATIC_PARTITIONER is enabled. Delegates to start_reduce::run.
377 #if TBB_PREVIEW_STATIC_PARTITIONER 380 template<
typename Range,
typename Body>
381 void parallel_reduce(
const Range& range, Body& body,
const static_partitioner& partitioner ) {
382 internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
// Imperative-form reduction with an affinity_partitioner. Unlike the other
// partitioner overloads, the partitioner is taken by non-const reference.
388 template<
typename Range,
typename Body>
389 void parallel_reduce(
const Range& range, Body& body, affinity_partitioner& partitioner ) {
390 internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
// Imperative-form reduction with a simple_partitioner, run in the caller-supplied
// task_group_context; only available when __TBB_TASK_GROUP_CONTEXT is enabled.
393 #if __TBB_TASK_GROUP_CONTEXT 396 template<
typename Range,
typename Body>
397 void parallel_reduce(
const Range& range, Body& body,
const simple_partitioner& partitioner, task_group_context& context ) {
398 internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
// Imperative-form reduction with an auto_partitioner, run in the caller-supplied
// task_group_context.
403 template<
typename Range,
typename Body>
404 void parallel_reduce(
const Range& range, Body& body,
const auto_partitioner& partitioner, task_group_context& context ) {
405 internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
// Imperative-form reduction with a static_partitioner, run in the caller-supplied
// task_group_context; only available when TBB_PREVIEW_STATIC_PARTITIONER is enabled.
408 #if TBB_PREVIEW_STATIC_PARTITIONER 411 template<
typename Range,
typename Body>
412 void parallel_reduce(
const Range& range, Body& body,
const static_partitioner& partitioner, task_group_context& context ) {
413 internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
// Imperative-form reduction with an affinity_partitioner (non-const reference),
// run in the caller-supplied task_group_context.
419 template<
typename Range,
typename Body>
420 void parallel_reduce(
const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
421 internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
// Functional-form reduction with the default partitioner.
// Wraps identity/real_body/reduction in a lambda_reduce_body, runs start_reduce
// on it with a temporary __TBB_DEFAULT_PARTITIONER, and returns body.result().
430 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
431 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction ) {
432 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
433 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,
const __TBB_DEFAULT_PARTITIONER>
434 ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
435 return body.result();
// Functional-form reduction with a simple_partitioner.
// Wraps identity/real_body/reduction in a lambda_reduce_body, runs start_reduce
// on it, and returns body.result().
440 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
441 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
442 const simple_partitioner& partitioner ) {
443 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
444 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,
const simple_partitioner>
445 ::run(range, body, partitioner );
446 return body.result();
// Functional-form reduction with an auto_partitioner.
// Wraps identity/real_body/reduction in a lambda_reduce_body, runs start_reduce
// on it, and returns body.result().
451 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
452 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
453 const auto_partitioner& partitioner ) {
454 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
455 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,
const auto_partitioner>
456 ::run( range, body, partitioner );
457 return body.result();
// Functional-form reduction with a static_partitioner; only available when
// TBB_PREVIEW_STATIC_PARTITIONER is enabled. Returns body.result() after
// running start_reduce on a lambda_reduce_body wrapper.
460 #if TBB_PREVIEW_STATIC_PARTITIONER 463 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
464 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
465 const static_partitioner& partitioner ) {
466 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
467 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,
const static_partitioner>
468 ::run( range, body, partitioner );
469 return body.result();
// Functional-form reduction with an affinity_partitioner (taken by non-const
// reference). Returns body.result() after running start_reduce on a
// lambda_reduce_body wrapper.
475 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
476 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
477 affinity_partitioner& partitioner ) {
478 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
479 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
480 ::run( range, body, partitioner );
481 return body.result();
// Functional-form reduction with a simple_partitioner, run in the caller-supplied
// task_group_context; only available when __TBB_TASK_GROUP_CONTEXT is enabled.
// Returns body.result() after running start_reduce on a lambda_reduce_body wrapper.
484 #if __TBB_TASK_GROUP_CONTEXT 487 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
488 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
489 const simple_partitioner& partitioner, task_group_context& context ) {
490 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
491 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,
const simple_partitioner>
492 ::run( range, body, partitioner, context );
493 return body.result();
// Functional-form reduction with an auto_partitioner, run in the caller-supplied
// task_group_context. Returns body.result() after running start_reduce on a
// lambda_reduce_body wrapper.
498 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
499 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
500 const auto_partitioner& partitioner, task_group_context& context ) {
501 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
502 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,
const auto_partitioner>
503 ::run( range, body, partitioner, context );
504 return body.result();
// Functional-form reduction with a static_partitioner, run in the caller-supplied
// task_group_context; only available when TBB_PREVIEW_STATIC_PARTITIONER is
// enabled. Returns body.result() after running start_reduce on a
// lambda_reduce_body wrapper.
507 #if TBB_PREVIEW_STATIC_PARTITIONER 510 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
511 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
512 const static_partitioner& partitioner, task_group_context& context ) {
513 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
514 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,
const static_partitioner>
515 ::run( range, body, partitioner, context );
516 return body.result();
// Functional-form reduction with an affinity_partitioner (non-const reference),
// run in the caller-supplied task_group_context. Returns body.result() after
// running start_reduce on a lambda_reduce_body wrapper.
522 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
523 Value
parallel_reduce(
const Range& range,
const Value& identity,
const RealBody& real_body,
const Reduction& reduction,
524 affinity_partitioner& partitioner, task_group_context& context ) {
525 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
526 internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
527 ::run( range, body, partitioner, context );
528 return body.result();
// parallel_deterministic_reduce(const Range&, Body&); the signature line itself
// is missing from this listing. Delegates to start_deterministic_reduce::run.
534 template<
typename Range,
typename Body>
536 internal::start_deterministic_reduce<Range,Body>::run( range, body );
// Context-taking deterministic reduction, compiled only when
// __TBB_TASK_GROUP_CONTEXT is enabled. The signature line is missing from this
// listing; presumably this is the parallel_deterministic_reduce overload with a
// task_group_context parameter — TODO confirm against the full header.
539 #if __TBB_TASK_GROUP_CONTEXT 542 template<
typename Range,
typename Body>
544 internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
// Functional-form deterministic reduction. The signature line and the ::run(...)
// call line are missing from this listing; the visible code wraps
// identity/real_body/reduction in a lambda_reduce_body, names the
// start_deterministic_reduce specialisation for it, and returns body.result().
553 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
555 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
556 internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
558 return body.result();
// Functional-form deterministic reduction run in a caller-supplied
// task_group_context; compiled only when __TBB_TASK_GROUP_CONTEXT is enabled.
// The first signature line is missing from this listing. Wraps
// identity/real_body/reduction in a lambda_reduce_body, runs
// start_deterministic_reduce on it in `context`, and returns body.result().
561 #if __TBB_TASK_GROUP_CONTEXT 564 template<
typename Range,
typename Value,
typename RealBody,
typename Reduction>
566 task_group_context& context ) {
567 internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
568 internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
569 ::run( range, body, context );
570 return body.result();
void parallel_deterministic_reduce(const Range &range, Body &body)
Parallel iteration with deterministic reduction and default partitioner.
Definition: parallel_reduce.h:535
void parallel_reduce(const Range &range, Body &body)
Parallel iteration with reduction and default partitioner.
Definition: parallel_reduce.h:359
Definition: _flow_graph_async_msg_impl.h:32
The namespace tbb contains all components of the library.
Definition: parallel_for.h:44