21 #ifndef __TBB_parallel_scan_H 22 #define __TBB_parallel_scan_H 25 #include "aligned_space.h" 27 #include "partitioner.h" 34 static bool is_final_scan() {
return false;}
40 static bool is_final_scan() {
return true;}
48 template<
typename Range,
typename Body>
49 class final_sum:
public task {
57 final_sum( Body& body_ ) :
58 my_body(body_,split())
60 poison_pointer(my_stuff_last);
63 my_range.
begin()->~Range();
65 void finish_construction(
const Range& range_, Body* stuff_last_ ) {
66 new( my_range.
begin() ) Range(range_);
67 my_stuff_last = stuff_last_;
73 my_stuff_last->assign(my_body);
80 template<
typename Range,
typename Body>
81 class sum_node:
public task {
82 typedef final_sum<Range,Body> final_sum_type;
84 final_sum_type *my_incoming;
85 final_sum_type *my_body;
88 final_sum_type *my_left_sum;
91 bool my_left_is_final;
93 sum_node(
const Range range_,
bool left_is_final_ ) :
97 my_left_is_final(left_is_final_),
101 poison_pointer(my_body);
102 poison_pointer(my_incoming);
104 task* create_child(
const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
106 f.recycle_as_child_of( *
this );
107 f.finish_construction( range_, stuff_last_ );
111 n->my_incoming = incoming_;
112 n->my_stuff_last = stuff_last_;
119 my_left_sum->my_body.reverse_join( my_incoming->my_body );
120 recycle_as_continuation();
122 task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
123 task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
124 set_ref_count( (a!=NULL)+(b!=NULL) );
133 template<
typename Range_,
typename Body_,
typename Partitioner_>
134 friend class start_scan;
136 template<
typename Range_,
typename Body_>
137 friend class finish_scan;
142 template<
typename Range,
typename Body>
143 class finish_scan:
public task {
144 typedef sum_node<Range,Body> sum_node_type;
145 typedef final_sum<Range,Body> final_sum_type;
146 final_sum_type**
const my_sum;
147 sum_node_type*& my_return_slot;
149 final_sum_type* my_right_zombie;
150 sum_node_type& my_result;
153 __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
154 if( my_result.my_left )
155 my_result.my_left_is_final =
false;
156 if( my_right_zombie && my_sum )
157 ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
158 __TBB_ASSERT( !my_return_slot, NULL );
159 if( my_right_zombie || my_result.my_right ) {
160 my_return_slot = &my_result;
162 destroy( my_result );
164 if( my_right_zombie && !my_sum && !my_result.my_right ) {
165 destroy(*my_right_zombie);
166 my_right_zombie = NULL;
171 finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
173 my_return_slot(return_slot_),
174 my_right_zombie(NULL),
177 __TBB_ASSERT( !my_return_slot, NULL );
183 template<
typename Range,
typename Body,
typename Partitioner=simple_partitioner>
184 class start_scan:
public task {
185 typedef sum_node<Range,Body> sum_node_type;
186 typedef final_sum<Range,Body> final_sum_type;
187 final_sum_type* my_body;
189 final_sum_type** my_sum;
190 sum_node_type** my_return_slot;
192 sum_node_type* my_parent_sum;
194 bool my_is_right_child;
196 typename Partitioner::partition_type my_partition;
199 start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
200 my_body(parent_.my_body),
201 my_sum(parent_.my_sum),
202 my_return_slot(&return_slot_),
203 my_parent_sum(parent_sum_),
204 my_is_final(parent_.my_is_final),
205 my_is_right_child(
false),
206 my_range(parent_.my_range,split()),
207 my_partition(parent_.my_partition,split())
209 __TBB_ASSERT( !*my_return_slot, NULL );
212 start_scan( sum_node_type*& return_slot_,
const Range& range_, final_sum_type& body_,
const Partitioner& partitioner_) :
215 my_return_slot(&return_slot_),
218 my_is_right_child(
false),
220 my_partition(partitioner_)
222 __TBB_ASSERT( !*my_return_slot, NULL );
225 static void run(
const Range& range_, Body& body_,
const Partitioner& partitioner_ ) {
226 if( !range_.empty() ) {
227 typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
228 internal::sum_node<Range,Body>* root = NULL;
229 typedef internal::final_sum<Range,Body> final_sum_type;
230 final_sum_type* temp_body =
new(task::allocate_root()) final_sum_type( body_ );
231 start_pass1_type& pass1 = *
new(task::allocate_root()) start_pass1_type(
236 task::spawn_root_and_wait( pass1 );
238 root->my_body = temp_body;
239 root->my_incoming = NULL;
240 root->my_stuff_last = &body_;
241 task::spawn_root_and_wait( *root );
243 body_.assign(temp_body->my_body);
244 temp_body->finish_construction( range_, NULL );
245 temp_body->destroy(*temp_body);
251 template<
typename Range,
typename Body,
typename Partitioner>
252 task* start_scan<Range,Body,Partitioner>::execute() {
253 typedef internal::finish_scan<Range,Body> finish_pass1_type;
254 finish_pass1_type* p = my_parent_sum ?
static_cast<finish_pass1_type*
>( parent() ) : NULL;
258 bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
259 if( treat_as_stolen ) {
261 p->my_right_zombie = my_body =
new( allocate_root() ) final_sum_type(my_body->my_body);
264 task* next_task = NULL;
265 if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*
this) ) {
272 __TBB_ASSERT( !*my_return_slot, NULL );
274 sum_node_type* result;
276 result =
new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,my_is_final);
278 result =
new(task::allocate_root()) sum_node_type(my_range,my_is_final);
279 finish_pass1_type& c = *
new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
281 start_scan& b = *
new( c.allocate_child() ) start_scan( result->my_right, *
this, result );
282 b.my_is_right_child =
true;
286 recycle_as_child_of(c);
289 my_sum = &result->my_left_sum;
290 my_return_slot = &result->my_left;
291 my_is_right_child =
false;
293 my_parent_sum = result;
294 __TBB_ASSERT( !*my_return_slot, NULL );
323 template<
typename Range,
typename Body>
325 internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
330 template<
typename Range,
typename Body>
331 void parallel_scan(
const Range& range, Body& body,
const simple_partitioner& partitioner ) {
332 internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
337 template<
typename Range,
typename Body>
338 void parallel_scan(
const Range& range, Body& body,
const auto_partitioner& partitioner ) {
339 internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
Block of space aligned sufficiently to construct an array T with N elements.
Definition: aligned_space.h:33
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
Definition: parallel_scan.h:324
T * begin()
Pointer to beginning of array.
Definition: aligned_space.h:39
Definition: _flow_graph_async_msg_impl.h:32
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:33
The namespace tbb contains all components of the library.
Definition: parallel_for.h:44
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:39