BRE12
parallel_scan.h
1 /*
2  Copyright 2005-2016 Intel Corporation. All Rights Reserved.
3 
4  This file is part of Threading Building Blocks. Threading Building Blocks is free software;
5  you can redistribute it and/or modify it under the terms of the GNU General Public License
6  version 2 as published by the Free Software Foundation. Threading Building Blocks is
7  distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
8  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
9  See the GNU General Public License for more details. You should have received a copy of
10  the GNU General Public License along with Threading Building Blocks; if not, write to the
11  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12 
13  As a special exception, you may use this file as part of a free software library without
14  restriction. Specifically, if other files instantiate templates or use macros or inline
15  functions from this file, or you compile this file and link it with other files to produce
16  an executable, this file does not by itself cause the resulting executable to be covered
17  by the GNU General Public License. This exception does not however invalidate any other
18  reasons why the executable file might be covered by the GNU General Public License.
19 */
20 
21 #ifndef __TBB_parallel_scan_H
22 #define __TBB_parallel_scan_H
23 
24 #include "task.h"
25 #include "aligned_space.h"
26 #include <new>
27 #include "partitioner.h"
28 
29 namespace tbb {
30 
32 
33 struct pre_scan_tag {
34  static bool is_final_scan() {return false;}
35 };
36 
38 
40  static bool is_final_scan() {return true;}
41 };
42 
44 namespace internal {
45 
47 
48  template<typename Range, typename Body>
49  class final_sum: public task {
50  public:
51  Body my_body;
52  private:
53  aligned_space<Range> my_range;
55  Body* my_stuff_last;
56  public:
57  final_sum( Body& body_ ) :
58  my_body(body_,split())
59  {
60  poison_pointer(my_stuff_last);
61  }
62  ~final_sum() {
63  my_range.begin()->~Range();
64  }
65  void finish_construction( const Range& range_, Body* stuff_last_ ) {
66  new( my_range.begin() ) Range(range_);
67  my_stuff_last = stuff_last_;
68  }
69  private:
70  /*override*/ task* execute() {
71  my_body( *my_range.begin(), final_scan_tag() );
72  if( my_stuff_last )
73  my_stuff_last->assign(my_body);
74  return NULL;
75  }
76  };
77 
79 
80  template<typename Range, typename Body>
81  class sum_node: public task {
82  typedef final_sum<Range,Body> final_sum_type;
83  public:
84  final_sum_type *my_incoming;
85  final_sum_type *my_body;
86  Body *my_stuff_last;
87  private:
88  final_sum_type *my_left_sum;
89  sum_node *my_left;
90  sum_node *my_right;
91  bool my_left_is_final;
92  Range my_range;
93  sum_node( const Range range_, bool left_is_final_ ) :
94  my_left_sum(NULL),
95  my_left(NULL),
96  my_right(NULL),
97  my_left_is_final(left_is_final_),
98  my_range(range_)
99  {
100  // Poison fields that will be set by second pass.
101  poison_pointer(my_body);
102  poison_pointer(my_incoming);
103  }
104  task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
105  if( !n ) {
106  f.recycle_as_child_of( *this );
107  f.finish_construction( range_, stuff_last_ );
108  return &f;
109  } else {
110  n->my_body = &f;
111  n->my_incoming = incoming_;
112  n->my_stuff_last = stuff_last_;
113  return n;
114  }
115  }
116  /*override*/ task* execute() {
117  if( my_body ) {
118  if( my_incoming )
119  my_left_sum->my_body.reverse_join( my_incoming->my_body );
120  recycle_as_continuation();
121  sum_node& c = *this;
122  task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
123  task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
124  set_ref_count( (a!=NULL)+(b!=NULL) );
125  my_body = NULL;
126  if( a ) spawn(*b);
127  else a = b;
128  return a;
129  } else {
130  return NULL;
131  }
132  }
133  template<typename Range_,typename Body_,typename Partitioner_>
134  friend class start_scan;
135 
136  template<typename Range_,typename Body_>
137  friend class finish_scan;
138  };
139 
141 
142  template<typename Range, typename Body>
143  class finish_scan: public task {
144  typedef sum_node<Range,Body> sum_node_type;
145  typedef final_sum<Range,Body> final_sum_type;
146  final_sum_type** const my_sum;
147  sum_node_type*& my_return_slot;
148  public:
149  final_sum_type* my_right_zombie;
150  sum_node_type& my_result;
151 
152  /*override*/ task* execute() {
153  __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
154  if( my_result.my_left )
155  my_result.my_left_is_final = false;
156  if( my_right_zombie && my_sum )
157  ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
158  __TBB_ASSERT( !my_return_slot, NULL );
159  if( my_right_zombie || my_result.my_right ) {
160  my_return_slot = &my_result;
161  } else {
162  destroy( my_result );
163  }
164  if( my_right_zombie && !my_sum && !my_result.my_right ) {
165  destroy(*my_right_zombie);
166  my_right_zombie = NULL;
167  }
168  return NULL;
169  }
170 
171  finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
172  my_sum(sum_),
173  my_return_slot(return_slot_),
174  my_right_zombie(NULL),
175  my_result(result_)
176  {
177  __TBB_ASSERT( !my_return_slot, NULL );
178  }
179  };
180 
182 
183  template<typename Range, typename Body, typename Partitioner=simple_partitioner>
184  class start_scan: public task {
185  typedef sum_node<Range,Body> sum_node_type;
186  typedef final_sum<Range,Body> final_sum_type;
187  final_sum_type* my_body;
189  final_sum_type** my_sum;
190  sum_node_type** my_return_slot;
192  sum_node_type* my_parent_sum;
193  bool my_is_final;
194  bool my_is_right_child;
195  Range my_range;
196  typename Partitioner::partition_type my_partition;
197  /*override*/ task* execute();
198  public:
199  start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
200  my_body(parent_.my_body),
201  my_sum(parent_.my_sum),
202  my_return_slot(&return_slot_),
203  my_parent_sum(parent_sum_),
204  my_is_final(parent_.my_is_final),
205  my_is_right_child(false),
206  my_range(parent_.my_range,split()),
207  my_partition(parent_.my_partition,split())
208  {
209  __TBB_ASSERT( !*my_return_slot, NULL );
210  }
211 
212  start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
213  my_body(&body_),
214  my_sum(NULL),
215  my_return_slot(&return_slot_),
216  my_parent_sum(NULL),
217  my_is_final(true),
218  my_is_right_child(false),
219  my_range(range_),
220  my_partition(partitioner_)
221  {
222  __TBB_ASSERT( !*my_return_slot, NULL );
223  }
224 
225  static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
226  if( !range_.empty() ) {
227  typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
228  internal::sum_node<Range,Body>* root = NULL;
229  typedef internal::final_sum<Range,Body> final_sum_type;
230  final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
231  start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
232  /*my_return_slot=*/root,
233  range_,
234  *temp_body,
235  partitioner_ );
236  task::spawn_root_and_wait( pass1 );
237  if( root ) {
238  root->my_body = temp_body;
239  root->my_incoming = NULL;
240  root->my_stuff_last = &body_;
241  task::spawn_root_and_wait( *root );
242  } else {
243  body_.assign(temp_body->my_body);
244  temp_body->finish_construction( range_, NULL );
245  temp_body->destroy(*temp_body);
246  }
247  }
248  }
249  };
250 
251  template<typename Range, typename Body, typename Partitioner>
252  task* start_scan<Range,Body,Partitioner>::execute() {
253  typedef internal::finish_scan<Range,Body> finish_pass1_type;
254  finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
255  // Inspecting p->result.left_sum would ordinarily be a race condition.
256  // But we inspect it only if we are not a stolen task, in which case we
257  // know that task assigning to p->result.left_sum has completed.
258  bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
259  if( treat_as_stolen ) {
260  // Invocation is for right child that has been really stolen or needs to be virtually stolen
261  p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
262  my_is_final = false;
263  }
264  task* next_task = NULL;
265  if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
266  if( my_is_final )
267  (my_body->my_body)( my_range, final_scan_tag() );
268  else if( my_sum )
269  (my_body->my_body)( my_range, pre_scan_tag() );
270  if( my_sum )
271  *my_sum = my_body;
272  __TBB_ASSERT( !*my_return_slot, NULL );
273  } else {
274  sum_node_type* result;
275  if( my_parent_sum )
276  result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
277  else
278  result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
279  finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
280  // Split off right child
281  start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
282  b.my_is_right_child = true;
283  // Left child is recycling of *this. Must recycle this before spawning b,
284  // otherwise b might complete and decrement c.ref_count() to zero, which
285  // would cause c.execute() to run prematurely.
286  recycle_as_child_of(c);
287  c.set_ref_count(2);
288  c.spawn(b);
289  my_sum = &result->my_left_sum;
290  my_return_slot = &result->my_left;
291  my_is_right_child = false;
292  next_task = this;
293  my_parent_sum = result;
294  __TBB_ASSERT( !*my_return_slot, NULL );
295  }
296  return next_task;
297  }
298 } // namespace internal
300 
301 // Requirements on Range concept are documented in blocked_range.h
302 
320 
322 
323 template<typename Range, typename Body>
324 void parallel_scan( const Range& range, Body& body ) {
325  internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
326 }
327 
329 
330 template<typename Range, typename Body>
331 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
332  internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
333 }
334 
336 
337 template<typename Range, typename Body>
338 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
339  internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
340 }
342 
343 } // namespace tbb
344 
345 #endif /* __TBB_parallel_scan_H */
346 
Block of space aligned sufficiently to construct an array T with N elements.
Definition: aligned_space.h:33
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
Definition: parallel_scan.h:324
T * begin()
Pointer to beginning of array.
Definition: aligned_space.h:39
Definition: _flow_graph_async_msg_impl.h:32
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:33
The namespace tbb contains all components of the library.
Definition: parallel_for.h:44
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:39