BRE12
parallel_for.h
1 /*
2  Copyright 2005-2016 Intel Corporation. All Rights Reserved.
3 
4  This file is part of Threading Building Blocks. Threading Building Blocks is free software;
5  you can redistribute it and/or modify it under the terms of the GNU General Public License
6  version 2 as published by the Free Software Foundation. Threading Building Blocks is
7  distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
8  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
9  See the GNU General Public License for more details. You should have received a copy of
10  the GNU General Public License along with Threading Building Blocks; if not, write to the
11  Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
12 
13  As a special exception, you may use this file as part of a free software library without
14  restriction. Specifically, if other files instantiate templates or use macros or inline
15  functions from this file, or you compile this file and link it with other files to produce
16  an executable, this file does not by itself cause the resulting executable to be covered
17  by the GNU General Public License. This exception does not however invalidate any other
18  reasons why the executable file might be covered by the GNU General Public License.
19 */
20 
21 #ifndef __TBB_parallel_for_H
22 #define __TBB_parallel_for_H
23 
24 #include <new>
25 #include "task.h"
26 #include "partitioner.h"
27 #include "blocked_range.h"
28 #include "tbb_exception.h"
29 
30 namespace tbb {
31 
32 namespace interface9 {
34 namespace internal {
35 
// Forward declaration so that start_for::offer_work() can call it; the inline definition follows the class.
37  void* allocate_sibling(task* start_for_task, size_t bytes);
38 
40 
// Task type that applies a copy of Body to a Range, with the work distribution delegated to the
// Partitioner's task_partition_type. Entry points are the static run() overloads.
//   Range       - range type; the code below uses empty(), copy construction, and
//                 construction from (Range&, Partitioner::split_type&).
//   Body        - callable invoked as my_body(r) on a Range; stored as a const copy.
//   Partitioner - provides task_partition_type and split_type; callers in this file also
//                 instantiate it with const-qualified partitioner types.
41  template<typename Range, typename Body, typename Partitioner>
42  class start_for: public task {
    // Range this task is responsible for.
43  Range my_range;
    // Const copy of the body supplied to the constructor.
44  const Body my_body;
    // Per-task partitioner state.
45  typename Partitioner::task_partition_type my_partition;
    // Defined out of class, after allocate_sibling().
46  /*override*/ task* execute();
47 
    // Forwards the affinity id to my_partition.
49  /*override*/ void note_affinity( affinity_id id ) {
50  my_partition.note_affinity( id );
51  }
52 
53  public:
    // Constructs a task from copies of range and body; my_partition is built from partitioner.
    // Used by run() to create the root task.
55  start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
56  my_range(range),
57  my_body(body),
58  my_partition(partitioner)
59  {
60  }
62 
    // Constructs a task from parent_ and split_obj: my_range and my_partition are both built
    // from the parent's corresponding member plus split_obj, and the body is copied.
    // Afterwards the partition is asked to set affinity for the new task.
63  start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
64  my_range(parent_.my_range, split_obj),
65  my_body(parent_.my_body),
66  my_partition(parent_.my_partition, split_obj)
67  {
68  my_partition.set_affinity(*this);
69  }
71 
    // Constructs a task for the explicitly given range r, copying the parent's body.
    // my_partition is built from the parent's partition and a default-constructed split();
    // then affinity is set and the partition depth is aligned to d.
72  start_for( start_for& parent_, const Range& r, depth_t d ) :
73  my_range(r),
74  my_body(parent_.my_body),
75  my_partition(parent_.my_partition, split())
76  {
77  my_partition.set_affinity(*this);
78  my_partition.align_depth( d );
79  }
    // Creates a root start_for task for (range, body, partitioner), spawns it and waits for it.
    // Does nothing when range.empty() is true. Which allocate_root() form is used depends on
    // the preprocessor condition below.
80  static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
81  if( !range.empty() ) {
82 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
83  start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
84 #else
85  // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
86  // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
87  task_group_context context;
88  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
89 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
90  task::spawn_root_and_wait(a);
91  }
92  }
93 #if __TBB_TASK_GROUP_CONTEXT
    // Same as run() above, but the root task is always allocated in the caller-supplied context.
94  static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
95  if( !range.empty() ) {
96  start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
97  task::spawn_root_and_wait(a);
98  }
99  }
100 #endif /* __TBB_TASK_GROUP_CONTEXT */
    // Invokes the stored body on r. Public, presumably so the partition can call it back
    // from my_partition.execute() - TODO confirm against partitioner.h.
101  void run_body( Range &r ) { my_body( r ); }
103 
    // Constructs a new start_for from *this and split_obj in storage obtained from
    // allocate_sibling(), then spawns it.
105  void offer_work(typename Partitioner::split_type& split_obj) {
106  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
107  }
    // Constructs a new start_for for the explicit range r (depth d, default 0) in storage
    // obtained from allocate_sibling(), then spawns it.
109  void offer_work(const Range& r, depth_t d = 0) {
110  spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
111  }
112  };
113 
// Returns storage of `bytes` bytes for a new task that becomes a child of a freshly created
// flag_task. The sequence is:
//   1. construct a flag_task as the continuation of start_for_task;
//   2. make that flag_task the parent of start_for_task;
//   3. set the flag_task's reference count to 2 (presumably one for start_for_task and one
//      for the task about to be constructed in the returned storage - TODO confirm);
//   4. allocate the storage as a child of the flag_task.
// The caller is expected to placement-new a task into the returned storage, as offer_work() does.
115  // TODO: 'inline' here is to avoid multiple definition error but for sake of code size this should not be inlined
116  inline void* allocate_sibling(task* start_for_task, size_t bytes) {
117  task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
118  start_for_task->set_parent(parent_ptr);
119  parent_ptr->set_ref_count(2);
120  return &parent_ptr->allocate_child().allocate(bytes);
121  }
122 
// Task body of start_for: first calls my_partition.check_being_stolen(*this), then hands
// this task and my_range to my_partition.execute(), which drives the actual work.
// Always returns NULL.
124  template<typename Range, typename Body, typename Partitioner>
125  task* start_for<Range,Body,Partitioner>::execute() {
126  my_partition.check_being_stolen( *this );
127  my_partition.execute(*this, my_range);
128  return NULL;
129  }
130 } // namespace internal
132 } // namespace interfaceX
133 
135 namespace internal {
136  using interface9::internal::start_for;
137 
// Adapts a function of a single index to a body over tbb::blocked_range<Index>:
// for each i in [r.begin(), r.end()) it calls my_func(my_begin + i*my_step).
// The function object is held by reference (not copied), so it must outlive this body
// and every copy of it; my_begin and my_step are stored by value.
139  template<typename Function, typename Index>
140  class parallel_for_body : internal::no_assign {
141  const Function &my_func;
142  const Index my_begin;
143  const Index my_step;
144  public:
    // Stores a reference to _func and copies of _begin and _step.
    // NOTE(review): _begin and _step are taken by non-const reference although they are only
    // read; const Index& would also accept temporaries - confirm before changing.
145  parallel_for_body( const Function& _func, Index& _begin, Index& _step )
146  : my_func(_func), my_begin(_begin), my_step(_step) {}
147 
    // Calls my_func once per position of r, in increasing order of position.
148  void operator()( const tbb::blocked_range<Index>& r ) const {
149  // A set of local variables to help the compiler with vectorization of the following loop.
150  Index b = r.begin();
151  Index e = r.end();
152  Index ms = my_step;
    // k is the user-visible index for position b; it is advanced by ms each iteration
    // instead of being recomputed as my_begin + i*ms.
153  Index k = my_begin + b*ms;
154 
    // Vectorization hints, emitted only when __INTEL_COMPILER is set.
155 #if __INTEL_COMPILER
156 #pragma ivdep
157 #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
158 #pragma vector always assert
159 #endif
160 #endif
161  for ( Index i = b; i < e; ++i, k += ms ) {
162  my_func( k );
163  }
164  }
165  };
166 } // namespace internal
168 
169 // Requirements on Range concept are documented in blocked_range.h
170 
181 
183 
// Parallel iteration over range with the default partitioner (__TBB_DEFAULT_PARTITIONER).
// Delegates to internal::start_for::run(), which does nothing for an empty range.
// The partitioner template argument is const-qualified so that the temporary
// __TBB_DEFAULT_PARTITIONER() can bind to run()'s Partitioner& parameter.
184 template<typename Range, typename Body>
185 void parallel_for( const Range& range, const Body& body ) {
186  internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
187 }
188 
190 
// Parallel iteration over range with the given simple_partitioner.
// Delegates to internal::start_for::run(), which does nothing for an empty range.
191 template<typename Range, typename Body>
192 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
193  internal::start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
194 }
195 
197 
// Parallel iteration over range with the given auto_partitioner.
// Delegates to internal::start_for::run(), which does nothing for an empty range.
198 template<typename Range, typename Body>
199 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
200  internal::start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
201 }
202 
203 #if TBB_PREVIEW_STATIC_PARTITIONER
204 
// Parallel iteration over range with the given static_partitioner.
// Compiled only when TBB_PREVIEW_STATIC_PARTITIONER is set.
// Delegates to internal::start_for::run(), which does nothing for an empty range.
206 template<typename Range, typename Body>
207 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
208  internal::start_for<Range,Body,const static_partitioner>::run(range,body,partitioner);
209 }
210 #endif
211 
213 
// Parallel iteration over range with the given affinity_partitioner.
// Unlike the other partitioner overloads, the partitioner is taken by non-const reference.
// Delegates to internal::start_for::run(), which does nothing for an empty range.
214 template<typename Range, typename Body>
215 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
216  internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
217 }
218 
219 #if __TBB_TASK_GROUP_CONTEXT
220 
// Parallel iteration over range with the default partitioner, with the root task allocated
// in the caller-supplied task_group_context. Available only when __TBB_TASK_GROUP_CONTEXT is set.
// The partitioner template argument is const-qualified so that the temporary
// __TBB_DEFAULT_PARTITIONER() can bind to run()'s Partitioner& parameter.
222 template<typename Range, typename Body>
223 void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
224  internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context);
225 }
226 
228 
// Parallel iteration over range with the given simple_partitioner, with the root task
// allocated in the caller-supplied task_group_context.
229 template<typename Range, typename Body>
230 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
231  internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
232 }
233 
235 
// Parallel iteration over range with the given auto_partitioner, with the root task
// allocated in the caller-supplied task_group_context.
236 template<typename Range, typename Body>
237 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
238  internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
239 }
240 
241 #if TBB_PREVIEW_STATIC_PARTITIONER
242 
// Parallel iteration over range with the given static_partitioner, with the root task
// allocated in the caller-supplied task_group_context.
// Compiled only when TBB_PREVIEW_STATIC_PARTITIONER is set.
244 template<typename Range, typename Body>
245 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
246  internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
247 }
248 #endif
249 
251 
// Parallel iteration over range with the given affinity_partitioner (taken by non-const
// reference), with the root task allocated in the caller-supplied task_group_context.
252 template<typename Range, typename Body>
253 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
254  internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
255 }
256 #endif /* __TBB_TASK_GROUP_CONTEXT */
257 
258 
259 namespace strict_ppl {
260 
// Shared implementation of the index-based parallel_for overloads without a context.
// Calls f(first), f(first+step), f(first+2*step), ... for every value below last.
//   - step <= 0: reports eid_nonpositive_step through internal::throw_exception.
//   - last <= first: does nothing.
//   - otherwise: computes the iteration count ceil((last-first)/step) as
//     (last-first-1)/step + 1, and runs tbb::parallel_for over blocked_range<Index>(0, count)
//     with a parallel_for_body that maps position i to first + i*step.
// NOTE(review): last - first is evaluated in Index; for a signed Index a very wide span
// could overflow - confirm the expected argument ranges.
262 template <typename Index, typename Function, typename Partitioner>
264 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
265  if (step <= 0 )
266  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
267  else if (last > first) {
268  // Above "else" avoids "potential divide by zero" warning on some platforms
269  Index end = (last - first - Index(1)) / step + Index(1);
270  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
271  internal::parallel_for_body<Function, Index> body(f, first, step);
272  tbb::parallel_for(range, body, partitioner);
273  }
274 }
275 
// Parallel iteration over the integers first, first+step, ... below last, using a
// default-constructed auto_partitioner. Forwards to parallel_for_impl, which rejects step <= 0.
// The explicit const auto_partitioner template argument lets the temporary bind to the
// Partitioner& parameter.
277 template <typename Index, typename Function>
278 void parallel_for(Index first, Index last, Index step, const Function& f) {
279  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
280 }
// Parallel iteration over the integers first, first+step, ... below last, using the given
// simple_partitioner. Forwards to parallel_for_impl, which rejects step <= 0.
282 template <typename Index, typename Function>
283 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
284  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
285 }
// Parallel iteration over the integers first, first+step, ... below last, using the given
// auto_partitioner. Forwards to parallel_for_impl, which rejects step <= 0.
287 template <typename Index, typename Function>
288 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
289  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
290 }
291 #if TBB_PREVIEW_STATIC_PARTITIONER
// Parallel iteration over the integers first, first+step, ... below last, using the given
// static_partitioner. Compiled only when TBB_PREVIEW_STATIC_PARTITIONER is set.
// Forwards to parallel_for_impl, which rejects step <= 0.
292 template <typename Index, typename Function>
294 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
295  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
296 }
297 #endif
// Parallel iteration over the integers first, first+step, ... below last, using the given
// affinity_partitioner (taken by non-const reference; template arguments are deduced).
// Forwards to parallel_for_impl, which rejects step <= 0.
298 template <typename Index, typename Function>
300 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
301  parallel_for_impl(first, last, step, f, partitioner);
302 }
303 
// Parallel iteration over the integers in [first, last) with step 1, using a
// default-constructed auto_partitioner. Forwards to parallel_for_impl.
305 template <typename Index, typename Function>
306 void parallel_for(Index first, Index last, const Function& f) {
307  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
308 }
// Parallel iteration over the integers in [first, last) with step 1, using the given
// simple_partitioner. Forwards to parallel_for_impl.
310 template <typename Index, typename Function>
311 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
312  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
313 }
// Parallel iteration over the integers in [first, last) with step 1, using the given
// auto_partitioner. Forwards to parallel_for_impl.
315 template <typename Index, typename Function>
316 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
317  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
318 }
319 #if TBB_PREVIEW_STATIC_PARTITIONER
// Parallel iteration over the integers in [first, last) with step 1, using the given
// static_partitioner. Compiled only when TBB_PREVIEW_STATIC_PARTITIONER is set.
// Forwards to parallel_for_impl.
320 template <typename Index, typename Function>
322 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
323  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
324 }
325 #endif
// Parallel iteration over the integers in [first, last) with step 1, using the given
// affinity_partitioner (taken by non-const reference; template arguments are deduced).
// Forwards to parallel_for_impl.
326 template <typename Index, typename Function>
328 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
329  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
330 }
331 
332 #if __TBB_TASK_GROUP_CONTEXT
// Shared implementation of the index-based parallel_for overloads that take a
// task_group_context. Calls f(first), f(first+step), ... for every value below last.
//   - step <= 0: reports eid_nonpositive_step through internal::throw_exception.
//   - last <= first: does nothing.
//   - otherwise: computes the iteration count ceil((last-first)/step) as
//     (last-first-1)/step + 1, and runs the context-taking tbb::parallel_for over
//     blocked_range<Index>(0, count) with a parallel_for_body mapping position i to first + i*step.
// NOTE(review): last - first is evaluated in Index; for a signed Index a very wide span
// could overflow - confirm the expected argument ranges.
333 template <typename Index, typename Function, typename Partitioner>
335 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
336  if (step <= 0 )
337  internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
338  else if (last > first) {
339  // Above "else" avoids "potential divide by zero" warning on some platforms
340  Index end = (last - first - Index(1)) / step + Index(1);
341  tbb::blocked_range<Index> range(static_cast<Index>(0), end);
342  internal::parallel_for_body<Function, Index> body(f, first, step);
343  tbb::parallel_for(range, body, partitioner, context);
344  }
345 }
346 
// Parallel iteration over the integers first, first+step, ... below last, using a
// default-constructed auto_partitioner and the caller-supplied task_group_context.
// Forwards to the context-taking parallel_for_impl, which rejects step <= 0.
348 template <typename Index, typename Function>
349 void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
350  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
351 }
// Parallel iteration over the integers first, first+step, ... below last, using the given
// simple_partitioner and the caller-supplied task_group_context.
// Forwards to the context-taking parallel_for_impl, which rejects step <= 0.
353  template <typename Index, typename Function>
354 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
355  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
356 }
// Parallel iteration over the integers first, first+step, ... below last, using the given
// auto_partitioner and the caller-supplied task_group_context.
// Forwards to the context-taking parallel_for_impl, which rejects step <= 0.
358  template <typename Index, typename Function>
359 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
360  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
361 }
362 #if TBB_PREVIEW_STATIC_PARTITIONER
// Parallel iteration over the integers first, first+step, ... below last, using the given
// static_partitioner and the caller-supplied task_group_context.
// Compiled only when TBB_PREVIEW_STATIC_PARTITIONER is set.
// Forwards to the context-taking parallel_for_impl, which rejects step <= 0.
363 template <typename Index, typename Function>
365 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
366  parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
367 }
368 #endif
// Parallel iteration over the integers first, first+step, ... below last, using the given
// affinity_partitioner (non-const reference) and the caller-supplied task_group_context.
// Forwards to the context-taking parallel_for_impl, which rejects step <= 0.
369  template <typename Index, typename Function>
371 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
372  parallel_for_impl(first, last, step, f, partitioner, context);
373 }
374 
375 
// Parallel iteration over the integers in [first, last) with step 1, using a
// default-constructed auto_partitioner and the caller-supplied task_group_context.
377 template <typename Index, typename Function>
378 void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
379  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
380 }
// Parallel iteration over the integers in [first, last) with step 1, using the given
// simple_partitioner and the caller-supplied task_group_context.
382  template <typename Index, typename Function>
383 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
384  parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
385 }
// Parallel iteration over the integers in [first, last) with step 1, using the given
// auto_partitioner and the caller-supplied task_group_context.
387  template <typename Index, typename Function>
388 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
389  parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
390 }
391 #if TBB_PREVIEW_STATIC_PARTITIONER
// Parallel iteration over the integers in [first, last) with step 1, using the given
// static_partitioner and the caller-supplied task_group_context.
// Compiled only when TBB_PREVIEW_STATIC_PARTITIONER is set.
392 template <typename Index, typename Function>
394 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
395  parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
396 }
397 #endif
// Parallel iteration over the integers in [first, last) with step 1, using the given
// affinity_partitioner (non-const reference) and the caller-supplied task_group_context.
398  template <typename Index, typename Function>
400 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
401  parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
402 }
403 
404 #endif /* __TBB_TASK_GROUP_CONTEXT */
405 
406 
407 } // namespace strict_ppl
408 
410 
411 } // namespace tbb
412 
413 #if TBB_PREVIEW_SERIAL_SUBSET
414 #define __TBB_NORMAL_EXECUTION
415 #include "../serial/tbb/parallel_for.h"
416 #undef __TBB_NORMAL_EXECUTION
417 #endif
418 
419 #endif /* __TBB_parallel_for_H */
const_iterator begin() const
Beginning of range.
Definition: blocked_range.h:62
A range over which to iterate.
Definition: blocked_range.h:40
const_iterator end() const
One past last value in range.
Definition: blocked_range.h:65
void parallel_for(const Range &range, const Body &body)
Parallel iteration over range with default partitioner.
Definition: parallel_for.h:185
void parallel_for(Index first, Index last, Index step, const Function &f)
Parallel iteration over a range of integers with a step provided and default partitioner.
Definition: parallel_for.h:278
Definition: _flow_graph_async_msg_impl.h:32
The namespace tbb contains all components of the library.
Definition: parallel_for.h:44