21 #ifndef __TBB_partitioner_H 22 #define __TBB_partitioner_H 24 #ifndef __TBB_INITIAL_CHUNKS 26 #define __TBB_INITIAL_CHUNKS 2 28 #ifndef __TBB_RANGE_POOL_CAPACITY 30 #define __TBB_RANGE_POOL_CAPACITY 8 32 #ifndef __TBB_INIT_DEPTH 34 #define __TBB_INIT_DEPTH 5 36 #ifndef __TBB_DEMAND_DEPTH_ADD 38 #define __TBB_DEMAND_DEPTH_ADD 1 40 #ifndef __TBB_STATIC_THRESHOLD 42 #define __TBB_STATIC_THRESHOLD 40000 45 #define __TBB_NONUNIFORM_TASK_CREATION 1 46 #ifdef __TBB_time_stamp 47 #define __TBB_USE_MACHINE_TIME_STAMPS 1 48 #define __TBB_task_duration() __TBB_STATIC_THRESHOLD 49 #endif // __TBB_machine_time_stamp 50 #endif // __TBB_DEFINE_MIC 53 #include "aligned_space.h" 55 #include "internal/_template_helpers.h" 57 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 59 #pragma warning (push) 60 #pragma warning (disable: 4244) 65 class auto_partitioner;
66 class simple_partitioner;
67 #if TBB_PREVIEW_STATIC_PARTITIONER 68 class static_partitioner;
70 class affinity_partitioner;
72 namespace interface9 {
74 class affinity_partition_type;
79 size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
// NOTE(review): this file is a damaged extraction of TBB's partitioner.h --
// original line numbers are fused into the text and some source lines are
// missing (the embedded numbering has gaps). Code is preserved byte-for-byte.
// Base class shared by affinity_partitioner: owns a heap array of affinity_ids
// recording which thread executed each chunk, so a later loop over the same
// range can replay the mapping.
82 class affinity_partitioner_base_v3: no_copy {
83 friend class tbb::affinity_partitioner;
84 friend class tbb::interface9::internal::affinity_partition_type;
// Affinity mapping array; the declaration of my_size (used by the ctor below)
// is missing from this extraction -- TODO recover from upstream.
87 affinity_id* my_array;
91 affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
// Destructor frees my_array by resizing to zero elements.
93 ~affinity_partitioner_base_v3() {resize(0);}
// Out-of-line (exported) resize; presumably sizes my_array proportionally to
// factor and the worker count -- implementation not visible here.
96 void __TBB_EXPORTED_METHOD resize(
unsigned factor );
// Legacy (pre-interface9) partition-type base: provides no-op hooks for the
// old partitioner protocol. NOTE(review): the body of spawn_or_delay and the
// class closing brace are missing from this extraction.
100 class partition_type_base {
102 void set_affinity( task & ) {}
103 void note_affinity( task::affinity_id ) {}
104 task* continue_after_execute_range() {
return NULL;}
105 bool decide_whether_to_delay() {
return false;}
106 void spawn_or_delay(
bool, task& b ) {
111 template<
typename Range,
typename Body,
typename Partitioner>
class start_scan;
116 namespace interface9 {
117 template<
typename Range,
typename Body,
typename Partitioner>
class start_for;
121 namespace interface9 {
125 template<
typename Range,
typename Body,
typename Partitioner>
class start_for;
126 template<
typename Range,
typename Body,
typename Partitioner>
class start_reduce;
// Helper task whose only job is to carry a "my child was stolen" flag that
// siblings can inspect to detect demand for parallelism.
// NOTE(review): the my_child_stolen member declaration and several lines of
// mark_task_stolen are missing from this extraction.
129 class flag_task:
public task {
132 flag_task() { my_child_stolen =
false; }
// execute() does nothing -- the task exists only for its flag and ref count.
133 task* execute() {
return NULL; }
134 static void mark_task_stolen(task &t) {
// Under threading tools the flag is set with a release fence so observers
// see it coherently; the non-tools branch is missing from this extraction.
136 #if TBB_USE_THREADING_TOOLS 138 flag.fetch_and_store<
release>(
true);
141 #endif //TBB_USE_THREADING_TOOLS 143 static bool is_peer_stolen(task &t) {
// Peer-theft query: looks at the shared parent's flag.
144 return static_cast<flag_task*
>(t.parent())->my_child_stolen;
// Depth of recursive subdivision; 8 bits suffice for the pool capacity used.
151 typedef unsigned char depth_t;
// range_vector: a fixed-capacity circular buffer of subranges (backed by an
// aligned_space pool, per the trailing Doxygen residue) with a parallel array
// of per-range depths. NOTE(review): the class head, member declarations
// (my_head/my_tail/my_size/my_pool) and several method signatures are missing
// from this extraction -- only fragments are visible.
154 template <
typename T, depth_t MaxCapacity>
159 depth_t my_depth[MaxCapacity];
// Ctor: places the initial range into the pool by placement-new.
164 range_vector(
const T& elem) : my_head(0), my_tail(0), my_size(1) {
166 new(
static_cast<void *
>(my_pool.
begin()) ) T(elem);
// Presumably the destructor: drains the pool so each T is destroyed.
169 while( !empty() ) pop_back();
171 bool empty()
const {
return my_size == 0; }
172 depth_t size()
const {
return my_size; }
// Repeatedly splits the back (deepest) range until the pool is full or the
// range is no longer divisible at the depth limit. Each split move-constructs
// the old back one slot forward, then rebuilds the old slot as its left half
// via the splitting constructor T(T&, split).
175 void split_to_fill(depth_t max_depth) {
176 while( my_size < MaxCapacity && is_divisible(max_depth) ) {
177 depth_t prev = my_head;
178 my_head = (my_head + 1) % MaxCapacity;
179 new(my_pool.
begin()+my_head) T(my_pool.
begin()[prev]);
180 my_pool.
begin()[prev].~T();
181 new(my_pool.
begin()+prev) T(my_pool.
begin()[my_head], split());
182 my_depth[my_head] = ++my_depth[prev];
// pop_back: destroy the head (deepest) element and retreat the head index
// (signature line missing from this extraction).
187 __TBB_ASSERT(my_size > 0,
"range_vector::pop_back() with empty size");
188 my_pool.
begin()[my_head].~T();
190 my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
// pop_front: destroy the tail (shallowest) element and advance the tail index
// (signature line missing from this extraction).
193 __TBB_ASSERT(my_size > 0,
"range_vector::pop_front() with empty size");
194 my_pool.
begin()[my_tail].~T();
196 my_tail = (my_tail + 1) % MaxCapacity;
// back(): deepest range, at my_head (signature line missing).
199 __TBB_ASSERT(my_size > 0,
"range_vector::back() with empty size");
200 return my_pool.
begin()[my_head];
// front(): shallowest range, at my_tail (signature line missing).
203 __TBB_ASSERT(my_size > 0,
"range_vector::front() with empty size");
204 return my_pool.
begin()[my_tail];
207 depth_t front_depth() {
208 __TBB_ASSERT(my_size > 0,
"range_vector::front_depth() with empty size");
209 return my_depth[my_tail];
211 depth_t back_depth() {
212 __TBB_ASSERT(my_size > 0,
"range_vector::back_depth() with empty size");
213 return my_depth[my_head];
// Divisible only while under the depth cap and the range itself can split.
215 bool is_divisible(depth_t max_depth) {
216 return back_depth() < max_depth && back().is_divisible();
// CRTP base of the interface9 partition types: supplies default (no-op)
// affinity/demand hooks and the generic execute() driver; Partition is the
// most-derived type, reached through self().
// NOTE(review): several lines of execute() (the do-loop head among them) are
// missing from this extraction.
221 template <
typename Partition>
222 struct partition_type_base {
223 typedef split split_type;
225 void set_affinity( task & ) {}
226 void note_affinity( task::affinity_id ) {}
227 bool check_being_stolen(task &) {
return false; }
228 bool check_for_demand(task &) {
return false; }
229 bool is_divisible() {
return true; }
230 depth_t max_depth() {
return 0; }
231 void align_depth(depth_t) { }
232 template <
typename Range> split_type get_split() {
return split(); }
// CRTP downcast to the concrete partition type.
233 Partition&
self() {
return *
static_cast<Partition*
>(
this); }
// Default work balancing: just run the body serially on the range.
235 template<
typename StartType,
typename Range>
236 void work_balance(StartType &start, Range &range) {
237 start.run_body( range );
// Generic driver: while both the range and the partition say "divisible",
// offer split-off work to the scheduler, then balance what remains.
240 template<
typename StartType,
typename Range>
241 void execute(StartType &start, Range &range) {
249 if ( range.is_divisible() ) {
250 if (
self().is_divisible() ) {
252 typename Partition::split_type split_obj =
self().
template get_split<Range>();
253 start.offer_work( split_obj );
254 }
while ( range.is_divisible() &&
self().is_divisible() );
257 self().work_balance(start, range);
// Adaptive splitting mode: my_divisor tracks how many more pieces this
// partition may produce; even splits halve it, proportional splits carve off
// a rounded portion. NOTE(review): the my_divisor declaration, the #else arm
// of the range-feedback block, and closing lines are missing from this
// extraction.
262 template <
typename Partition>
263 struct adaptive_mode : partition_type_base<Partition> {
264 typedef Partition my_partition;
265 using partition_type_base<Partition>::self;
271 static const unsigned factor = 1;
// Initial divisor is a quarter of the library-computed auto-partitioner
// divisor, scaled by the derived type's factor.
272 adaptive_mode() : my_divisor(
tbb::
internal::get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
273 adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
274 adaptive_mode(adaptive_mode &src,
const proportional_split& split_obj) : my_divisor(do_split(src, split_obj)) {}
// Even split: both sides get half of the source's divisor.
276 size_t do_split(adaptive_mode &src, split) {
277 return src.my_divisor /= 2u;
// Proportional split: compute the right-hand portion (feedback-weighted when
// __TBB_ENABLE_RANGE_FEEDBACK is on), round it to a multiple of factor, clamp
// so neither side ends up empty, and deduct it from the source.
279 size_t do_split(adaptive_mode &src,
const proportional_split& split_obj) {
280 #if __TBB_ENABLE_RANGE_FEEDBACK 281 size_t portion = size_t(
float(src.my_divisor) *
float(split_obj.right())
282 /
float(split_obj.left() + split_obj.right()) + 0.5f);
284 size_t portion = split_obj.right() * my_partition::factor;
286 portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
287 #if __TBB_ENABLE_RANGE_FEEDBACK 290 portion = my_partition::factor;
291 else if (portion == src.my_divisor)
292 portion = src.my_divisor - my_partition::factor;
294 src.my_divisor -= portion;
297 bool is_divisible() {
298 return my_divisor > my_partition::factor;
// Affinity-aware variant of adaptive_mode: my_head tracks this partition's
// starting slot in a linear thread numbering; on split the right half begins
// where the left half's divisor ends. NOTE(review): the my_head declaration
// and the tail of set_affinity are missing from this extraction.
303 template <
typename Partition>
304 struct linear_affinity_mode : adaptive_mode<Partition> {
305 using adaptive_mode<Partition>::my_divisor;
307 using adaptive_mode<Partition>::self;
308 linear_affinity_mode() : adaptive_mode<Partition>(), my_head(0) {}
309 linear_affinity_mode(linear_affinity_mode &src, split) : adaptive_mode<Partition>(src, split())
310 , my_head(src.my_head + src.my_divisor) {}
311 linear_affinity_mode(linear_affinity_mode &src,
const proportional_split& split_obj) : adaptive_mode<Partition>(src, split_obj)
312 , my_head(src.my_head + src.my_divisor) {}
// Pin the task near slot my_head; affinity ids are 1-based, hence the +1.
313 void set_affinity( task &t ) {
315 t.set_affinity( affinity_id(my_head) + 1 );
// SFINAE trait: value is true when Range advertises support for proportional
// splitting. NOTE(review): the yes/no helper types and the positive decide()
// overload are missing from this extraction; only the variadic fallback and
// the sizeof comparison survive.
324 template <
typename Range>
325 class is_splittable_in_proportion {
331 template <
typename range_type>
static no& decide(...);
335 static const bool value = (
sizeof(decide<Range>(0)) ==
sizeof(yes));
// Partition type without dynamic work balancing: splitting decisions come
// entirely from the wrapped Mode. NOTE(review): the template head
// (template <typename Mode>) and closing lines are missing from this
// extraction.
340 struct unbalancing_partition_type : Mode {
342 unbalancing_partition_type() : Mode() {}
343 unbalancing_partition_type(unbalancing_partition_type& p, split) : Mode(p, split()) {}
344 unbalancing_partition_type(unbalancing_partition_type& p,
const proportional_split& split_obj) : Mode(p, split_obj) {}
// Choose split proportions. When the Range supports proportional splits,
// divide the remaining task budget ~1/3 right, 2/3 left under
// __TBB_NONUNIFORM_TASK_CREATION (the uniform half/half branch is the #else);
// otherwise fall back to an even 1:1 split. warning 4127 (constant
// conditional) is silenced around the compile-time trait test.
345 #if _MSC_VER && !defined(__INTEL_COMPILER) 347 #pragma warning( push ) 348 #pragma warning( disable: 4127 ) 350 template <
typename Range>
351 proportional_split get_split() {
352 if (is_splittable_in_proportion<Range>::value) {
353 size_t size =
self().my_divisor / Mode::my_partition::factor;
354 #if __TBB_NONUNIFORM_TASK_CREATION 355 size_t right = (size + 2) / 3;
357 size_t right = size / 2;
359 size_t left = size - right;
360 return proportional_split(left, right);
362 return proportional_split(1, 1);
// Adds dynamic load balancing on top of unbalancing_partition_type: a depth
// budget (my_max_depth), a local range pool, and theft-driven demand
// detection. NOTE(review): the template head, the delay-state enum
// (pass/begin/run and my_delay), tsc initializers, and a number of branch
// lines are missing from this extraction.
365 #if _MSC_VER && !defined(__INTEL_COMPILER) 366 #pragma warning( pop ) 367 #endif // warning 4127 is back 372 struct balancing_partition_type : unbalancing_partition_type<Mode> {
// Deadline timestamp used only when machine time stamps are available.
374 #ifdef __TBB_USE_MACHINE_TIME_STAMPS 375 tbb::internal::machine_tsc_t my_dst_tsc;
// Remaining depth budget for local subdivision; pool sized by the
// __TBB_RANGE_POOL_CAPACITY tuning macro.
382 depth_t my_max_depth;
383 static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
384 balancing_partition_type(): unbalancing_partition_type<Mode>()
385 #ifdef __TBB_USE_MACHINE_TIME_STAMPS
389 , my_max_depth(__TBB_INIT_DEPTH) {}
390 balancing_partition_type(balancing_partition_type& p, split)
391 : unbalancing_partition_type<Mode>(p, split())
392 #ifdef __TBB_USE_MACHINE_TIME_STAMPS
396 , my_max_depth(p.my_max_depth) {}
397 balancing_partition_type(balancing_partition_type& p,
const proportional_split& split_obj)
398 : unbalancing_partition_type<Mode>(p, split_obj)
399 #ifdef __TBB_USE_MACHINE_TIME_STAMPS
403 , my_max_depth(p.my_max_depth) {}
// When the divisor budget is exhausted and this task was stolen, flag the
// theft on the parent and grant extra depth so the thief has work to split.
404 bool check_being_stolen( task &t) {
405 if( !(
self().my_divisor / Mode::my_partition::factor) ) {
406 self().my_divisor = 1;
407 if( t.is_stolen_task() && t.parent()->ref_count() >= 2 ) {
// Under optional RTTI, sanity-check that the parent really is a flag_task.
408 #if __TBB_USE_OPTIONAL_RTTI 410 __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
415 flag_task::mark_task_stolen(t);
416 if( !my_max_depth ) my_max_depth++;
417 my_max_depth += __TBB_DEMAND_DEPTH_ADD;
423 depth_t max_depth() {
return my_max_depth; }
// Re-base the depth budget after descending 'base' levels.
424 void align_depth(depth_t base) {
425 __TBB_ASSERT(base <= my_max_depth, 0);
426 my_max_depth -= base;
// Balanced execution: keep a pool of subranges, split to fill it, hand the
// shallowest range away when demand is detected, and run the deepest ranges
// locally until the pool drains or the task is cancelled.
428 template<
typename StartType,
typename Range>
429 void work_balance(StartType &start, Range &range) {
430 if( !range.is_divisible() || !
self().max_depth() ) {
431 start.run_body( range );
434 internal::range_vector<Range, range_pool_size> range_pool(range);
436 range_pool.split_to_fill(
self().max_depth());
437 if(
self().check_for_demand( start ) ) {
438 if( range_pool.size() > 1 ) {
439 start.offer_work( range_pool.front(), range_pool.front_depth() );
440 range_pool.pop_front();
443 if( range_pool.is_divisible(
self().max_depth()) )
446 start.run_body( range_pool.back() );
447 range_pool.pop_back();
448 }
while( !range_pool.empty() && !start.is_cancelled() );
// Demand check driven by a delay state machine (pass/begin/run -- the enum
// and my_delay declaration are missing from this extraction) plus peer-theft
// detection; extra depth is granted when demand is seen.
451 bool check_for_demand( task &t ) {
452 if( pass == my_delay ) {
453 if(
self().my_divisor > 1 )
455 else if(
self().my_divisor && my_max_depth ) {
456 self().my_divisor = 0;
459 else if( flag_task::is_peer_stolen(t) ) {
460 my_max_depth += __TBB_DEMAND_DEPTH_ADD;
463 }
else if( begin == my_delay ) {
// Timestamp branch: arm a deadline, then report no demand until it expires.
464 #ifndef __TBB_USE_MACHINE_TIME_STAMPS 467 my_dst_tsc = __TBB_time_stamp() + __TBB_task_duration();
469 }
else if( run == my_delay ) {
470 if( __TBB_time_stamp() < my_dst_tsc ) {
471 __TBB_ASSERT(my_max_depth > 0, NULL);
// Task-level partition type behind auto_partitioner: balancing + adaptive
// mode, with the initial divisor scaled by __TBB_INITIAL_CHUNKS.
// NOTE(review): the bodies of is_divisible's depth branch and
// check_for_demand's tail are missing from this extraction.
477 #endif // __TBB_USE_MACHINE_TIME_STAMPS 483 class auto_partition_type:
public balancing_partition_type<adaptive_mode<auto_partition_type> > {
485 auto_partition_type(
const auto_partitioner& )
486 : balancing_partition_type<adaptive_mode<auto_partition_type> >() {
487 my_divisor *= __TBB_INITIAL_CHUNKS;
489 auto_partition_type( auto_partition_type& src, split)
490 : balancing_partition_type<adaptive_mode<auto_partition_type> >(src, split()) {}
491 bool is_divisible() {
492 if( my_divisor > 1 )
return true;
493 if( my_divisor && my_max_depth ) {
// Demand detection: a stolen peer grants extra depth budget.
500 bool check_for_demand(task &t) {
501 if( flag_task::is_peer_stolen(t) ) {
502 my_max_depth += __TBB_DEMAND_DEPTH_ADD;
// Partition type behind simple_partitioner: splits the range all the way down
// to indivisible pieces with no balancing heuristics.
508 class simple_partition_type:
public partition_type_base<simple_partition_type> {
510 simple_partition_type(
const simple_partitioner& ) {}
511 simple_partition_type(
const simple_partition_type&, split ) {}
// Offer one task per split until the range is indivisible, then run the body
// on what remains.
513 template<
typename StartType,
typename Range>
514 void execute(StartType &start, Range &range) {
515 split_type split_obj = split();
516 while( range.is_divisible() )
517 start.offer_work( split_obj );
518 start.run_body( range );
// Preview-feature partition type behind static_partitioner: linear affinity
// mapping with NO dynamic balancing (base type is overridable via the
// __TBB_STATIC_PARTITIONER_BASE_TYPE macro, default unbalancing).
522 #if TBB_PREVIEW_STATIC_PARTITIONER 523 #ifndef __TBB_STATIC_PARTITIONER_BASE_TYPE 524 #define __TBB_STATIC_PARTITIONER_BASE_TYPE unbalancing_partition_type 526 class static_partition_type :
public __TBB_STATIC_PARTITIONER_BASE_TYPE<linear_affinity_mode<static_partition_type> > {
528 typedef proportional_split split_type;
529 static_partition_type(
const static_partitioner& )
530 : __TBB_STATIC_PARTITIONER_BASE_TYPE<linear_affinity_mode<static_partition_type> >() {}
531 static_partition_type( static_partition_type& p, split )
532 : __TBB_STATIC_PARTITIONER_BASE_TYPE<linear_affinity_mode<static_partition_type> >(p, split()) {}
533 static_partition_type( static_partition_type& p,
const proportional_split& split_obj )
534 : __TBB_STATIC_PARTITIONER_BASE_TYPE<linear_affinity_mode<static_partition_type> >(p, split_obj) {}
// Partition type behind affinity_partitioner: balancing + linear affinity,
// replaying the thread mapping recorded in the partitioner's my_array across
// loop executions. factor=16 oversubdivides so the replayed mapping is fine-
// grained. NOTE(review): my_head is inherited from linear_affinity_mode; the
// else-branch structure of set_affinity and closing lines are missing from
// this extraction.
536 #undef __TBB_STATIC_PARTITIONER_BASE_TYPE 539 class affinity_partition_type :
public balancing_partition_type<linear_affinity_mode<affinity_partition_type> > {
540 static const unsigned factor_power = 4;
541 tbb::internal::affinity_id* my_array;
543 static const unsigned factor = 1 << factor_power;
544 typedef proportional_split split_type;
545 affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap )
546 : balancing_partition_type<linear_affinity_mode<affinity_partition_type> >() {
547 __TBB_ASSERT( (factor&(factor-1))==0,
"factor must be power of two" );
549 my_array = ap.my_array;
550 my_max_depth = factor_power + 1;
551 __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
553 affinity_partition_type(affinity_partition_type& p, split)
554 : balancing_partition_type<linear_affinity_mode<affinity_partition_type> >(p, split())
555 , my_array(p.my_array) {}
556 affinity_partition_type(affinity_partition_type& p,
const proportional_split& split_obj)
557 : balancing_partition_type<linear_affinity_mode<affinity_partition_type> >(p, split_obj)
558 , my_array(p.my_array) {}
// Pin to the recorded affinity if one exists for this slot; otherwise fall
// back to a linear slot-derived id (1-based).
559 void set_affinity( task &t ) {
561 if( !my_array[my_head] )
563 t.set_affinity( affinity_id(my_head / factor + 1) );
565 t.set_affinity( my_array[my_head] );
// Record where this chunk actually ran, for replay on the next execution.
568 void note_affinity( task::affinity_id
id ) {
570 my_array[my_head] = id;
// Backward-compatible partition type for the legacy (parallel_scan-era)
// protocol: counts down num_chunks, executing a range only when one chunk
// remains, and inflating the count for stolen tasks so thieves subdivide.
// NOTE(review): the num_chunks member declaration and closing lines are
// missing from this extraction.
575 class old_auto_partition_type:
public tbb::internal::partition_type_base {
577 static const size_t VICTIM_CHUNKS = 4;
579 bool should_execute_range(
const task &t) {
580 if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
581 num_chunks = VICTIM_CHUNKS;
582 return num_chunks==1;
584 old_auto_partition_type(
const auto_partitioner& )
585 : num_chunks(
internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
586 old_auto_partition_type(
const affinity_partitioner& )
587 : num_chunks(
internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
// Split halves the chunk budget (rounded up) on both sides.
588 old_auto_partition_type( old_auto_partition_type& pt, split ) {
589 num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
// Public partitioner that splits until ranges are indivisible. The start_*
// algorithm templates are befriended so they can reach the nested partition
// types; task_partition_type is the interface9 implementation, the nested
// partition_type serves the legacy protocol.
600 class simple_partitioner {
602 simple_partitioner() {}
604 template<
typename Range,
typename Body,
typename Partitioner>
friend class serial::interface9::start_for;
605 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_for;
606 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_reduce;
607 template<
typename Range,
typename Body,
typename Partitioner>
friend class internal::start_scan;
// Legacy partition type: never defers execution (should_execute_range false).
609 class partition_type:
public internal::partition_type_base {
611 bool should_execute_range(
const task& ) {
return false;}
612 partition_type(
const simple_partitioner& ) {}
613 partition_type(
const partition_type&, split ) {}
616 typedef interface9::internal::simple_partition_type task_partition_type;
619 typedef interface9::internal::simple_partition_type::split_type split_type;
// Public partitioner that chooses chunk sizes automatically. Legacy protocol
// uses old_auto_partition_type; interface9 algorithms use auto_partition_type.
626 class auto_partitioner {
628 auto_partitioner() {}
631 template<
typename Range,
typename Body,
typename Partitioner>
friend class serial::interface9::start_for;
632 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_for;
633 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_reduce;
634 template<
typename Range,
typename Body,
typename Partitioner>
friend class internal::start_scan;
636 typedef interface9::internal::old_auto_partition_type partition_type;
638 typedef interface9::internal::auto_partition_type task_partition_type;
641 typedef interface9::internal::auto_partition_type::split_type split_type;
// Preview-feature public partitioner: distributes work statically across
// threads (interface9 type static_partition_type); the legacy protocol falls
// back to old_auto_partition_type.
644 #if TBB_PREVIEW_STATIC_PARTITIONER 645 class static_partitioner {
648 static_partitioner() {}
650 template<
typename Range,
typename Body,
typename Partitioner>
friend class serial::interface9::start_for;
651 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_for;
652 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_reduce;
653 template<
typename Range,
typename Body,
typename Partitioner>
friend class internal::start_scan;
655 typedef interface9::internal::old_auto_partition_type partition_type;
657 typedef interface9::internal::static_partition_type task_partition_type;
660 typedef interface9::internal::static_partition_type::split_type split_type;
// Public partitioner that remembers which thread ran each chunk (state lives
// in the affinity_partitioner_base_v3 base) and replays the mapping on
// subsequent executions; interface9 type is affinity_partition_type.
665 class affinity_partitioner: internal::affinity_partitioner_base_v3 {
667 affinity_partitioner() {}
670 template<
typename Range,
typename Body,
typename Partitioner>
friend class serial::interface9::start_for;
671 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_for;
672 template<
typename Range,
typename Body,
typename Partitioner>
friend class interface9::internal::start_reduce;
673 template<
typename Range,
typename Body,
typename Partitioner>
friend class internal::start_scan;
675 typedef interface9::internal::old_auto_partition_type partition_type;
677 typedef interface9::internal::affinity_partition_type task_partition_type;
680 typedef interface9::internal::affinity_partition_type::split_type split_type;
685 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 686 #pragma warning (pop) 687 #endif // warning 4244 is back 688 #undef __TBB_INITIAL_CHUNKS 689 #undef __TBB_RANGE_POOL_CAPACITY 690 #undef __TBB_INIT_DEPTH
// --- Doxygen cross-reference residue from the documentation scrape; not source code ---
// Enables one or the other code branches.
// Definition: _template_helpers.h:29
// Block of space aligned sufficiently to construct an array T with N elements.
// Definition: aligned_space.h:33
// Definition: material.h:665
// T * begin()
// Pointer to beginning of array.
// Definition: aligned_space.h:39
// Definition: _flow_graph_async_msg_impl.h:32
// Release.
// Definition: atomic.h:49
// The namespace tbb contains all components of the library.
// Definition: parallel_for.h:44