#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
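
/* Determine the global thread id (gtid) of the calling thread.  Depending on
   __kmp_gtid_mode this uses thread-local data, keyed TLS, or a fallback that
   searches the registered threads' stack extents for the current stack
   address. */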
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if we
           are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /* if we haven't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint storage map for threads */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
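
/* As above, but also register the calling thread as a new root (running the
   serial initialization first if needed) when it does not have a gtid yet. */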
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
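
/* Verify that the calling thread's stack does not overlap another registered
   thread's stack.  The pairwise check is only performed when __kmp_env_checks
   is enabled and the thread is not an uber (root) thread; an overlap is
   reported as a fatal StackOverlap error. */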
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking uber threads since they use refinement and cannot
     overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
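
/* Print one "OMP storage map:" line for the address range [p1, p2] to kmp_err
   and, when KMP_PRINT_DATA_PLACEMENT is enabled, the per-page memory-node
   placement of that range. */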
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0) /* doesn't work, so don't try this next time */
        __kmp_storage_map_verbose = FALSE;
      else {
        char *last;
        int lastNode;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                               localProc >> 1);
        else
          __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
        do {
          last = p1;
          lastNode = node;
          /* This loop collates adjacent pages with the same host node. */
          do {
            (char *)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);
        } while (p1 <= p2);
#else
        __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                             (char *)p1 + (page_size - 1),
                             __kmp_get_host_node(p1));
        if (p1 < p2) {
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
        }
#endif
      }
    }
  } else
    __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
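
/* Print a runtime warning ("OMP warning: ...") to kmp_err, serialized with the
   bootstrap stdio lock.  Warnings are suppressed when __kmp_generate_warnings
   is kmp_warnings_off. */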
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
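
/* Abort the whole process: dump the debug buffer if enabled, flag abnormal
   termination on Windows via __kmp_global.g.g_abort, and never return. */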
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock if
    // abort happened during library initialization or shutdown
    __kmp_global.g.g_abort = SIGABRT;
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}

void __kmp_abort_thread(void) { __kmp_infinite_loop(); }
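
/* Storage-map dumps: print the layout of the major kmp_info_t / kmp_team_t
   sub-structures for a thread or a team via __kmp_print_storage_map_gtid. */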
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() { __kmp_init_memkind(); }

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_bootstrap_lock(lck); // make the lock released
}
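
/* Used from DllMain on PROCESS_DETACH: wait until no other registered thread
   (besides the detaching one) is still alive, then re-initialize the bootstrap
   locks that a killed thread may have left locked. */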
static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // Wait until no other registered thread (except the one requesting the
  // detach) is still alive.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume we are alone now; it should be safe to reset the bootstrap locks.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
  __kmp_reset_lock(&__kmp_stdio_lock);
}
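
/* Windows DLL entry point: route process/thread attach and detach
   notifications into the runtime's initialization and shutdown paths. */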
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating (as opposed to a
      // FreeLibrary() call); other threads may already have been killed, so
      // the bootstrap locks are reset first.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
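
/* Parallel ordered support: __kmp_parallel_deo waits until it is the calling
   thread's turn in an ordered region, __kmp_parallel_dxo signals the next
   thread in the team. */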
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
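
/* Claim the single region for the calling thread.  Returns nonzero if this
   thread won the race (via an atomic compare-and-store on t_construct) and
   should execute the single block. */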
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
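
/* Decide how many threads the next parallel region will actually get.
   Starting from the requested set_nthreads, the value is reduced by the
   active dynamic mode (load balance, thread limit, random), by
   KMP_DEVICE_THREAD_LIMIT / OMP_THREAD_LIMIT, and by the remaining capacity
   of the threads array; returns 1 when the region should be serialized. */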
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // nothing to adjust
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
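
/* Install the master thread and allocate (or reuse, for hot teams) the worker
   threads for a newly forked team, aligning each worker's barrier state with
   the team's barrier counters. */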
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}
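
/* On x86/x86_64 the master's floating-point control state (x87 control word
   and MXCSR) is captured into the team and restored on the workers when
   KMP_INHERIT_FP_CONTROL is enabled; on other architectures these helpers
   compile away to no-ops. */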
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Only write to the team structure when the values actually change, to
    // avoid putting the cache line into a written state unnecessarily.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly, don't write to this cache line in the team structure unless
    // we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
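
/* Set up the calling thread to run a serialized parallel region: reuse (or
   allocate) the thread's serial team, push a new nested level, allocate a
   dispatch buffer, and notify OMPT of the serialized parallel region and its
   implicit task. */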
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
          codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                     ompt_parallel_data,
#endif
                                     proc_bind,
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  } else {
    /* this serialized team is already being used, so just add another nested
       level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for serialized parallel regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; its content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
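
/* __kmp_fork_call is the main fork point for a parallel region: it decides
   how many threads to use, serializes the region when only one thread is
   available, handles the teams-construct special cases, allocates and wires
   up the team, and then releases the workers via __kmp_internal_fork. */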
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with some
       gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif
  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
          OMPT_INVOKER(call_context), return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  master_th->th.th_ident = loc;
  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // This is the start of a parallel region nested inside a teams construct.
    // The team is actual (hot); all workers are ready at the fork barrier.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif
    // Increment our nested depth levels, but not increase the serialization
    if (parent_team == master_th->th.th_serial_team) {
      // we are in serialized parallel
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // Needed so that enquiry functions work correctly; restored at join time
      parent_team->t.t_serialized--;
#if OMPT_SUPPORT
      void *dummy;
      void **exit_runtime_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; its content was swapped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
      }

#if OMPT_SUPPORT
      *exit_runtime_p = NULL;
      if (ompt_enabled.enabled) {
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        __ompt_lw_taskteam_unlink(master_th);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context), return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // Only can reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      master_th->th.th_set_nproc = 0;
    }

#if USE_DEBUGGER
    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
#if KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }
#endif

  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    // Check if we need to take the forkjoin lock (no need for a serialized
    // parallel outside of a teams construct).
    if (nthreads > 1) {
      if ((get__max_active_levels(master_th) == 1 &&
           (root->r.r_in_parallel && !enter_teams)) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads);
      if (nthreads == 1) {
        // Free the lock for single-thread execution here; for multi-thread
        // execution it will be freed later, after the team of threads has been
        // created and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads, restore it now.
  master_th->th.th_set_nproc = 0;
  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;
        // Get args from parent team for teams construct

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; its content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
          }

          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
        // Revert the change made in __kmpc_serialized_parallel() because the
        // initial code in teams should have level = 0
        team->t.t_level--;
        // Call the special invoker for the outer "parallel" of the teams
        // construct
        invoker(gtid);
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif
#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; its content was swapped
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame.ptr);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        }
#endif
      }
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
      // don't use lw_taskteam after linking; its content was swapped
#endif

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } // if (nthreads == 1)
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default; // don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use the current proc-bind-var.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // Do not increase the parallel level at the start of the teams construct.
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team. Unless this is a hot team,
    // it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
               __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED && OMP_40_ENABLED
  // Account a teams-construct invocation to the TEAMS_REGION state.
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
  }

#if KMP_STATS_ENABLED && OMP_40_ENABLED
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  return TRUE;
}
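
/* OMPT helpers used on the join path: restore the thread state after a join
   and emit the parallel-end callback for the completed region. */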
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   fork_context_e fork_context, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
        codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
#endif
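
/* __kmp_join_call is the matching join point: it waits for the workers at the
   join barrier, reports ITT/OMPT events, restores the master thread's state
   and ICVs, and releases the team back to the pool (or keeps it hot). */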
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in a teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // We haven't incremented it earlier at the start of the teams
        // construct, so do it here - at the end of the teams construct.
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // We are exiting a parallel inside teams; increment serialization so
        // that it is restored in the next __kmpc_end_serialized_parallel.
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // No barrier for internal teams at exit from teams construct, but there is
    // a barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
  } else {
    master_th->th.th_task_state = 0; // no tasking in teams
  }

#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_caller_destroy(
        (__itt_caller)team->t.t_stack_id); // destroy the stack stitching id
    // after the join barrier
  }

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 &&
      !master_th->th.th_teams_microtask) { /* not in teams construct */
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined")
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */
  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // We are at the end of a parallel nested inside a teams construct; keep
    // the team structure intact for the next parallel, only adjust nesting
    // levels and thread counts.

    /* Decrement our nested depth level */
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    // Restore the number of threads in the team if needed.
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of the non-used threads of the team
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize the thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize the thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data,
                      fork_context, codeptr);
    }
#endif

    return;
  }
2527 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2528 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2530 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2535 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2538 if (!master_th->th.th_teams_microtask ||
2539 team->t.t_level > master_th->th.th_teams_level)
2543 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2545 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2548 if (ompt_enabled.enabled) {
2549 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2550 if (ompt_enabled.ompt_callback_implicit_task) {
2551 int ompt_team_size = team->t.t_nproc;
2552 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2553 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2554 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2557 task_info->frame.exit_frame = ompt_data_none;
2558 task_info->task_data = ompt_data_none;
2562 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2564 __kmp_pop_current_task_from_thread(master_th);
2566 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2568 master_th->th.th_first_place = team->t.t_first_place;
2569 master_th->th.th_last_place = team->t.t_last_place;
2572 master_th->th.th_def_allocator = team->t.t_def_allocator;
2575 updateHWFPControl(team);
2577 if (root->r.r_active != master_active)
2578 root->r.r_active = master_active;
2580 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2588 master_th->th.th_team = parent_team;
2589 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2590 master_th->th.th_team_master = parent_team->t.t_threads[0];
2591 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2594 if (parent_team->t.t_serialized &&
2595 parent_team != master_th->th.th_serial_team &&
2596 parent_team != root->r.r_root_team) {
2597 __kmp_free_team(root,
2598 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2599 master_th->th.th_serial_team = parent_team;
2602 if (__kmp_tasking_mode != tskm_immediate_exec) {
2603 if (master_th->th.th_task_state_top >
2605 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2607 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2608 master_th->th.th_task_state;
2609 --master_th->th.th_task_state_top;
2611 master_th->th.th_task_state =
2613 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2616 master_th->th.th_task_team =
2617 parent_team->t.t_task_team[master_th->th.th_task_state];
2619 (
"__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
2620 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2627 master_th->th.th_current_task->td_flags.executing = 1;
2629 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2632 if (ompt_enabled.enabled) {
2633 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
2639 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
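/* Runtime entry for omp_set_num_threads(): record the new nproc ICV for the
   calling thread and, if the root is idle, shrink the hot team right away so
   the extra workers can be returned to the thread pool. */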
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;
  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
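/* Runtime entry for omp_set_max_active_levels(): validate the requested
   value and store it in the calling thread's ICVs. */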
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting is not changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
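/* Runtime entry for omp_set_schedule(): map the standard schedule kind (plus
   any monotonic/nonmonotonic modifiers) onto the internal sched_type and
   record it, together with the chunk size, in the calling thread's ICVs. */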
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: chunk should be invalid to
      // indicate unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
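/* Runtime entry for omp_get_schedule(): translate the internal sched_type
   stored in the ICVs back into the standard kind/chunk pair. */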
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
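/* Walk up the team hierarchy to find the thread number of this thread's
   ancestor at the given nesting level (omp_get_ancestor_thread_num). */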
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // in a teams region nested teams share a level; skip the teams league
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
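/* Walk up the team hierarchy to find the team size at the given nesting
   level (omp_get_team_size). */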
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

  if (thr->th.th_teams_microtask) {
    // in a teams region nested teams share a level; skip the teams league
    int tlevel = thr->th.th_teams_level;
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      ii += (ii == tlevel) ? 2 : 1;
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
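/* Build the default run-time schedule from the global settings, resolving
   the generic STATIC/GUIDED kinds to their detailed internal variants. */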
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;

  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    // replace STATIC with the more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (s == kmp_sch_guided_chunked) {
    // replace GUIDED with the more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
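/* Allocate (or grow) the argv array used to pass outlined-function arguments
   to the team; small argument counts reuse the team's inline buffer. */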
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    // if realloc, then free the old (non-inline) argv block
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      // use the inline allocated space
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      // allocate space for the arguments on the heap
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : (2 * argc);
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
3171 static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3173 kmp_r_sched_t r_sched =
3174 __kmp_get_schedule_global();
3177 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3180 kmp_internal_control_t g_icvs = {
3182 (kmp_int8)__kmp_global.g.g_dynamic,
3184 (kmp_int8)__kmp_env_blocktime,
3186 __kmp_dflt_blocktime,
3191 __kmp_dflt_team_nth,
3195 __kmp_dflt_max_active_levels,
3200 __kmp_nested_proc_bind.bind_types[0],
3201 __kmp_default_device,
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serialized at the time of creation
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
3220 static void __kmp_initialize_root(kmp_root_t *root) {
3222 kmp_team_t *root_team;
3223 kmp_team_t *hot_team;
3224 int hot_team_max_nth;
3225 kmp_r_sched_t r_sched =
3226 __kmp_get_schedule_global();
3227 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3228 KMP_DEBUG_ASSERT(root);
3229 KMP_ASSERT(!root->r.r_begin);
3232 __kmp_init_lock(&root->r.r_begin_lock);
3233 root->r.r_begin = FALSE;
3234 root->r.r_active = FALSE;
3235 root->r.r_in_parallel = 0;
3236 root->r.r_blocktime = __kmp_dflt_blocktime;
3240 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3243 __kmp_allocate_team(root,
3250 __kmp_nested_proc_bind.bind_types[0],
3254 USE_NESTED_HOT_ARG(NULL)
3259 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3262 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3264 root->r.r_root_team = root_team;
3265 root_team->t.t_control_stack_top = NULL;
3268 root_team->t.t_threads[0] = NULL;
3269 root_team->t.t_nproc = 1;
3270 root_team->t.t_serialized = 1;
3272 root_team->t.t_sched.sched = r_sched.sched;
3275 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3276 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3280 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3283 __kmp_allocate_team(root,
3285 __kmp_dflt_team_nth_ub * 2,
3290 __kmp_nested_proc_bind.bind_types[0],
3294 USE_NESTED_HOT_ARG(NULL)
3296 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3298 root->r.r_hot_team = hot_team;
3299 root_team->t.t_control_stack_top = NULL;
3302 hot_team->t.t_parent = root_team;
3305 hot_team_max_nth = hot_team->t.t_max_nproc;
3306 for (f = 0; f < hot_team_max_nth; ++f) {
3307 hot_team->t.t_threads[f] = NULL;
3309 hot_team->t.t_nproc = 1;
3311 hot_team->t.t_sched.sched = r_sched.sched;
3312 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
3323 static void __kmp_print_structure_team_accum(
3324 kmp_team_list_t list,
3325 kmp_team_p
const *team
3335 KMP_DEBUG_ASSERT(list != NULL);
3340 __kmp_print_structure_team_accum(list, team->t.t_parent);
3341 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3345 while (l->next != NULL && l->entry != team) {
3348 if (l->next != NULL) {
3354 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3360 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3361 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3389 void __kmp_print_structure(
void) {
3391 kmp_team_list_t list;
3395 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3399 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3400 "Table\n------------------------------\n");
3403 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3404 __kmp_printf(
"%2d", gtid);
3405 if (__kmp_threads != NULL) {
3406 __kmp_printf(
" %p", __kmp_threads[gtid]);
3408 if (__kmp_root != NULL) {
3409 __kmp_printf(
" %p", __kmp_root[gtid]);
3416 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3418 if (__kmp_threads != NULL) {
3420 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3421 kmp_info_t
const *thread = __kmp_threads[gtid];
3422 if (thread != NULL) {
3423 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3424 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3425 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3426 __kmp_print_structure_team(
" Serial Team: ",
3427 thread->th.th_serial_team);
3428 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3429 __kmp_print_structure_thread(
" Master: ",
3430 thread->th.th_team_master);
3431 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3432 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3434 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3436 __kmp_print_structure_thread(
" Next in pool: ",
3437 thread->th.th_next_pool);
3439 __kmp_print_structure_team_accum(list, thread->th.th_team);
3440 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3444 __kmp_printf(
"Threads array is not allocated.\n");
3448 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3450 if (__kmp_root != NULL) {
3452 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3453 kmp_root_t
const *root = __kmp_root[gtid];
3455 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3456 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3457 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3458 __kmp_print_structure_thread(
" Uber Thread: ",
3459 root->r.r_uber_thread);
3460 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3461 __kmp_printf(
" In Parallel: %2d\n",
3462 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3464 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3465 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3469 __kmp_printf(
"Ubers array is not allocated.\n");
3472 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3474 while (list->next != NULL) {
3475 kmp_team_p
const *team = list->entry;
3477 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3478 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3479 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3480 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3481 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3482 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3483 for (i = 0; i < team->t.t_nproc; ++i) {
3484 __kmp_printf(
" Thread %2d: ", i);
3485 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3487 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3493 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3495 __kmp_print_structure_thread(
"Thread pool: ",
3496 CCAST(kmp_info_t *, __kmp_thread_pool));
3497 __kmp_print_structure_team(
"Team pool: ",
3498 CCAST(kmp_team_t *, __kmp_team_pool));
3502 while (list != NULL) {
3503 kmp_team_list_item_t *item = list;
3505 KMP_INTERNAL_FREE(item);
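/* Table of prime multipliers used to seed the per-thread linear congruential
   random number generator defined below. */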
3514 static const unsigned __kmp_primes[] = {
3515 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3516 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3517 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3518 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3519 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3520 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3521 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3522 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3523 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3524 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3525 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
/* Reclaim array entries for root threads that are already dead; returns the
   number of entries reclaimed. Only used by the Windows static library. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died while
      // not active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
#endif

static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
// resizing __kmp_threads does not need additional protection if foreign
// threads are present.

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* Dead roots may free up entries we can reuse before growing the arrays. */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* If the remaining headroom is too small, give up. */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
3677 int __kmp_register_root(
int initial_thread) {
3678 kmp_info_t *root_thread;
3682 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3683 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3700 capacity = __kmp_threads_capacity;
3701 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3706 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3707 if (__kmp_tp_cached) {
3708 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3709 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3710 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3712 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3720 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3724 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3725 KMP_ASSERT(gtid < __kmp_threads_capacity);
3729 TCW_4(__kmp_nth, __kmp_nth + 1);
3733 if (__kmp_adjust_gtid_mode) {
3734 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3735 if (TCR_4(__kmp_gtid_mode) != 2) {
3736 TCW_4(__kmp_gtid_mode, 2);
3739 if (TCR_4(__kmp_gtid_mode) != 1) {
3740 TCW_4(__kmp_gtid_mode, 1);
3745 #ifdef KMP_ADJUST_BLOCKTIME
3748 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3749 if (__kmp_nth > __kmp_avail_proc) {
3750 __kmp_zero_bt = TRUE;
3756 if (!(root = __kmp_root[gtid])) {
3757 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3758 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3761 #if KMP_STATS_ENABLED
3763 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3764 __kmp_stats_thread_ptr->startLife();
3765 KMP_SET_THREAD_STATE(SERIAL_REGION);
3768 __kmp_initialize_root(root);
3771 if (root->r.r_uber_thread) {
3772 root_thread = root->r.r_uber_thread;
3774 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3775 if (__kmp_storage_map) {
3776 __kmp_print_thread_storage_map(root_thread, gtid);
3778 root_thread->th.th_info.ds.ds_gtid = gtid;
3780 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3782 root_thread->th.th_root = root;
3783 if (__kmp_env_consistency_check) {
3784 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3787 __kmp_initialize_fast_memory(root_thread);
3791 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3792 __kmp_initialize_bget(root_thread);
3794 __kmp_init_random(root_thread);
3798 if (!root_thread->th.th_serial_team) {
3799 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3800 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3801 root_thread->th.th_serial_team =
3802 __kmp_allocate_team(root, 1, 1,
3809 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3811 KMP_ASSERT(root_thread->th.th_serial_team);
3812 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3813 root_thread->th.th_serial_team));
3816 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3818 root->r.r_root_team->t.t_threads[0] = root_thread;
3819 root->r.r_hot_team->t.t_threads[0] = root_thread;
3820 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3822 root_thread->th.th_serial_team->t.t_serialized = 0;
3823 root->r.r_uber_thread = root_thread;
3826 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3827 TCW_4(__kmp_init_gtid, TRUE);
3830 __kmp_gtid_set_specific(gtid);
3833 __kmp_itt_thread_name(gtid);
3836 #ifdef KMP_TDATA_GTID
3839 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3840 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3842 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3844 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3845 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3846 KMP_INIT_BARRIER_STATE));
3849 for (b = 0; b < bs_last_barrier; ++b) {
3850 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3852 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3856 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3857 KMP_INIT_BARRIER_STATE);
3859 #if KMP_AFFINITY_SUPPORTED
3861 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3862 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3863 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3864 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3866 if (TCR_4(__kmp_init_middle)) {
3867 __kmp_affinity_set_init_mask(gtid, TRUE);
3871 root_thread->th.th_def_allocator = __kmp_def_allocator;
3872 root_thread->th.th_prev_level = 0;
3873 root_thread->th.th_prev_num_threads = 1;
3876 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3877 tmp->cg_root = root_thread;
3878 tmp->cg_thread_limit = __kmp_cg_max_nth;
3879 tmp->cg_nthreads = 1;
3880 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3881 " cg_nthreads init to 1\n",
3884 root_thread->th.th_cg_roots = tmp;
3886 __kmp_root_counter++;
3889 if (!initial_thread && ompt_enabled.enabled) {
3891 kmp_info_t *root_thread = ompt_get_thread();
3893 ompt_set_thread_state(root_thread, ompt_state_overhead);
3895 if (ompt_enabled.ompt_callback_thread_begin) {
3896 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3897 ompt_thread_initial, __ompt_get_thread_data_internal());
3899 ompt_data_t *task_data;
3900 ompt_data_t *parallel_data;
3901 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3902 if (ompt_enabled.ompt_callback_implicit_task) {
3903 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3904 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3907 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3912 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3917 #if KMP_NESTED_HOT_TEAMS
3918 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
3919 const int max_level) {
3921 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3922 if (!hot_teams || !hot_teams[level].hot_team) {
3925 KMP_DEBUG_ASSERT(level < max_level);
3926 kmp_team_t *team = hot_teams[level].hot_team;
3927 nth = hot_teams[level].hot_team_nth;
3929 if (level < max_level - 1) {
3930 for (i = 0; i < nth; ++i) {
3931 kmp_info_t *th = team->t.t_threads[i];
3932 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3933 if (i > 0 && th->th.th_hot_teams) {
3934 __kmp_free(th->th.th_hot_teams);
3935 th->th.th_hot_teams = NULL;
3939 __kmp_free_team(root, team, NULL);
3946 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3947 kmp_team_t *root_team = root->r.r_root_team;
3948 kmp_team_t *hot_team = root->r.r_hot_team;
3949 int n = hot_team->t.t_nproc;
3952 KMP_DEBUG_ASSERT(!root->r.r_active);
3954 root->r.r_root_team = NULL;
3955 root->r.r_hot_team = NULL;
3958 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3959 #if KMP_NESTED_HOT_TEAMS
3960 if (__kmp_hot_teams_max_level >
3962 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3963 kmp_info_t *th = hot_team->t.t_threads[i];
3964 if (__kmp_hot_teams_max_level > 1) {
3965 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3967 if (th->th.th_hot_teams) {
3968 __kmp_free(th->th.th_hot_teams);
3969 th->th.th_hot_teams = NULL;
3974 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3979 if (__kmp_tasking_mode != tskm_immediate_exec) {
3980 __kmp_wait_to_unref_task_teams();
3986 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3988 (LPVOID) & (root->r.r_uber_thread->th),
3989 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3990 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3994 ompt_data_t *task_data;
3995 ompt_data_t *parallel_data;
3996 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3997 if (ompt_enabled.ompt_callback_implicit_task) {
3998 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3999 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4001 if (ompt_enabled.ompt_callback_thread_end) {
4002 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4003 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4009 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4010 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4012 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4013 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4016 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4017 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4018 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4019 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4020 root->r.r_uber_thread->th.th_cg_roots = NULL;
4022 __kmp_reap_thread(root->r.r_uber_thread, 1);
4026 root->r.r_uber_thread = NULL;
4028 root->r.r_begin = FALSE;
4033 void __kmp_unregister_root_current_thread(
int gtid) {
4034 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4038 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4039 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4040 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4043 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4046 kmp_root_t *root = __kmp_root[gtid];
4048 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4049 KMP_ASSERT(KMP_UBER_GTID(gtid));
4050 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4051 KMP_ASSERT(root->r.r_active == FALSE);
4056 kmp_info_t *thread = __kmp_threads[gtid];
4057 kmp_team_t *team = thread->th.th_team;
4058 kmp_task_team_t *task_team = thread->th.th_task_team;
4061 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
4064 thread->th.ompt_thread_info.state = ompt_state_undefined;
4066 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4070 __kmp_reset_root(gtid, root);
4073 __kmp_gtid_set_specific(KMP_GTID_DNE);
4074 #ifdef KMP_TDATA_GTID
4075 __kmp_gtid = KMP_GTID_DNE;
4080 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4082 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4089 static int __kmp_unregister_root_other_thread(
int gtid) {
4090 kmp_root_t *root = __kmp_root[gtid];
4093 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4094 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4095 KMP_ASSERT(KMP_UBER_GTID(gtid));
4096 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4097 KMP_ASSERT(root->r.r_active == FALSE);
4099 r = __kmp_reset_root(gtid, root);
4101 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
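/* Initialize (or re-initialize) a thread's team-related fields, dispatch
   buffers, and implicit task when it joins the given team as thread 'tid'. */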
4126 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4127 int tid,
int gtid) {
4131 kmp_info_t *master = team->t.t_threads[0];
4132 KMP_DEBUG_ASSERT(this_thr != NULL);
4133 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4134 KMP_DEBUG_ASSERT(team);
4135 KMP_DEBUG_ASSERT(team->t.t_threads);
4136 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4137 KMP_DEBUG_ASSERT(master);
4138 KMP_DEBUG_ASSERT(master->th.th_root);
4142 TCW_SYNC_PTR(this_thr->th.th_team, team);
4144 this_thr->th.th_info.ds.ds_tid = tid;
4145 this_thr->th.th_set_nproc = 0;
4146 if (__kmp_tasking_mode != tskm_immediate_exec)
4149 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4151 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4153 this_thr->th.th_set_proc_bind = proc_bind_default;
4154 #if KMP_AFFINITY_SUPPORTED
4155 this_thr->th.th_new_place = this_thr->th.th_current_place;
4158 this_thr->th.th_root = master->th.th_root;
4161 this_thr->th.th_team_nproc = team->t.t_nproc;
4162 this_thr->th.th_team_master = master;
4163 this_thr->th.th_team_serialized = team->t.t_serialized;
4164 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4166 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4168 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4169 tid, gtid, this_thr, this_thr->th.th_current_task));
4171 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4174 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4175 tid, gtid, this_thr, this_thr->th.th_current_task));
4180 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4182 this_thr->th.th_local.this_construct = 0;
4184 if (!this_thr->th.th_pri_common) {
4185 this_thr->th.th_pri_common =
4186 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4187 if (__kmp_storage_map) {
4188 __kmp_print_storage_map_gtid(
4189 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4190 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4192 this_thr->th.th_pri_head = NULL;
4195 if (this_thr != master &&
4196 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4198 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4199 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4201 this_thr->th.th_cg_roots->cg_nthreads++;
4202 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4203 " node %p of thread %p to %d\n",
4204 this_thr, this_thr->th.th_cg_roots,
4205 this_thr->th.th_cg_roots->cg_root,
4206 this_thr->th.th_cg_roots->cg_nthreads));
4207 this_thr->th.th_current_task->td_icvs.thread_limit =
4208 this_thr->th.th_cg_roots->cg_thread_limit;
4213 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4216 sizeof(dispatch_private_info_t) *
4217 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4218 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4219 team->t.t_max_nproc));
4220 KMP_ASSERT(dispatch);
4221 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4222 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4224 dispatch->th_disp_index = 0;
4226 dispatch->th_doacross_buf_idx = 0;
4228 if (!dispatch->th_disp_buffer) {
4229 dispatch->th_disp_buffer =
4230 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4232 if (__kmp_storage_map) {
4233 __kmp_print_storage_map_gtid(
4234 gtid, &dispatch->th_disp_buffer[0],
4235 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4237 : __kmp_dispatch_num_buffers],
4238 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4239 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4240 gtid, team->t.t_id, gtid);
4243 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4246 dispatch->th_dispatch_pr_current = 0;
4247 dispatch->th_dispatch_sh_current = 0;
4249 dispatch->th_deo_fcn = 0;
4250 dispatch->th_dxo_fcn = 0;
4253 this_thr->th.th_next_pool = NULL;
4255 if (!this_thr->th.th_task_state_memo_stack) {
4257 this_thr->th.th_task_state_memo_stack =
4258 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4259 this_thr->th.th_task_state_top = 0;
4260 this_thr->th.th_task_state_stack_sz = 4;
4261 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4263 this_thr->th.th_task_state_memo_stack[i] = 0;
4266 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4267 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
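/* Obtain a worker for the team: reuse a thread from the thread pool if one is
   available, otherwise allocate and start a brand new worker thread. */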
4277 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4279 kmp_team_t *serial_team;
4280 kmp_info_t *new_thr;
4283 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4284 KMP_DEBUG_ASSERT(root && team);
4285 #if !KMP_NESTED_HOT_TEAMS
4286 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4291 if (__kmp_thread_pool) {
4292 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4293 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4294 if (new_thr == __kmp_thread_pool_insert_pt) {
4295 __kmp_thread_pool_insert_pt = NULL;
4297 TCW_4(new_thr->th.th_in_pool, FALSE);
4298 __kmp_suspend_initialize_thread(new_thr);
4299 __kmp_lock_suspend_mx(new_thr);
4300 if (new_thr->th.th_active_in_pool == TRUE) {
4301 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4302 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4303 new_thr->th.th_active_in_pool = FALSE;
4305 __kmp_unlock_suspend_mx(new_thr);
4307 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4308 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4309 KMP_ASSERT(!new_thr->th.th_team);
4310 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4313 __kmp_initialize_info(new_thr, team, new_tid,
4314 new_thr->th.th_info.ds.ds_gtid);
4315 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4317 TCW_4(__kmp_nth, __kmp_nth + 1);
4319 new_thr->th.th_task_state = 0;
4320 new_thr->th.th_task_state_top = 0;
4321 new_thr->th.th_task_state_stack_sz = 4;
4323 #ifdef KMP_ADJUST_BLOCKTIME
4326 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4327 if (__kmp_nth > __kmp_avail_proc) {
4328 __kmp_zero_bt = TRUE;
4337 kmp_balign_t *balign = new_thr->th.th_bar;
4338 for (b = 0; b < bs_last_barrier; ++b)
4339 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4342 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4343 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4350 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4351 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4356 if (!TCR_4(__kmp_init_monitor)) {
4357 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4358 if (!TCR_4(__kmp_init_monitor)) {
4359 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4360 TCW_4(__kmp_init_monitor, 1);
4361 __kmp_create_monitor(&__kmp_monitor);
4362 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4373 while (TCR_4(__kmp_init_monitor) < 2) {
4376 KF_TRACE(10, (
"after monitor thread has started\n"));
4379 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4384 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4385 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4389 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4391 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4393 if (__kmp_storage_map) {
4394 __kmp_print_thread_storage_map(new_thr, new_gtid);
4399 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4400 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4401 new_thr->th.th_serial_team = serial_team =
4402 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4409 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4411 KMP_ASSERT(serial_team);
4412 serial_team->t.t_serialized = 0;
4414 serial_team->t.t_threads[0] = new_thr;
4416 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4420 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4423 __kmp_initialize_fast_memory(new_thr);
4427 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4428 __kmp_initialize_bget(new_thr);
4431 __kmp_init_random(new_thr);
4435 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4436 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4439 kmp_balign_t *balign = new_thr->th.th_bar;
4440 for (b = 0; b < bs_last_barrier; ++b) {
4441 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4442 balign[b].bb.team = NULL;
4443 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4444 balign[b].bb.use_oncore_barrier = 0;
4447 new_thr->th.th_spin_here = FALSE;
4448 new_thr->th.th_next_waiting = 0;
4450 new_thr->th.th_blocking =
false;
4453 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4454 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4455 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4456 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4457 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4460 new_thr->th.th_def_allocator = __kmp_def_allocator;
4461 new_thr->th.th_prev_level = 0;
4462 new_thr->th.th_prev_num_threads = 1;
4465 TCW_4(new_thr->th.th_in_pool, FALSE);
4466 new_thr->th.th_active_in_pool = FALSE;
4467 TCW_4(new_thr->th.th_active, TRUE);
4475 if (__kmp_adjust_gtid_mode) {
4476 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4477 if (TCR_4(__kmp_gtid_mode) != 2) {
4478 TCW_4(__kmp_gtid_mode, 2);
4481 if (TCR_4(__kmp_gtid_mode) != 1) {
4482 TCW_4(__kmp_gtid_mode, 1);
4487 #ifdef KMP_ADJUST_BLOCKTIME
4490 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4491 if (__kmp_nth > __kmp_avail_proc) {
4492 __kmp_zero_bt = TRUE;
4499 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4500 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4502 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4504 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; would mess up the hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
/* Bind the calling thread to the full affinity mask, saving the old mask. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
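/* Distribute the team's threads over the affinity places according to the
   team's proc_bind policy (master / close / spread). */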
4608 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4614 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4616 kmp_info_t *master_th = team->t.t_threads[0];
4617 KMP_DEBUG_ASSERT(master_th != NULL);
4618 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4619 int first_place = master_th->th.th_first_place;
4620 int last_place = master_th->th.th_last_place;
4621 int masters_place = master_th->th.th_current_place;
4622 team->t.t_first_place = first_place;
4623 team->t.t_last_place = last_place;
4625 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4626 "bound to place %d partition = [%d,%d]\n",
4627 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4628 team->t.t_id, masters_place, first_place, last_place));
4630 switch (proc_bind) {
4632 case proc_bind_default:
4635 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4638 case proc_bind_master: {
4640 int n_th = team->t.t_nproc;
4641 for (f = 1; f < n_th; f++) {
4642 kmp_info_t *th = team->t.t_threads[f];
4643 KMP_DEBUG_ASSERT(th != NULL);
4644 th->th.th_first_place = first_place;
4645 th->th.th_last_place = last_place;
4646 th->th.th_new_place = masters_place;
4648 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4649 team->t.t_display_affinity != 1) {
4650 team->t.t_display_affinity = 1;
4654 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d "
4655 "partition = [%d,%d]\n",
4656 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4657 f, masters_place, first_place, last_place));
4661 case proc_bind_close: {
4663 int n_th = team->t.t_nproc;
4665 if (first_place <= last_place) {
4666 n_places = last_place - first_place + 1;
4668 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4670 if (n_th <= n_places) {
4671 int place = masters_place;
4672 for (f = 1; f < n_th; f++) {
4673 kmp_info_t *th = team->t.t_threads[f];
4674 KMP_DEBUG_ASSERT(th != NULL);
4676 if (place == last_place) {
4677 place = first_place;
4678 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4683 th->th.th_first_place = first_place;
4684 th->th.th_last_place = last_place;
4685 th->th.th_new_place = place;
4687 if (__kmp_display_affinity && place != th->th.th_current_place &&
4688 team->t.t_display_affinity != 1) {
4689 team->t.t_display_affinity = 1;
4693 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4694 "partition = [%d,%d]\n",
4695 __kmp_gtid_from_thread(team->t.t_threads[f]),
4696 team->t.t_id, f, place, first_place, last_place));
4699 int S, rem, gap, s_count;
4700 S = n_th / n_places;
4702 rem = n_th - (S * n_places);
4703 gap = rem > 0 ? n_places / rem : n_places;
4704 int place = masters_place;
4706 for (f = 0; f < n_th; f++) {
4707 kmp_info_t *th = team->t.t_threads[f];
4708 KMP_DEBUG_ASSERT(th != NULL);
4710 th->th.th_first_place = first_place;
4711 th->th.th_last_place = last_place;
4712 th->th.th_new_place = place;
4714 if (__kmp_display_affinity && place != th->th.th_current_place &&
4715 team->t.t_display_affinity != 1) {
4716 team->t.t_display_affinity = 1;
4721 if ((s_count == S) && rem && (gap_ct == gap)) {
4723 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4725 if (place == last_place) {
4726 place = first_place;
4727 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4735 }
else if (s_count == S) {
4736 if (place == last_place) {
4737 place = first_place;
4738 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4748 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4749 "partition = [%d,%d]\n",
4750 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4751 th->th.th_new_place, first_place, last_place));
4753 KMP_DEBUG_ASSERT(place == masters_place);
4757 case proc_bind_spread: {
4759 int n_th = team->t.t_nproc;
4762 if (first_place <= last_place) {
4763 n_places = last_place - first_place + 1;
4765 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4767 if (n_th <= n_places) {
4770 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4771 int S = n_places / n_th;
4772 int s_count, rem, gap, gap_ct;
4774 place = masters_place;
4775 rem = n_places - n_th * S;
4776 gap = rem ? n_th / rem : 1;
4779 if (update_master_only == 1)
4781 for (f = 0; f < thidx; f++) {
4782 kmp_info_t *th = team->t.t_threads[f];
4783 KMP_DEBUG_ASSERT(th != NULL);
4785 th->th.th_first_place = place;
4786 th->th.th_new_place = place;
4788 if (__kmp_display_affinity && place != th->th.th_current_place &&
4789 team->t.t_display_affinity != 1) {
4790 team->t.t_display_affinity = 1;
4794 while (s_count < S) {
4795 if (place == last_place) {
4796 place = first_place;
4797 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4804 if (rem && (gap_ct == gap)) {
4805 if (place == last_place) {
4806 place = first_place;
4807 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4815 th->th.th_last_place = place;
4818 if (place == last_place) {
4819 place = first_place;
4820 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4827 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4828 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4829 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4830 f, th->th.th_new_place, th->th.th_first_place,
4831 th->th.th_last_place, __kmp_affinity_num_masks));
4837 double current =
static_cast<double>(masters_place);
4839 (
static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4844 if (update_master_only == 1)
4846 for (f = 0; f < thidx; f++) {
4847 first =
static_cast<int>(current);
4848 last =
static_cast<int>(current + spacing) - 1;
4849 KMP_DEBUG_ASSERT(last >= first);
4850 if (first >= n_places) {
4851 if (masters_place) {
4854 if (first == (masters_place + 1)) {
4855 KMP_DEBUG_ASSERT(f == n_th);
4858 if (last == masters_place) {
4859 KMP_DEBUG_ASSERT(f == (n_th - 1));
4863 KMP_DEBUG_ASSERT(f == n_th);
4868 if (last >= n_places) {
4869 last = (n_places - 1);
4874 KMP_DEBUG_ASSERT(0 <= first);
4875 KMP_DEBUG_ASSERT(n_places > first);
4876 KMP_DEBUG_ASSERT(0 <= last);
4877 KMP_DEBUG_ASSERT(n_places > last);
4878 KMP_DEBUG_ASSERT(last_place >= first_place);
4879 th = team->t.t_threads[f];
4880 KMP_DEBUG_ASSERT(th);
4881 th->th.th_first_place = first;
4882 th->th.th_new_place = place;
4883 th->th.th_last_place = last;
4885 if (__kmp_display_affinity && place != th->th.th_current_place &&
4886 team->t.t_display_affinity != 1) {
4887 team->t.t_display_affinity = 1;
4891 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4892 "partition = [%d,%d], spacing = %.4f\n",
4893 __kmp_gtid_from_thread(team->t.t_threads[f]),
4894 team->t.t_id, f, th->th.th_new_place,
4895 th->th.th_first_place, th->th.th_last_place, spacing));
4899 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4901 int S, rem, gap, s_count;
4902 S = n_th / n_places;
4904 rem = n_th - (S * n_places);
4905 gap = rem > 0 ? n_places / rem : n_places;
4906 int place = masters_place;
4909 if (update_master_only == 1)
4911 for (f = 0; f < thidx; f++) {
4912 kmp_info_t *th = team->t.t_threads[f];
4913 KMP_DEBUG_ASSERT(th != NULL);
4915 th->th.th_first_place = place;
4916 th->th.th_last_place = place;
4917 th->th.th_new_place = place;
4919 if (__kmp_display_affinity && place != th->th.th_current_place &&
4920 team->t.t_display_affinity != 1) {
4921 team->t.t_display_affinity = 1;
4926 if ((s_count == S) && rem && (gap_ct == gap)) {
4928 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4930 if (place == last_place) {
4931 place = first_place;
4932 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4940 }
else if (s_count == S) {
4941 if (place == last_place) {
4942 place = first_place;
4943 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4952 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4953 "partition = [%d,%d]\n",
4954 __kmp_gtid_from_thread(team->t.t_threads[f]),
4955 team->t.t_id, f, th->th.th_new_place,
4956 th->th.th_first_place, th->th.th_last_place));
4958 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4966 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
4974 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4976 ompt_data_t ompt_parallel_data,
4979 kmp_proc_bind_t new_proc_bind,
4981 kmp_internal_control_t *new_icvs,
4982 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4983 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4986 int use_hot_team = !root->r.r_active;
4989 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4990 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4991 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4994 #if KMP_NESTED_HOT_TEAMS
4995 kmp_hot_team_ptr_t *hot_teams;
4997 team = master->th.th_team;
4998 level = team->t.t_active_level;
4999 if (master->th.th_teams_microtask) {
5000 if (master->th.th_teams_size.nteams > 1 &&
5003 (microtask_t)__kmp_teams_master ||
5004 master->th.th_teams_level <
5010 hot_teams = master->th.th_hot_teams;
5011 if (level < __kmp_hot_teams_max_level && hot_teams &&
5021 if (use_hot_team && new_nproc > 1) {
5022 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5023 #if KMP_NESTED_HOT_TEAMS
5024 team = hot_teams[level].hot_team;
5026 team = root->r.r_hot_team;
5029 if (__kmp_tasking_mode != tskm_immediate_exec) {
5030 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5031 "task_team[1] = %p before reinit\n",
5032 team->t.t_task_team[0], team->t.t_task_team[1]));
5039 if (team->t.t_nproc == new_nproc) {
5040 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5043 if (team->t.t_size_changed == -1) {
5044 team->t.t_size_changed = 1;
5046 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5050 kmp_r_sched_t new_sched = new_icvs->sched;
5052 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5054 __kmp_reinitialize_team(team, new_icvs,
5055 root->r.r_uber_thread->th.th_ident);
      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
5059 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5062 #if KMP_AFFINITY_SUPPORTED
5063 if ((team->t.t_size_changed == 0) &&
5064 (team->t.t_proc_bind == new_proc_bind)) {
5065 if (new_proc_bind == proc_bind_spread) {
          __kmp_partition_places(team, 1); // update only the master for spread
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
        __kmp_partition_places(team);
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));
5086 team->t.t_size_changed = 1;
5087 #if KMP_NESTED_HOT_TEAMS
5088 if (__kmp_hot_teams_mode == 0) {
5091 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5092 hot_teams[level].hot_team_nth = new_nproc;
5093 #endif // KMP_NESTED_HOT_TEAMS
5095 for (f = new_nproc; f < team->t.t_nproc; f++) {
5096 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5097 if (__kmp_tasking_mode != tskm_immediate_exec) {
5100 team->t.t_threads[f]->th.th_task_team = NULL;
5102 __kmp_free_thread(team->t.t_threads[f]);
5103 team->t.t_threads[f] = NULL;
5105 #if KMP_NESTED_HOT_TEAMS
5110 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5111 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5112 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5114 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5115 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5117 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5121 #endif // KMP_NESTED_HOT_TEAMS
5122 team->t.t_nproc = new_nproc;
5124 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5125 __kmp_reinitialize_team(team, new_icvs,
5126 root->r.r_uber_thread->th.th_ident);
5129 for (f = 0; f < new_nproc; ++f) {
5130 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
5138 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5141 for (f = 0; f < team->t.t_nproc; f++) {
5142 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5143 team->t.t_threads[f]->th.th_team_nproc ==
5149 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5150 #if KMP_AFFINITY_SUPPORTED
5151 __kmp_partition_places(team);
5155 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5156 kmp_affin_mask_t *old_mask;
5157 if (KMP_AFFINITY_CAPABLE()) {
5158 KMP_CPU_ALLOC(old_mask);
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
5166 team->t.t_size_changed = 1;
5168 #if KMP_NESTED_HOT_TEAMS
5169 int avail_threads = hot_teams[level].hot_team_nth;
5170 if (new_nproc < avail_threads)
5171 avail_threads = new_nproc;
5172 kmp_info_t **other_threads = team->t.t_threads;
5173 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5177 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5178 for (b = 0; b < bs_last_barrier; ++b) {
5179 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5180 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5182 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5186 if (hot_teams[level].hot_team_nth >= new_nproc) {
5189 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5190 team->t.t_nproc = new_nproc;
5196 hot_teams[level].hot_team_nth = new_nproc;
5197 #endif // KMP_NESTED_HOT_TEAMS
5198 if (team->t.t_max_nproc < new_nproc) {
5200 __kmp_reallocate_team_arrays(team, new_nproc);
5201 __kmp_reinitialize_team(team, new_icvs, NULL);
5204 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5209 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5213 for (f = team->t.t_nproc; f < new_nproc; f++) {
5214 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5215 KMP_DEBUG_ASSERT(new_worker);
5216 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5227 kmp_balign_t *balign = new_worker->th.th_bar;
5228 for (b = 0; b < bs_last_barrier; ++b) {
5229 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5230 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5231 KMP_BARRIER_PARENT_FLAG);
5233 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5239 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5240 if (KMP_AFFINITY_CAPABLE()) {
5242 __kmp_set_system_affinity(old_mask, TRUE);
5243 KMP_CPU_FREE(old_mask);
5246 #if KMP_NESTED_HOT_TEAMS
5248 #endif // KMP_NESTED_HOT_TEAMS
5250 int old_nproc = team->t.t_nproc;
5252 __kmp_initialize_team(team, new_nproc, new_icvs,
5253 root->r.r_uber_thread->th.th_ident);
5256 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5257 for (f = 0; f < team->t.t_nproc; ++f)
5258 __kmp_initialize_info(team->t.t_threads[f], team, f,
5259 __kmp_gtid_from_tid(f, team));
5267 for (f = old_nproc; f < team->t.t_nproc; ++f)
5268 team->t.t_threads[f]->th.th_task_state =
5269 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5272 team->t.t_threads[0]->th.th_task_state;
5273 for (f = old_nproc; f < team->t.t_nproc; ++f)
5274 team->t.t_threads[f]->th.th_task_state = old_state;
5278 for (f = 0; f < team->t.t_nproc; ++f) {
5279 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5280 team->t.t_threads[f]->th.th_team_nproc ==
5286 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5287 #if KMP_AFFINITY_SUPPORTED
5288 __kmp_partition_places(team);
5294 kmp_info_t *master = team->t.t_threads[0];
5295 if (master->th.th_teams_microtask) {
5296 for (f = 1; f < new_nproc; ++f) {
5298 kmp_info_t *thr = team->t.t_threads[f];
5299 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5300 thr->th.th_teams_level = master->th.th_teams_level;
5301 thr->th.th_teams_size = master->th.th_teams_size;
5305 #if KMP_NESTED_HOT_TEAMS
5309 for (f = 1; f < new_nproc; ++f) {
5310 kmp_info_t *thr = team->t.t_threads[f];
5312 kmp_balign_t *balign = thr->th.th_bar;
5313 for (b = 0; b < bs_last_barrier; ++b) {
5314 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5315 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5317 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5322 #endif // KMP_NESTED_HOT_TEAMS
5325 __kmp_alloc_argv_entries(argc, team, TRUE);
5326 KMP_CHECK_UPDATE(team->t.t_argc, argc);
    KF_TRACE(10, (" hot_team = %p\n", team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
5341 __ompt_team_assign_id(team, ompt_parallel_data);
5351 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5354 if (team->t.t_max_nproc >= max_nproc) {
5356 __kmp_team_pool = team->t.t_next_pool;
5359 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(20,
               ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
                team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5376 for (b = 0; b < bs_last_barrier; ++b) {
5377 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5379 team->t.t_bar[b].b_master_arrived = 0;
5380 team->t.t_bar[b].b_team_arrived = 0;
5386 team->t.t_proc_bind = new_proc_bind;
5389 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5393 __ompt_team_assign_id(team, ompt_parallel_data);
5405 team = __kmp_reap_team(team);
5406 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5414 team->t.t_max_nproc = max_nproc;
5417 __kmp_allocate_team_arrays(team, max_nproc);
  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL;
  team->t.t_task_team[1] = NULL;
5430 if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5435 __kmp_alloc_argv_entries(argc, team, FALSE);
5436 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5443 for (b = 0; b < bs_last_barrier; ++b) {
5444 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5446 team->t.t_bar[b].b_master_arrived = 0;
5447 team->t.t_bar[b].b_team_arrived = 0;
5453 team->t.t_proc_bind = new_proc_bind;
5457 __ompt_team_assign_id(team, ompt_parallel_data);
5458 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));
5481 KMP_DEBUG_ASSERT(root);
5482 KMP_DEBUG_ASSERT(team);
5483 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5484 KMP_DEBUG_ASSERT(team->t.t_threads);
5486 int use_hot_team = team == root->r.r_hot_team;
5487 #if KMP_NESTED_HOT_TEAMS
5489 kmp_hot_team_ptr_t *hot_teams;
5491 level = team->t.t_active_level - 1;
5492 if (master->th.th_teams_microtask) {
5493 if (master->th.th_teams_size.nteams > 1) {
5497 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5498 master->th.th_teams_level == team->t.t_level) {
5503 hot_teams = master->th.th_hot_teams;
5504 if (level < __kmp_hot_teams_max_level) {
5505 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5509 #endif // KMP_NESTED_HOT_TEAMS
5512 TCW_SYNC_PTR(team->t.t_pkfn,
5515 team->t.t_copyin_counter = 0;
5520 if (!use_hot_team) {
5521 if (__kmp_tasking_mode != tskm_immediate_exec) {
5523 for (f = 1; f < team->t.t_nproc; ++f) {
5524 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5525 kmp_info_t *th = team->t.t_threads[f];
5526 volatile kmp_uint32 *state = &th->th.th_reap_state;
5527 while (*state != KMP_SAFE_TO_REAP) {
5531 if (!__kmp_is_thread_alive(th, &ecode)) {
5532 *state = KMP_SAFE_TO_REAP;
5537 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5538 if (fl.is_sleeping())
5539 fl.resume(__kmp_gtid_from_thread(th));
5546 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5547 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5548 if (task_team != NULL) {
5549 for (f = 0; f < team->t.t_nproc; ++f) {
5550 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5551 team->t.t_threads[f]->th.th_task_team = NULL;
        KA_TRACE(20,
                 ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
                  __kmp_get_gtid(), task_team, team->t.t_id));
5557 #if KMP_NESTED_HOT_TEAMS
5558 __kmp_free_task_team(master, task_team);
5560 team->t.t_task_team[tt_idx] = NULL;
5566 team->t.t_parent = NULL;
5567 team->t.t_level = 0;
5568 team->t.t_active_level = 0;
5571 for (f = 1; f < team->t.t_nproc; ++f) {
5572 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5573 __kmp_free_thread(team->t.t_threads[f]);
5574 team->t.t_threads[f] = NULL;
5579 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
  __kmp_team_pool = (volatile kmp_team_t *)team;
5583 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5584 team->t.t_threads[1]->th.th_cg_roots);
5585 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5587 for (f = 1; f < team->t.t_nproc; ++f) {
5588 kmp_info_t *thr = team->t.t_threads[f];
5589 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5590 thr->th.th_cg_roots->cg_root == thr);
5592 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5593 thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5599 if (thr->th.th_cg_roots)
5600 thr->th.th_current_task->td_icvs.thread_limit =
5601 thr->th.th_cg_roots->cg_thread_limit;
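// Free the memory owned by a team descriptor that has been pulled off the
// team pool; returns the next team in the pool.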
5610 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5611 kmp_team_t *next_pool = team->t.t_next_pool;
5613 KMP_DEBUG_ASSERT(team);
5614 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5615 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5616 KMP_DEBUG_ASSERT(team->t.t_threads);
5617 KMP_DEBUG_ASSERT(team->t.t_argv);
5622 __kmp_free_team_arrays(team);
5623 if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
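// Put a worker thread back on the thread pool, keeping the pool sorted by
// gtid so threads are reused in a deterministic order.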
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5663 KMP_DEBUG_ASSERT(this_th);
5668 kmp_balign_t *balign = this_th->th.th_bar;
5669 for (b = 0; b < bs_last_barrier; ++b) {
5670 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5671 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5672 balign[b].bb.team = NULL;
5673 balign[b].bb.leaf_kids = 0;
5675 this_th->th.th_task_state = 0;
5676 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5679 TCW_PTR(this_th->th.th_team, NULL);
5680 TCW_PTR(this_th->th.th_root, NULL);
5681 TCW_PTR(this_th->th.th_dispatch, NULL);
5683 while (this_th->th.th_cg_roots) {
5684 this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
5690 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5691 if (tmp->cg_root == this_th) {
5692 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5694 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5695 this_th->th.th_cg_roots = tmp->up;
5698 this_th->th.th_cg_roots = NULL;
5708 __kmp_free_implicit_task(this_th);
5709 this_th->th.th_current_task = NULL;
5713 gtid = this_th->th.th_info.ds.ds_gtid;
5714 if (__kmp_thread_pool_insert_pt != NULL) {
5715 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5716 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5717 __kmp_thread_pool_insert_pt = NULL;
5726 if (__kmp_thread_pool_insert_pt != NULL) {
5727 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5729 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5731 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5732 scan = &((*scan)->th.th_next_pool))
5737 TCW_PTR(this_th->th.th_next_pool, *scan);
5738 __kmp_thread_pool_insert_pt = *scan = this_th;
5739 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5740 (this_th->th.th_info.ds.ds_gtid <
5741 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5742 TCW_4(this_th->th.th_in_pool, TRUE);
5743 __kmp_suspend_initialize_thread(this_th);
5744 __kmp_lock_suspend_mx(this_th);
5745 if (this_th->th.th_active == TRUE) {
5746 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5747 this_th->th.th_active_in_pool = TRUE;
5751 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5754 __kmp_unlock_suspend_mx(this_th);
5756 TCW_4(__kmp_nth, __kmp_nth - 1);
5758 #ifdef KMP_ADJUST_BLOCKTIME
5761 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5762 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5763 if (__kmp_nth <= __kmp_avail_proc) {
5764 __kmp_zero_bt = FALSE;
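// Main loop of a worker thread: wait at the fork barrier for work, invoke the
// team's microtask, join, and repeat until library shutdown is signalled.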
5774 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5775 int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t *(*volatile pteam);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5782 if (__kmp_env_consistency_check) {
5783 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5787 ompt_data_t *thread_data;
5788 if (ompt_enabled.enabled) {
5789 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5790 *thread_data = ompt_data_none;
5792 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5793 this_thr->th.ompt_thread_info.wait_id = 0;
5794 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5795 if (ompt_enabled.ompt_callback_thread_begin) {
5796 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5797 ompt_thread_worker, thread_data);
5803 if (ompt_enabled.enabled) {
5804 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5808 while (!TCR_4(__kmp_global.g.g_done)) {
5809 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));
5816 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5819 if (ompt_enabled.enabled) {
5820 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5824 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5827 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5829 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
5836 updateHWFPControl(*pteam);
5839 if (ompt_enabled.enabled) {
5840 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5844 rc = (*pteam)->t.t_invoke(gtid);
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
5853 if (ompt_enabled.enabled) {
5855 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
5857 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5861 __kmp_join_barrier(gtid);
5864 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5867 if (ompt_enabled.ompt_callback_thread_end) {
5868 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5872 this_thr->th.th_task_team = NULL;
5874 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));

  return this_thr;
}
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose
// significant bits
#endif
  // Make sure no significant bits are lost
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
5908 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5909 __kmp_gtid_set_specific(gtid);
5910 #ifdef KMP_TDATA_GTID
5913 __kmp_internal_end_thread(gtid);
5916 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }
#endif

void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5957 __kmp_internal_end_library(-1);
5959 __kmp_close_console();
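// Release a thread's OS resources and per-thread data structures once it is
// safe to reap it.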
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    /* Assume the thread is at the fork barrier here */
    KA_TRACE(20,
             ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
              gtid));
5980 ANNOTATE_HAPPENS_BEFORE(thread);
5981 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5982 __kmp_release_64(&flag);
5986 __kmp_reap_worker(thread);
5998 if (thread->th.th_active_in_pool) {
5999 thread->th.th_active_in_pool = FALSE;
6000 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6001 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6005 __kmp_free_implicit_task(thread);
6009 __kmp_free_fast_memory(thread);
6012 __kmp_suspend_uninitialize_thread(thread);
6014 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6015 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6020 #ifdef KMP_ADJUST_BLOCKTIME
6023 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6024 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6025 if (__kmp_nth <= __kmp_avail_proc) {
6026 __kmp_zero_bt = FALSE;
6032 if (__kmp_env_consistency_check) {
6033 if (thread->th.th_cons) {
6034 __kmp_free_cons_stack(thread->th.th_cons);
6035 thread->th.th_cons = NULL;
6039 if (thread->th.th_pri_common != NULL) {
6040 __kmp_free(thread->th.th_pri_common);
6041 thread->th.th_pri_common = NULL;
6044 if (thread->th.th_task_state_memo_stack != NULL) {
6045 __kmp_free(thread->th.th_task_state_memo_stack);
6046 thread->th.th_task_state_memo_stack = NULL;
6050 if (thread->th.th_local.bget_data != NULL) {
6051 __kmp_finalize_bget(thread);
6055 #if KMP_AFFINITY_SUPPORTED
6056 if (thread->th.th_affin_mask != NULL) {
6057 KMP_CPU_FREE(thread->th.th_affin_mask);
6058 thread->th.th_affin_mask = NULL;
6062 #if KMP_USE_HIER_SCHED
6063 if (thread->th.th_hier_bar_data != NULL) {
6064 __kmp_free(thread->th.th_hier_bar_data);
6065 thread->th.th_hier_bar_data = NULL;
6069 __kmp_reap_team(thread->th.th_serial_team);
6070 thread->th.th_serial_team = NULL;
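// Common shutdown path: reap the monitor, pooled threads and teams, then tear
// down the remaining global state. Callers hold __kmp_initz_lock and
// __kmp_forkjoin_lock.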
static void __kmp_internal_end(void) {
  int i;
6081 __kmp_unregister_library();
6088 __kmp_reclaim_dead_roots();
6092 for (i = 0; i < __kmp_threads_capacity; i++)
6094 if (__kmp_root[i]->r.r_active)
6097 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6099 if (i < __kmp_threads_capacity) {
6111 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6112 if (TCR_4(__kmp_init_monitor)) {
6113 __kmp_reap_monitor(&__kmp_monitor);
6114 TCW_4(__kmp_init_monitor, 0);
6116 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6118 #endif // KMP_USE_MONITOR
6123 for (i = 0; i < __kmp_threads_capacity; i++) {
6124 if (__kmp_root[i]) {
6127 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6136 while (__kmp_thread_pool != NULL) {
6138 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6139 __kmp_thread_pool = thread->th.th_next_pool;
6141 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6142 thread->th.th_next_pool = NULL;
6143 thread->th.th_in_pool = FALSE;
6144 __kmp_reap_thread(thread, 0);
6146 __kmp_thread_pool_insert_pt = NULL;
6149 while (__kmp_team_pool != NULL) {
6151 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6152 __kmp_team_pool = team->t.t_next_pool;
6154 team->t.t_next_pool = NULL;
6155 __kmp_reap_team(team);
6158 __kmp_reap_task_teams();
6165 for (i = 0; i < __kmp_threads_capacity; i++) {
6166 kmp_info_t *thr = __kmp_threads[i];
6167 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6172 for (i = 0; i < __kmp_threads_capacity; ++i) {
6179 TCW_SYNC_4(__kmp_init_common, FALSE);
  KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
6189 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6190 if (TCR_4(__kmp_init_monitor)) {
6191 __kmp_reap_monitor(&__kmp_monitor);
6192 TCW_4(__kmp_init_monitor, 0);
6194 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
  KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
6198 TCW_4(__kmp_init_gtid, FALSE);
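// Library-level shutdown entry point (atexit/destructor path); a negative
// gtid_req means the caller does not know its own gtid.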
void __kmp_internal_end_library(int gtid_req) {
  /* If we have already cleaned up, don't try again. */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }
6228 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(10,
           ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                  "shutdown\n"));
    /* we don't know who we are, but we may still shut down the library */
  } else if (KMP_UBER_GTID(gtid)) {
    /* unregister ourselves as an uber thread; gtid is no longer valid */
    if (__kmp_root[gtid]->r.r_active) {
      __kmp_global.g.g_abort = -1;
      TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      KA_TRACE(10,
               ("__kmp_internal_end_library: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      KA_TRACE(10,
               ("__kmp_internal_end_library: unregistering sibling T#%d\n",
                gtid));
      __kmp_unregister_root_current_thread(gtid);
    }
  }
6263 #ifdef DUMP_DEBUG_ON_EXIT
6264 if (__kmp_debug_buf)
6265 __kmp_dump_debug_buffer();
6271 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6274 if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
6277 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6280 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6281 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6290 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6293 __kmp_internal_end();
6295 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6296 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));
6300 #ifdef DUMP_DEBUG_ON_EXIT
6301 if (__kmp_debug_buf)
6302 __kmp_dump_debug_buffer();
6306 __kmp_close_console();
6309 __kmp_fini_allocator();
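// Per-thread shutdown entry point; unlike __kmp_internal_end_library it only
// tears the runtime down once no uber (root) threads remain alive.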
void __kmp_internal_end_thread(int gtid_req) {
  /* If we have already cleaned up, don't try again. */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }
6336 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
  KA_TRACE(10,
           ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
  if (gtid == KMP_GTID_SHUTDOWN) {
    KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                  "already shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                  "registered, or system shutdown\n"));
    return;
  } else if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                  "shutdown\n"));
    return;
  } else if (KMP_UBER_GTID(gtid)) {
    /* unregister ourselves as an uber thread; gtid is no longer valid */
    if (__kmp_root[gtid]->r.r_active) {
      __kmp_global.g.g_abort = -1;
      TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      KA_TRACE(10,
               ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                    gtid));
      __kmp_unregister_root_current_thread(gtid);
    }
  } else {
    /* just a worker thread; let's leave */
    KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

    if (gtid >= 0) {
      __kmp_threads[gtid]->th.th_task_team = NULL;
    }

    KA_TRACE(10,
             ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
              gtid));
    return;
  }
6382 if (__kmp_pause_status != kmp_hard_paused)
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6392 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6395 if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
6398 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6401 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6402 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6413 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6415 for (i = 0; i < __kmp_threads_capacity; ++i) {
6416 if (KMP_UBER_GTID(i)) {
      KA_TRACE(10,
               ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n",
                i));
6420 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6421 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6428 __kmp_internal_end();
6430 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6431 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6435 #ifdef DUMP_DEBUG_ON_EXIT
6436 if (__kmp_debug_buf)
6437 __kmp_dump_debug_buffer();
6444 static long __kmp_registration_flag = 0;
6446 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
}
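// Record this runtime instance in the environment so a second OpenMP runtime
// loaded into the same process can be detected (and rejected unless
// KMP_DUPLICATE_LIB_OK is set).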
void __kmp_register_library_startup(void) {
6459 char *name = __kmp_reg_status_name();
6465 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6466 __kmp_initialize_system_tick();
6468 __kmp_read_system_time(&time.dtime);
6469 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
6482 __kmp_env_set(name, __kmp_registration_str, 0);
6484 value = __kmp_env_get(name);
6485 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6495 char *flag_addr_str = NULL;
6496 char *flag_val_str = NULL;
6497 char const *file_name = NULL;
6498 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6499 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6502 long *flag_addr = 0;
6504 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void**, &flag_addr));
6505 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6506 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6510 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6524 file_name =
"unknown library";
6529 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6530 if (!__kmp_str_match_true(duplicate_ok)) {
6532 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6533 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6535 KMP_INTERNAL_FREE(duplicate_ok);
6536 __kmp_duplicate_library_ok = 1;
6541 __kmp_env_unset(name);
6543 default: { KMP_DEBUG_ASSERT(0); }
break;
  KMP_INTERNAL_FREE((void *)value);
  KMP_INTERNAL_FREE((void *)name);
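// Remove this runtime's registration string from the environment, if it is
// still ours.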
void __kmp_unregister_library(void) {
  char *name = __kmp_reg_status_name();
  char *value = __kmp_env_get(name);
6557 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6558 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6559 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6561 __kmp_env_unset(name);
6564 KMP_INTERNAL_FREE(__kmp_registration_str);
6565 KMP_INTERNAL_FREE(value);
6566 KMP_INTERNAL_FREE(name);
6568 __kmp_registration_flag = 0;
6569 __kmp_registration_str = NULL;
6576 #if KMP_MIC_SUPPORTED
6578 static void __kmp_check_mic_type() {
6579 kmp_cpuid_t cpuid_state = {0};
6580 kmp_cpuid_t *cs_p = &cpuid_state;
6581 __kmp_x86_cpuid(1, 0, cs_p);
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
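// One-time serial initialization: basic type checks, lock and allocator setup,
// default ICVs and barrier parameters, thread/team storage, and registration
// of the initial root thread.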
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6610 __kmp_validate_locks();
6613 __kmp_init_allocator();
6618 __kmp_register_library_startup();
6621 if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
6625 __kmp_global.g.g_abort = 0;
6626 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6629 #if KMP_USE_ADAPTIVE_LOCKS
6630 #if KMP_DEBUG_ADAPTIVE_LOCKS
6631 __kmp_init_speculative_stats();
6634 #if KMP_STATS_ENABLED
6637 __kmp_init_lock(&__kmp_global_lock);
6638 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6639 __kmp_init_lock(&__kmp_debug_lock);
6640 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6641 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6642 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6643 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6644 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6645 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6646 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6647 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6648 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6649 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6650 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6651 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6652 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6653 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6654 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6656 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6658 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6662 __kmp_runtime_initialize();
6664 #if KMP_MIC_SUPPORTED
6665 __kmp_check_mic_type();
6672 __kmp_abort_delay = 0;
6676 __kmp_dflt_team_nth_ub = __kmp_xproc;
6677 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6678 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6680 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6681 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6683 __kmp_max_nth = __kmp_sys_max_nth;
6684 __kmp_cg_max_nth = __kmp_sys_max_nth;
6685 __kmp_teams_max_nth = __kmp_xproc;
6686 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6687 __kmp_teams_max_nth = __kmp_sys_max_nth;
6692 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6694 __kmp_monitor_wakeups =
6695 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6696 __kmp_bt_intervals =
6697 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6700 __kmp_library = library_throughput;
6702 __kmp_static = kmp_sch_static_balanced;
6709 #if KMP_FAST_REDUCTION_BARRIER
6710 #define kmp_reduction_barrier_gather_bb ((int)1)
6711 #define kmp_reduction_barrier_release_bb ((int)1)
6712 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6713 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6714 #endif // KMP_FAST_REDUCTION_BARRIER
6715 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6716 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6717 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6718 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6719 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6720 #if KMP_FAST_REDUCTION_BARRIER
6721 if (i == bs_reduction_barrier) {
6723 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6724 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6725 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6726 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6728 #endif // KMP_FAST_REDUCTION_BARRIER
6730 #if KMP_FAST_REDUCTION_BARRIER
6731 #undef kmp_reduction_barrier_release_pat
6732 #undef kmp_reduction_barrier_gather_pat
6733 #undef kmp_reduction_barrier_release_bb
6734 #undef kmp_reduction_barrier_gather_bb
6735 #endif // KMP_FAST_REDUCTION_BARRIER
6736 #if KMP_MIC_SUPPORTED
6737 if (__kmp_mic_type == mic2) {
6739 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6740 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6742 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6743 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6745 #if KMP_FAST_REDUCTION_BARRIER
6746 if (__kmp_mic_type == mic2) {
6747 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6748 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6750 #endif // KMP_FAST_REDUCTION_BARRIER
6751 #endif // KMP_MIC_SUPPORTED
6755 __kmp_env_checks = TRUE;
6757 __kmp_env_checks = FALSE;
6761 __kmp_foreign_tp = TRUE;
6763 __kmp_global.g.g_dynamic = FALSE;
6764 __kmp_global.g.g_dynamic_mode = dynamic_default;
6766 __kmp_env_initialize(NULL);
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);
6781 __kmp_threads_capacity =
6782 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6784 __kmp_tp_capacity = __kmp_default_tp_capacity(
6785 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6790 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6791 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6792 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6793 __kmp_thread_pool = NULL;
6794 __kmp_thread_pool_insert_pt = NULL;
6795 __kmp_team_pool = NULL;
6802 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6804 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6805 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
6806 sizeof(kmp_info_t *) * __kmp_threads_capacity);
6809 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6811 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6816 gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));
6818 KMP_ASSERT(KMP_UBER_GTID(gtid));
6819 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6823 __kmp_common_initialize();
6827 __kmp_register_atfork();
6830 #if !KMP_DYNAMIC_LIB
  int rc = atexit(__kmp_internal_end_atexit);
  if (rc != 0) {
    __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                __kmp_msg_null);
  }
6848 __kmp_install_signals(FALSE);
6851 __kmp_install_signals(TRUE);
6856 __kmp_init_counter++;
6858 __kmp_init_serial = TRUE;
6860 if (__kmp_settings) {
6865 if (__kmp_display_env || __kmp_display_env_verbose) {
6866 __kmp_env_print_2();
6868 #endif // OMP_40_ENABLED
  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
6880 if (__kmp_init_serial) {
6883 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6884 if (__kmp_init_serial) {
6885 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6888 __kmp_do_serial_initialize();
6889 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
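// Middle initialization: set up affinity and derive the default team size
// from the number of available processors or cores.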
static void __kmp_do_middle_initialize(void) {
6894 int prev_dflt_team_nth;
6896 if (!__kmp_init_serial) {
6897 __kmp_do_serial_initialize();
  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));
6904 prev_dflt_team_nth = __kmp_dflt_team_nth;
6906 #if KMP_AFFINITY_SUPPORTED
6909 __kmp_affinity_initialize();
6913 for (i = 0; i < __kmp_threads_capacity; i++) {
6914 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6915 __kmp_affinity_set_init_mask(i, TRUE);
6920 KMP_ASSERT(__kmp_xproc > 0);
6921 if (__kmp_avail_proc == 0) {
6922 __kmp_avail_proc = __kmp_xproc;
6928 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6929 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6934 if (__kmp_dflt_team_nth == 0) {
6935 #ifdef KMP_DFLT_NTH_CORES
6937 __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
6950 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6951 __kmp_dflt_team_nth = KMP_MIN_NTH;
6953 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6954 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6959 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6961 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6966 for (i = 0; i < __kmp_threads_capacity; i++) {
6967 kmp_info_t *thread = __kmp_threads[i];
6970 if (thread->th.th_current_task->td_icvs.nproc != 0)
6973 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
  KA_TRACE(20,
           ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
            __kmp_dflt_team_nth));
6981 #ifdef KMP_ADJUST_BLOCKTIME
6983 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6984 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6985 if (__kmp_nth > __kmp_avail_proc) {
6986 __kmp_zero_bt = TRUE;
6992 TCW_SYNC_4(__kmp_init_middle, TRUE);
  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
6998 if (__kmp_init_middle) {
7001 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7002 if (__kmp_init_middle) {
7003 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7006 __kmp_do_middle_initialize();
7007 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
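// Final initialization step performed lazily before the first parallel region
// (FP control state, signals, dynamic-mode selection).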
void __kmp_parallel_initialize(void) {
7011 int gtid = __kmp_entry_gtid();
7014 if (TCR_4(__kmp_init_parallel))
7016 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7017 if (TCR_4(__kmp_init_parallel)) {
7018 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7023 if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10,
             ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
7033 if (!__kmp_init_middle) {
7034 __kmp_do_middle_initialize();
7038 __kmp_resume_if_hard_paused();
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
7043 KMP_ASSERT(KMP_UBER_GTID(gtid));
7045 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7048 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7049 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7050 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7054 #if KMP_HANDLE_SIGNALS
7056 __kmp_install_signals(TRUE);
7060 __kmp_suspend_initialize();
7062 #if defined(USE_LOAD_BALANCE)
7063 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7064 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7067 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7068 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7072 if (__kmp_version) {
7073 __kmp_print_version_2();
7077 TCW_SYNC_4(__kmp_init_parallel, TRUE);
  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));
7082 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;
7094 this_thr->th.th_local.this_construct = 0;
7095 #if KMP_CACHE_MANAGE
7096 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7098 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7099 KMP_DEBUG_ASSERT(dispatch);
7100 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7104 dispatch->th_disp_index = 0;
7106 dispatch->th_doacross_buf_idx =
7109 if (__kmp_env_consistency_check)
7110 __kmp_push_parallel(gtid, team->t.t_ident);
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
7117 if (__kmp_env_consistency_check)
7118 __kmp_pop_parallel(gtid, team->t.t_ident);
7120 __kmp_finish_implicit_task(this_thr);
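// Invoke the team's microtask for this thread, bracketed by the
// consistency-check push/pop and the OMPT/stats bookkeeping.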
int __kmp_invoke_task_func(int gtid) {
  int rc;
7125 int tid = __kmp_tid_from_gtid(gtid);
7126 kmp_info_t *this_thr = __kmp_threads[gtid];
7127 kmp_team_t *team = this_thr->th.th_team;
7129 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7131 if (__itt_stack_caller_create_ptr) {
7132 __kmp_itt_stack_callee_enter(
7134 team->t.t_stack_id);
7137 #if INCLUDE_SSC_MARKS
7138 SSC_MARK_INVOKING();
7143 void **exit_runtime_p;
7144 ompt_data_t *my_task_data;
7145 ompt_data_t *my_parallel_data;
7148 if (ompt_enabled.enabled) {
7150 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
7152 exit_runtime_p = &dummy;
7156 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7157 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7158 if (ompt_enabled.ompt_callback_implicit_task) {
7159 ompt_team_size = team->t.t_nproc;
7160 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7161 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7162 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7163 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7167 #if KMP_STATS_ENABLED
7169 if (previous_state == stats_state_e::TEAMS_REGION) {
7170 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7172 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7174 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_runtime_p
#endif
                              );
7185 *exit_runtime_p = NULL;
7188 #if KMP_STATS_ENABLED
7189 if (previous_state == stats_state_e::TEAMS_REGION) {
7190 KMP_SET_THREAD_STATE(previous_state);
7192 KMP_POP_PARTITIONED_TIMER();
7196 if (__itt_stack_caller_create_ptr) {
7197 __kmp_itt_stack_callee_leave(
7199 team->t.t_stack_id);
7202 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
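// Entry point for the master thread of a teams construct: it becomes a new
// contention-group root and forks the enclosed parallel region.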
void __kmp_teams_master(int gtid) {
7210 kmp_info_t *thr = __kmp_threads[gtid];
7211 kmp_team_t *team = thr->th.th_team;
7212 ident_t *loc = team->t.t_ident;
7213 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7214 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7215 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7223 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7224 tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_threads to 1\n",
                 thr, tmp));
7228 tmp->up = thr->th.th_cg_roots;
7229 thr->th.th_cg_roots = tmp;
7233 #if INCLUDE_SSC_MARKS
7236 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7237 (microtask_t)thr->th.th_teams_microtask,
7238 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7239 #if INCLUDE_SSC_MARKS
7243 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7244 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7247 __kmp_join_call(loc, gtid
int __kmp_invoke_teams_master(int gtid) {
7257 kmp_info_t *this_thr = __kmp_threads[gtid];
7258 kmp_team_t *team = this_thr->th.th_team;
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
7264 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7265 __kmp_teams_master(gtid);
7266 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
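// Record the requested number of teams and the per-team thread limit for an
// upcoming teams construct, clipping both against __kmp_teams_max_nth.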
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);
7295 if (num_teams > __kmp_teams_max_nth) {
7296 if (!__kmp_reserve_warn) {
7297 __kmp_reserve_warn = 1;
7298 __kmp_msg(kmp_ms_warning,
7299 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7300 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7302 num_teams = __kmp_teams_max_nth;
7306 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7309 if (num_threads == 0) {
7310 if (!TCR_4(__kmp_init_middle))
7311 __kmp_middle_initialize();
7312 num_threads = __kmp_avail_proc / num_teams;
7313 if (num_teams * num_threads > __kmp_teams_max_nth) {
7315 num_threads = __kmp_teams_max_nth / num_teams;
7320 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7322 if (num_teams * num_threads > __kmp_teams_max_nth) {
7323 int new_threads = __kmp_teams_max_nth / num_teams;
7324 if (!__kmp_reserve_warn) {
7325 __kmp_reserve_warn = 1;
7326 __kmp_msg(kmp_ms_warning,
7327 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7328 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7330 num_threads = new_threads;
7333 thr->th.th_teams_size.nth = num_threads;
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
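// Master-side setup of a new parallel region: reset the dispatch buffers and
// release the workers through the fork barrier.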
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
7353 KMP_DEBUG_ASSERT(team);
7354 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7355 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7358 team->t.t_construct = 0;
7359 team->t.t_ordered.dt.t_value =
7363 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7364 if (team->t.t_max_nproc > 1) {
7366 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7367 team->t.t_disp_buffer[i].buffer_index = i;
7369 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7373 team->t.t_disp_buffer[0].buffer_index = 0;
7375 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7380 KMP_ASSERT(this_thr->th.th_team == team);
7383 for (f = 0; f < team->t.t_nproc; f++) {
7384 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7385 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7390 __kmp_fork_barrier(gtid, 0);
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
7396 KMP_DEBUG_ASSERT(team);
7397 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7398 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7404 if (__kmp_threads[gtid] &&
7405 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7406 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7407 __kmp_threads[gtid]);
7408 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7409 "team->t.t_nproc=%d\n",
7410 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7412 __kmp_print_structure();
7414 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7415 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7418 __kmp_join_barrier(gtid);
7420 if (ompt_enabled.enabled &&
7421 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7422 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7423 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7424 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7426 void *codeptr = NULL;
7427 if (KMP_MASTER_TID(ds_tid) &&
7428 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7429 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7430 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7432 if (ompt_enabled.ompt_callback_sync_region_wait) {
7433 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7434 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7437 if (ompt_enabled.ompt_callback_sync_region) {
7438 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7439 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7443 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7444 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7445 ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
7451 KMP_ASSERT(this_thr->th.th_team == team);
7456 #ifdef USE_LOAD_BALANCE
7460 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7463 kmp_team_t *hot_team;
7465 if (root->r.r_active) {
7468 hot_team = root->r.r_hot_team;
7469 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7470 return hot_team->t.t_nproc - 1;
7475 for (i = 1; i < hot_team->t.t_nproc; i++) {
7476 if (hot_team->t.t_threads[i]->th.th_active) {
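// Dynamic-mode heuristic: pick a thread count for the next parallel region
// based on current system load (falls back to the thread-limit method if the
// load cannot be obtained).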
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int system_active;
7488 int hot_team_active;
7489 int team_curr_active;
  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
7494 KMP_DEBUG_ASSERT(root);
7495 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7496 ->th.th_current_task->td_icvs.dynamic == TRUE);
7497 KMP_DEBUG_ASSERT(set_nproc > 1);
7499 if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }
7509 pool_active = __kmp_thread_pool_active_nth;
7510 hot_team_active = __kmp_active_hot_team_nproc(root);
7511 team_curr_active = pool_active + hot_team_active + 1;
7514 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));
7519 if (system_active < 0) {
7523 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
7527 retval = __kmp_avail_proc - __kmp_nth +
7528 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7529 if (retval > set_nproc) {
7532 if (retval < KMP_MIN_NTH) {
7533 retval = KMP_MIN_NTH;
    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }
7544 if (system_active < team_curr_active) {
7545 system_active = team_curr_active;
7547 retval = __kmp_avail_proc - system_active + team_curr_active;
7548 if (retval > set_nproc) {
7551 if (retval < KMP_MIN_NTH) {
7552 retval = KMP_MIN_NTH;
  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));

  return retval;
}
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));
7569 if (TCR_4(__kmp_init_parallel)) {
7570 #if KMP_HANDLE_SIGNALS
7571 __kmp_remove_signals();
7573 TCW_4(__kmp_init_parallel, FALSE);
7576 if (TCR_4(__kmp_init_middle)) {
7577 #if KMP_AFFINITY_SUPPORTED
7578 __kmp_affinity_uninitialize();
7580 __kmp_cleanup_hierarchy();
7581 TCW_4(__kmp_init_middle, FALSE);
  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
7586 if (__kmp_init_serial) {
7587 __kmp_runtime_destroy();
7588 __kmp_init_serial = FALSE;
7591 __kmp_cleanup_threadprivate_caches();
7593 for (f = 0; f < __kmp_threads_capacity; f++) {
7594 if (__kmp_root[f] != NULL) {
7595 __kmp_free(__kmp_root[f]);
7596 __kmp_root[f] = NULL;
7599 __kmp_free(__kmp_threads);
7602 __kmp_threads = NULL;
7604 __kmp_threads_capacity = 0;
7606 #if KMP_USE_DYNAMIC_LOCK
7607 __kmp_cleanup_indirect_user_locks();
7609 __kmp_cleanup_user_locks();
7612 #if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
7614 __kmp_cpuinfo_file = NULL;
7617 #if KMP_USE_ADAPTIVE_LOCKS
7618 #if KMP_DEBUG_ADAPTIVE_LOCKS
7619 __kmp_print_speculative_stats();
7622 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7623 __kmp_nested_nth.nth = NULL;
7624 __kmp_nested_nth.size = 0;
7625 __kmp_nested_nth.used = 0;
7626 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7627 __kmp_nested_proc_bind.bind_types = NULL;
7628 __kmp_nested_proc_bind.size = 0;
7629 __kmp_nested_proc_bind.used = 0;
7631 if (__kmp_affinity_format) {
7632 KMP_INTERNAL_FREE(__kmp_affinity_format);
7633 __kmp_affinity_format = NULL;
7637 __kmp_i18n_catclose();
7639 #if KMP_USE_HIER_SCHED
7640 __kmp_hier_scheds.deallocate();
7643 #if KMP_STATS_ENABLED
  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmp_ignore_mppbeg() returns TRUE.
  return TRUE;
}
int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmp_ignore_mppend() returns TRUE.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;
7680 gtid = __kmp_entry_gtid();
7681 root = __kmp_threads[gtid]->th.th_root;
7682 KMP_ASSERT(KMP_UBER_GTID(gtid));
7684 if (root->r.r_begin)
7686 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
7687 if (root->r.r_begin) {
7688 __kmp_release_lock(&root->r.r_begin_lock, gtid);
7692 root->r.r_begin = TRUE;
7694 __kmp_release_lock(&root->r.r_begin_lock, gtid);
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;
7706 gtid = __kmp_entry_gtid();
7707 thread = __kmp_threads[gtid];
7709 root = thread->th.th_root;
7711 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
7713 if (root->r.r_in_parallel) {
7715 KMP_WARNING(SetLibraryIncorrectCall);
7720 case library_serial:
7721 thread->th.th_set_nproc = 0;
7722 set__nproc(thread, 1);
7724 case library_turnaround:
7725 thread->th.th_set_nproc = 0;
7726 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7727 : __kmp_dflt_team_nth_ub);
7729 case library_throughput:
7730 thread->th.th_set_nproc = 0;
7731 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
7732 : __kmp_dflt_team_nth_ub);
7735 KMP_FATAL(UnknownLibraryType, arg);
7738 __kmp_aux_set_library(arg);
void __kmp_aux_set_stacksize(size_t arg) {
7742 if (!__kmp_init_serial)
7743 __kmp_serial_initialize();
7746 if (arg & (0x1000 - 1)) {
7747 arg &= ~(0x1000 - 1);
7752 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7755 if (!TCR_4(__kmp_init_parallel)) {
7758 if (value < __kmp_sys_min_stksize)
7759 value = __kmp_sys_min_stksize;
7760 else if (value > KMP_MAX_STKSIZE)
7761 value = KMP_MAX_STKSIZE;
7763 __kmp_stksize = value;
7765 __kmp_env_stksize = TRUE;
7768 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_aux_set_library(enum library_type arg) {
7774 __kmp_library = arg;
7776 switch (__kmp_library) {
7777 case library_serial: {
7778 KMP_INFORM(LibraryIsSerial);
7780 case library_turnaround:
7781 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
7782 __kmp_use_yield = 2;
7784 case library_throughput:
7785 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
7786 __kmp_dflt_blocktime = 200;
7789 KMP_FATAL(UnknownLibraryType, arg);
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
7796 kmp_info_t *thr = __kmp_entry_thread();
7797 teams_serialized = 0;
7798 if (thr->th.th_teams_microtask) {
7799 kmp_team_t *team = thr->th.th_team;
7800 int tlevel = thr->th.th_teams_level;
7801 int ii = team->t.t_level;
7802 teams_serialized = team->t.t_serialized;
7803 int level = tlevel + 1;
7804 KMP_DEBUG_ASSERT(ii >= tlevel);
7805 while (ii > level) {
7806 for (teams_serialized = team->t.t_serialized;
7807 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
7809 if (team->t.t_serialized && (!teams_serialized)) {
7810 team = team->t.t_parent;
7814 team = team->t.t_parent;
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized (1 team of 1 thread)
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}
int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}
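// Illustrative only: these helpers back the %t and %T affinity-format fields
// below and (through the Fortran/C entry points) typically the standard
// omp_get_team_num()/omp_get_num_teams() queries. A minimal sketch of the
// user-visible behaviour, not compiled into the runtime:
#if 0
#include <omp.h>
#include <stdio.h>

int example_team_queries(void) {
#pragma omp teams num_teams(4)
  {
    // Inside a teams construct each initial thread reports its team index;
    // outside of teams the calls fall back to 0 and 1 respectively.
    printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  }
  return 0;
}
#endif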
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g., 'L' -> nesting level
  const char *long_name; // from spec, e.g., "nesting_level"
  char field_format; // conversion type for snprintf, e.g., 'd' or 's'
} kmp_affinity_format_field_t;
static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
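// Illustrative only: the table above lists the field specifiers accepted by
// OMP_AFFINITY_FORMAT / omp_set_affinity_format() (short names after '%',
// long names in braces, plus the '0', '.' and width modifiers handled by
// __kmp_aux_capture_affinity_field() below). A sketch, not compiled into the
// runtime; the expanded text shown in the comment is only an example of what
// one worker might print:
#if 0
#include <omp.h>

int example_affinity_format(void) {
  // Equivalent to exporting OMP_AFFINITY_FORMAT before the run.
  omp_set_affinity_format(
      "host=%H pid=%P thread %0.3n of %N, level %{nesting_level}");
#pragma omp parallel
  { omp_display_affinity(NULL); } // NULL selects the affinity-format ICV
  // Possible output line: host=node17 pid=12345 thread 002 of 008, level 1
  return 0;
}
#endif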
// Parse one %-field of the affinity format string and print it into
// field_buffer. Returns the number of characters the field needs.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the modifiers parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths; this also keeps format[] in bounds
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      int length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If the runtime has no information for a field type, "undefined" is
    // printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return the number of characters needed to hold the expanded affinity
// format (not including the terminating null byte). The resulting text is
// placed in buffer, which the caller then owns.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
#endif // OMP_50_ENABLED
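// Illustrative only: __kmp_aux_capture_affinity()/__kmp_aux_display_affinity()
// sit behind the OpenMP 5.0 omp_capture_affinity()/omp_display_affinity()
// entry points. A minimal usage sketch, not compiled into the runtime:
#if 0
#include <omp.h>
#include <stdio.h>

int example_capture_affinity(void) {
#pragma omp parallel
  {
    char buf[256];
    // Passing NULL/"" selects the affinity-format ICV, exactly as the
    // parse_ptr fallback above does.
    size_t n = omp_capture_affinity(buf, sizeof(buf), NULL);
    if (n < sizeof(buf))
      printf("%s\n", buf); // same text omp_display_affinity(NULL) would print
  }
  return 0;
}
#endif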
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
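// Illustrative only: the clamping above is what user code reaches through
// kmp_set_blocktime() or the KMP_BLOCKTIME environment variable. A sketch,
// not compiled into the runtime, assuming the kmp_set_blocktime() entry point
// from the runtime's user-level kmp.h:
#if 0
extern "C" void kmp_set_blocktime(int msec); // declared in kmp.h

int example_set_blocktime(void) {
  // 0 asks workers to sleep immediately at the end of a parallel region;
  // values are clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] as above.
  kmp_set_blocktime(0);
#pragma omp parallel
  { /* ... */ }
  return 0;
}
#endif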
void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings
#if OMP_40_ENABLED
      || __kmp_display_env || __kmp_display_env_verbose
#endif // OMP_40_ENABLED
      ) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct as the reduction method.
  // If fast termination of the reduction was requested, the method is chosen
  // based on the reduction size, number of variables, atomic/tree
  // availability, platform, and OS.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||        \
    KMP_OS_KFREEBSD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD ||         \
    KMP_OS_KFREEBSD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
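// Illustrative only: a sketch of the packing that the >> 8 above undoes. It
// assumes the usual kmp.h encoding in which the reduction method occupies the
// bits above the low byte and the barrier type (used by the tree methods)
// occupies the low byte; the helper names below are hypothetical and not part
// of the runtime.
#if 0
static inline int unpack_reduction_method(kmp_int32 packed) {
  return packed >> 8; // the method number stored in the upper bits
}
static inline int unpack_reduction_barrier(kmp_int32 packed) {
  return packed & 0xFF; // barrier type, 0 when no barrier is packed
}
#endif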
#if OMP_50_ENABLED
// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely. Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}
// If the runtime is soft paused, clear the pause state and wake any workers
// that went to sleep while paused.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64 fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; thread was not asleep
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// This function is called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // runtime is not paused, so cannot resume it
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // runtime is already paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // runtime is already paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // invalid pause level requested
    return 1;
  }
}
#endif // OMP_50_ENABLED
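// Illustrative only: __kmp_pause_resource() implements the OpenMP 5.0
// omp_pause_resource()/omp_pause_resource_all() API reached through
// __kmpc_pause_resource. A minimal usage sketch, not compiled into the
// runtime:
#if 0
#include <omp.h>

int example_pause_resource(void) {
#pragma omp parallel
  { /* warm up the thread pool */ }

  // Soft pause: idle workers release their spinning and go to sleep; a later
  // parallel region resumes them via __kmp_resume_if_soft_paused().
  if (omp_pause_resource_all(omp_pause_soft) != 0) {
    /* already paused or pause not supported -- see the checks above */
  }

#pragma omp parallel
  { /* runtime resumes transparently */ }
  return 0;
}
#endif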