#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
  if (__kmp_par_range == 0) {

  semi2 = strchr(semi2, ';');

  semi2 = strchr(semi2 + 1, ';');

  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {

    if ((*name == '/') || (*name == ';')) {

    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;

  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;

  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;

    return __kmp_par_range < 0;

  return __kmp_entry_thread()->th.th_root->r.r_active;
                            kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
    if (lwt)
      ompt_frame = &(lwt->ompt_task_info.frame);
    else {
      int tid = __kmp_tid_from_gtid(gtid);
      ompt_frame = &(
          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
    }
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }

#if INCLUDE_SSC_MARKS
  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
#if INCLUDE_SSC_MARKS
  __kmp_join_call(loc, gtid

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
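
/* Illustrative sketch (not part of this file): a compiler typically lowers
   "#pragma omp parallel" by outlining the region into a microtask and calling
   __kmpc_fork_call. The ident_t setup and argument shapes below are simplified
   assumptions for illustration only:

     void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *shared_x) {
       // ... body of the parallel region, executed by every thread ...
     }

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};
     int x = 0;
     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);
*/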
                          kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
  __kmp_join_call(loc, gtid

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
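
/* Illustrative sketch (not part of this file): for
   "#pragma omp teams num_teams(4) thread_limit(8)" a compiler would normally
   emit roughly the following sequence; the exact call shapes are assumptions
   for illustration only:

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_teams(&loc, gtid, 4, 8);
     __kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_outlined);
*/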
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }

  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
  }

  serial_team->t.t_level--;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master = serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
#elif KMP_COMPILER_MSVC
  __sync_synchronize();
#endif // KMP_COMPILER_ICC

#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)

#error Unknown or unsupported architecture

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_master) {
    kmp_info_t *this_thr = __kmp_threads[global_tid];
    kmp_team_t *team = this_thr->th.th_team;

    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (KMP_MASTER_GTID(global_tid))
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (KMP_MASTER_GTID(global_tid))
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0)
      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
    __kmp_itt_critical_destroyed(ilk->lock);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
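
/* The three macros above implement a test-and-test-and-set spin lock with
   backoff. A minimal standalone sketch of the same acquire/release protocol,
   written with C11 atomics rather than the runtime's own types (an assumption
   for illustration only, not the runtime's implementation):

     #include <stdatomic.h>

     typedef struct { atomic_int poll; } toy_tas_lock_t;  // hypothetical type

     static void toy_tas_acquire(toy_tas_lock_t *l, int free_val, int busy_val) {
       int expected = free_val;
       // First test with a cheap relaxed load, then try to publish "busy"
       // with an acquire CAS; repeat until both succeed.
       while (atomic_load_explicit(&l->poll, memory_order_relaxed) != free_val ||
              !atomic_compare_exchange_weak_explicit(&l->poll, &expected,
                                                     busy_val,
                                                     memory_order_acquire,
                                                     memory_order_relaxed)) {
         expected = free_val; // a failed CAS overwrites expected; reset it
       }
     }

     static void toy_tas_release(toy_tas_lock_t *l, int free_val) {
       atomic_store_explicit(&l->poll, free_val, memory_order_release);
     }
*/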
#include <sys/syscall.h>
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val | KMP_LOCK_BUSY(1, futex))) {\
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX
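
/* The futex-based macros above keep the owner's encoded id in the upper bits
   of the lock word and use bit 0 as a "waiters present" flag: a loser of the
   acquire race sets bit 0 and sleeps in futex(FUTEX_WAIT); the releaser swaps
   the word back to "free" and issues futex(FUTEX_WAKE, 1) only when bit 0 was
   set, so uncontended unlocks stay entirely in user space. A minimal sketch of
   that release side, assuming a plain int lock word (illustration only):

     #include <linux/futex.h>
     #include <sys/syscall.h>
     #include <unistd.h>

     static void toy_futex_release(int *word) {
       int old = __atomic_exchange_n(word, 0, __ATOMIC_RELEASE);
       if (old & 1)  // a waiter registered itself; wake one up
         syscall(SYS_futex, word, FUTEX_WAKE, 1, NULL, NULL, 0);
     }
*/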
#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
    if (status == 0) {
      __kmp_itt_critical_destroyed(lck);
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }

#endif // KMP_USE_DYNAMIC_LOCK
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);

#else // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
  else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
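
/* Illustrative sketch (not part of this file): the hint values consumed above
   originate from user-level calls such as omp_init_lock_with_hint(), or from
   the hint() clause on locks and critical constructs; a compiler typically
   funnels the latter into __kmpc_critical_with_hint. Example user code using
   an OpenMP 4.5 hint constant (the lowering claim is an assumption for
   illustration only):

     #include <omp.h>

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
*/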
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  switch (KMP_EXTRACT_D_TAG(user_lock)) {
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_spin;
    return kmp_mutex_impl_speculative;
    return kmp_mutex_impl_none;
  }
  ilock = KMP_LOOKUP_I_LOCK(user_lock);

  switch (ilock->type) {
  case locktag_adaptive:
    return kmp_mutex_impl_speculative;
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
  case locktag_nested_futex:
  case locktag_ticket:
  case locktag_queuing:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_none;
  }
}

static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
    return kmp_mutex_impl_spin;
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_speculative;
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }

  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
}
#endif // KMP_USE_DYNAMIC_LOCK
                          kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
  else {
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;

  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  if (__kmp_env_consistency_check) {
    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);
    }

    __kmp_pop_sync(global_tid, ct_master, loc);
  }
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      ompt_work_type = ompt_work_loop;
      ompt_work_type = ompt_work_sections;
      ompt_work_type = ompt_work_distribute;
    KMP_DEBUG_ASSERT(ompt_work_type);

    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  if (didit)
    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
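
/* Illustrative sketch (not part of this file): for
   "#pragma omp single copyprivate(x)" the executing thread publishes the
   address of its private x through __kmpc_copyprivate, every other thread
   copies from it inside a compiler-generated cpy_func, and the two barriers
   above keep the source alive until all copies finish. Rough shape of the
   generated glue (assumptions for illustration only):

     static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }

     kmp_int32 didit = __kmpc_single(&loc, gtid);
     if (didit) {
       x = compute();
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_x, didit);
*/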
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
  switch (seq) {
    seq = lockseq_nested_tas;
    seq = lockseq_nested_futex;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    seq = lockseq_nested_drdpa;
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
}
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path on Linux/x86-compatible targets
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
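// Illustrative sketch (not part of the runtime source): a user-level critical
// section built on the standard OpenMP lock API. It is assumed here that
// omp_set_lock()/omp_unset_lock() forward to the __kmpc_set_lock()/
// __kmpc_unset_lock() entry points; the names below are hypothetical user code.
#if 0
#include <omp.h>

static omp_lock_t counter_lock; // protects shared_counter
static long shared_counter = 0;

void init_counter_lock(void) { omp_init_lock(&counter_lock); }

void increment_shared_counter(void) {
  omp_set_lock(&counter_lock);   // blocks until the lock is acquired
  ++shared_counter;              // protected update
  omp_unset_lock(&counter_lock); // release; reaches __kmpc_unset_lock above
}
#endif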
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path on Linux/x86-compatible targets
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
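// Illustrative sketch (not part of the runtime source): a nestable lock lets
// the owning thread re-acquire it, and only the release that drops the nesting
// depth to zero actually frees the lock -- the distinction the
// KMP_LOCK_RELEASED check above reports to the OMPT callbacks. Hypothetical
// user code:
#if 0
#include <omp.h>

static omp_nest_lock_t obj_lock;

void init_obj_lock(void) { omp_init_nest_lock(&obj_lock); }

void update_leaf(void) {
  omp_set_nest_lock(&obj_lock);   // depth 2 when called from update_tree()
  /* ... */
  omp_unset_nest_lock(&obj_lock); // back to depth 1: scope end, not a release
}

void update_tree(void) {
  omp_set_nest_lock(&obj_lock);   // depth 1: lock actually acquired
  update_leaf();                  // legal re-acquisition by the same thread
  omp_unset_nest_lock(&obj_lock); // depth 0: lock actually released
}
#endif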
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
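// Illustrative sketch (not part of the runtime source): omp_test_lock() is the
// non-blocking variant; the FTN_TRUE/FTN_FALSE result above surfaces to the
// user as nonzero/zero. The worker functions are hypothetical.
#if 0
#include <omp.h>

extern void process_queue(void);
extern void do_other_work(void);

void drain_queue_if_free(omp_lock_t *queue_lock) {
  if (omp_test_lock(queue_lock)) { // acquired without waiting
    process_queue();               // protected work
    omp_unset_lock(queue_lock);
  } else {
    do_other_work();               // fall back instead of blocking
  }
}
#endif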
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
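// Illustrative sketch (not part of the runtime source): for nestable locks the
// test routine returns the new nesting count on success (the "rc" propagated
// above) and zero on failure, so the result doubles as a depth indicator.
// Hypothetical user code:
#if 0
#include <omp.h>

int try_enter(omp_nest_lock_t *lk) {
  int depth = omp_test_nest_lock(lk); // 0 = not acquired, >= 1 = nesting depth
  if (depth > 1) {
    /* re-entered a lock this thread already owned */
  }
  return depth;
}
#endif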
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if the lock object is already initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch on direct (tagged) vs. indirect lock and perform the set operation.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // The fast reduction code is only emitted with 32-byte critical sections;
  // if the lock does not fit, fall back to a pointer to the real lock object.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If the lock does not fit inline in the critical name, a pointer to the
  // real lock object was stored there instead.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside a teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is a reduction at the teams construct; swap to the parent team
      // temporarily so the reduction machinery sees the right team.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // only the team master here
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}

static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore the thread's team-related state.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // A reduction clause cannot appear on a stand-alone directive, but make sure
  // the runtime is initialized anyway.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (__kmpc_end_reduce_nowait() is not
    // called by the code gen for the atomic method)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier should be invisible to a customer and to the threading profile
// tool (it is used for an internal purpose)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization is required

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not emit this call for the atomic_reduce_block case)

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
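// Illustrative sketch (not part of the runtime source): rough shape of the
// code a compiler might emit for a nowait reduction, based on the return-value
// convention above (1: perform the reduction and call
// __kmpc_end_reduce_nowait(), 2: use atomics, 0: nothing to do). The combiner
// layout and names are simplified assumptions, not the actual codegen.
#if 0
static void red_add(void *lhs, void *rhs) { // combine two private copies
  *(double *)lhs += *(double *)rhs;
}

static void reduce_tail(ident_t *loc, kmp_int32 gtid, double *sum_priv,
                        double *sum, kmp_critical_name *crit) {
  switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(double), sum_priv, red_add,
                               crit)) {
  case 1: { // this thread folds its copy into the shared variable
    *sum += *sum_priv;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
    break;
  }
  case 2: { // the runtime selected the atomic method
#pragma omp atomic
    *sum += *sum_priv;
    break;
  }
  default: // 0: contribution already folded by the tree-reduction barrier
    break;
  }
}
#endif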
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // make sure the runtime is initialized
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the master should do this pop here
    // (none of the other workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));

  return retval;
}
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
#if OMP_40_ENABLED
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
#endif

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

#if OMP_40_ENABLED
  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
#endif // OMP_40_ENABLED

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization is required

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
#if OMP_40_ENABLED
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
#endif

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
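// Illustrative sketch (not part of the runtime source): the blocking pair
// __kmpc_reduce()/__kmpc_end_reduce() differs from the *_nowait pair mainly in
// the barrier implied at the end of the construct. At the source level that is
// simply the absence of the nowait clause; a worksharing loop such as the one
// below (assumed to run inside a parallel region) is expected to take the
// blocking path.
#if 0
static double sum = 0.0;

void sum_part(const double *a, int n) {
#pragma omp for reduction(+ : sum) // no "nowait": barrier after the reduction
  for (int i = 0; i < n; ++i)
    sum += a[i];
}
#endif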
kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // advance to the next shared buffer
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer.
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is the number of dimensions
  // Save also the address of num_done in order to access it later without
  // knowing the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // range of each dimension but the first
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count, starting with the range of dims[0] which is
  // not kept in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is still occupied by a previous loop.
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free.
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, one bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
    // initialization is still in progress by another thread, need to wait
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
      KMP_YIELD(TRUE);
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
      KMP_YIELD(TRUE);
#endif
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check the value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save a private copy so the shared buffer is
                              // not touched on every iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number and check out-of-bounds
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number (same as in "wait" but without
  // the out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free the buffer for future re-use
  }
  // free private resources (need to keep the buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
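// Illustrative sketch (not part of the runtime source): user-level use of the
// OpenMP 5.0 allocator API implemented by the two wrappers above.
// omp_default_mem_alloc is one of the predefined allocator handles in <omp.h>.
#if 0
#include <omp.h>
#include <stddef.h>

double *make_buffer(size_t n) {
  return (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
}

void drop_buffer(double *buf) {
  omp_free(buf, omp_default_mem_alloc); // release through the same allocator
}
#endif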
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
#endif // OMP_50_ENABLED
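// Illustrative sketch (not part of the runtime source): __kmpc_pause_resource()
// backs the OpenMP 5.0 pause API. A host program can ask the runtime to
// release threads and other resources between parallel phases roughly like
// this (hypothetical usage):
#if 0
#include <omp.h>

void between_phases(void) {
  // Soft pause: worker threads may be released, but the runtime stays usable.
  omp_pause_resource_all(omp_pause_soft);
  /* ... long serial or I/O-bound phase ... */
  // The next parallel region re-creates whatever was released.
}
#endif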