LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 #define MAX_MESSAGE 512
27 
28 // flags will be used in the future, e.g., to implement openmp_strict library
29 // restrictions
30 
39 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40  // By default __kmpc_begin() is no-op.
41  char *env;
42  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43  __kmp_str_match_true(env)) {
44  __kmp_middle_initialize();
45  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46  } else if (__kmp_ignore_mppbeg() == FALSE) {
47  // By default __kmp_ignore_mppbeg() returns TRUE.
48  __kmp_internal_begin();
49  KC_TRACE(10, ("__kmpc_begin: called\n"));
50  }
51 }
52 
61 void __kmpc_end(ident_t *loc) {
62  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
63  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
64  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
65  // returns FALSE and __kmpc_end() will unregister this root (it can cause
66  // library shut down).
67  if (__kmp_ignore_mppend() == FALSE) {
68  KC_TRACE(10, ("__kmpc_end: called\n"));
69  KA_TRACE(30, ("__kmpc_end\n"));
70 
71  __kmp_internal_end_thread(-1);
72  }
73 #if KMP_OS_WINDOWS && OMPT_SUPPORT
74  // Normal exit process on Windows does not allow worker threads of the final
75  // parallel region to finish reporting their events, so shutting down the
76  // library here fixes the issue at least for the cases where __kmpc_end() is
77  // placed properly.
78  if (ompt_enabled.enabled)
79  __kmp_internal_end_library(__kmp_gtid_get_specific());
80 #endif
81 }
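
The following is an illustrative sketch (not part of the source) of the bracketing calls a compiler may emit around a program's OpenMP code; the ident_t initializer, the file/function names, and the flag value are assumptions made for the sketch, and kmp.h is the internal RTL header.

#include "kmp.h" // internal RTL header; assumes building against the runtime sources

// Minimal source-location descriptor (field layout per ident_t in kmp.h).
static ident_t begin_loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;main;1;1;;"};

int main(void) {
  // Emitted once per unit; a no-op unless KMP_IGNORE_MPPBEG=0 or
  // KMP_INITIAL_THREAD_BIND is set to a true value (see above).
  __kmpc_begin(&begin_loc, 0);

  // ... lowered OpenMP constructs ...

  // Unregisters this root only when KMP_IGNORE_MPPEND=0.
  __kmpc_end(&begin_loc);
  return 0;
}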
82 
101 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
102  kmp_int32 gtid = __kmp_entry_gtid();
103 
104  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
105 
106  return gtid;
107 }
108 
123 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
124  KC_TRACE(10,
125  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
126 
127  return TCR_4(__kmp_all_nth);
128 }
129 
136 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
137  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
138  return __kmp_tid_from_gtid(__kmp_entry_gtid());
139 }
140 
146 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
147  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
148 
149  return __kmp_entry_thread()->th.th_team->t.t_nproc;
150 }
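
A small sketch contrasting the four query entry points above; it assumes the internal kmp.h header and an invented ident_t. Inside nested or serialized regions the global and bound values can differ.

#include <stdio.h>
#include "kmp.h" // internal RTL header

static ident_t query_loc = {0, KMP_IDENT_KMPC, 0, 0, ";query.c;show_ids;1;1;;"};

static void show_ids(void) {
  printf("global T#%d of %d, bound T#%d of %d\n",
         __kmpc_global_thread_num(&query_loc),  // id among all RTL threads
         __kmpc_global_num_threads(&query_loc), // count of all RTL threads
         __kmpc_bound_thread_num(&query_loc),   // id within the current team
         __kmpc_bound_num_threads(&query_loc)); // size of the current team
}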
151 
158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
159 #ifndef KMP_DEBUG
160 
161  return TRUE;
162 
163 #else
164 
165  const char *semi2;
166  const char *semi3;
167  int line_no;
168 
169  if (__kmp_par_range == 0) {
170  return TRUE;
171  }
172  semi2 = loc->psource;
173  if (semi2 == NULL) {
174  return TRUE;
175  }
176  semi2 = strchr(semi2, ';');
177  if (semi2 == NULL) {
178  return TRUE;
179  }
180  semi2 = strchr(semi2 + 1, ';');
181  if (semi2 == NULL) {
182  return TRUE;
183  }
184  if (__kmp_par_range_filename[0]) {
185  const char *name = semi2 - 1;
186  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
187  name--;
188  }
189  if ((*name == '/') || (*name == ';')) {
190  name++;
191  }
192  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
193  return __kmp_par_range < 0;
194  }
195  }
196  semi3 = strchr(semi2 + 1, ';');
197  if (__kmp_par_range_routine[0]) {
198  if ((semi3 != NULL) && (semi3 > semi2) &&
199  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
200  return __kmp_par_range < 0;
201  }
202  }
203  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
204  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
205  return __kmp_par_range > 0;
206  }
207  return __kmp_par_range < 0;
208  }
209  return TRUE;
210 
211 #endif /* KMP_DEBUG */
212 }
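
For orientation, a sketch of the semicolon-separated loc->psource layout the parser above walks; the concrete string is invented, but the field order (file, then routine, then line numbers) is what the two strchr scans and the KMP_SSCANF call rely on.

// Assumed psource layout: ";<file>;<routine>;<first-line>;<last-line>;;"
// semi2 lands on the ';' after the file name, semi3 on the ';' after the
// routine name, and line_no is read immediately after semi3.
static ident_t range_loc = {0, KMP_IDENT_KMPC, 0, 0,
                            ";src/solver.c;solve;128;140;;"};
// With a parallel-range filter configured, __kmpc_ok_to_fork(&range_loc)
// compares "solver.c", "solve" and 128 against __kmp_par_range_filename,
// __kmp_par_range_routine and the [lb, ub] line window (inverted when
// __kmp_par_range is negative).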
213 
220 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
221  return __kmp_entry_thread()->th.th_root->r.r_active;
222 }
223 
233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
234  kmp_int32 num_threads) {
235  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
236  global_tid, num_threads));
237 
238  __kmp_push_num_threads(loc, global_tid, num_threads);
239 }
240 
241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
242  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
243 
244  /* the num_threads are automatically popped */
245 }
246 
247 #if OMP_40_ENABLED
248 
249 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
250  kmp_int32 proc_bind) {
251  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
252  proc_bind));
253 
254  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
255 }
256 
257 #endif /* OMP_40_ENABLED */
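
As a hedged illustration, the fragment below sketches what a compiler could emit for `#pragma omp parallel num_threads(4) proc_bind(close)`: the two push calls set one-shot values that the __kmpc_fork_call defined just below consumes. The outlined function and ident_t initializer are invented; proc_bind_close is the kmp_proc_bind_t enumerator from kmp.h.

#include "kmp.h"

static ident_t par_loc = {0, KMP_IDENT_KMPC, 0, 0, ";work.c;compute;10;14;;"};

static void compute_body(kmp_int32 *gtid, kmp_int32 *btid, int *data) {
  data[*btid] += 1; // hypothetical parallel-region body
}

void compute(int *data) {
  kmp_int32 gtid = __kmpc_global_thread_num(&par_loc);
  __kmpc_push_num_threads(&par_loc, gtid, 4);             // num_threads(4)
  __kmpc_push_proc_bind(&par_loc, gtid, proc_bind_close); // proc_bind(close)
  __kmpc_fork_call(&par_loc, 1, (kmpc_micro)compute_body, data);
}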
258 
269 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
270  int gtid = __kmp_entry_gtid();
271 
272 #if (KMP_STATS_ENABLED)
273  // If we were in a serial region, then stop the serial timer, record
274  // the event, and start parallel region timer
275  stats_state_e previous_state = KMP_GET_THREAD_STATE();
276  if (previous_state == stats_state_e::SERIAL_REGION) {
277  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
278  } else {
279  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
280  }
281  int inParallel = __kmpc_in_parallel(loc);
282  if (inParallel) {
283  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
284  } else {
285  KMP_COUNT_BLOCK(OMP_PARALLEL);
286  }
287 #endif
288 
289  // maybe saving thr_state is enough here
290  {
291  va_list ap;
292  va_start(ap, microtask);
293 
294 #if OMPT_SUPPORT
295  ompt_frame_t *ompt_frame;
296  if (ompt_enabled.enabled) {
297  kmp_info_t *master_th = __kmp_threads[gtid];
298  kmp_team_t *parent_team = master_th->th.th_team;
299  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
300  if (lwt)
301  ompt_frame = &(lwt->ompt_task_info.frame);
302  else {
303  int tid = __kmp_tid_from_gtid(gtid);
304  ompt_frame = &(
305  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
306  }
307  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
308  OMPT_STORE_RETURN_ADDRESS(gtid);
309  }
310 #endif
311 
312 #if INCLUDE_SSC_MARKS
313  SSC_MARK_FORKING();
314 #endif
315  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
316  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
317  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
318 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
319 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
320  &ap
321 #else
322  ap
323 #endif
324  );
325 #if INCLUDE_SSC_MARKS
326  SSC_MARK_JOINING();
327 #endif
328  __kmp_join_call(loc, gtid
329 #if OMPT_SUPPORT
330  ,
331  fork_context_intel
332 #endif
333  );
334 
335  va_end(ap);
336  }
337 
338 #if KMP_STATS_ENABLED
339  if (previous_state == stats_state_e::SERIAL_REGION) {
340  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
341  } else {
342  KMP_POP_PARTITIONED_TIMER();
343  }
344 #endif // KMP_STATS_ENABLED
345 }
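
To make the calling convention concrete, here is a sketch of the microtask shape __kmpc_fork_call expects: the outlined routine receives pointers to the global and bound thread ids followed by the argc pass-through arguments. All names below are invented for the sketch.

#include "kmp.h"

static ident_t fork_loc = {0, KMP_IDENT_KMPC, 0, 0, ";sum.c;sum;20;24;;"};

// Outlined body of "#pragma omp parallel"; the signature matches kmpc_micro.
static void sum_body(kmp_int32 *gtid, kmp_int32 *btid, long *partial) {
  partial[*btid] += 1; // each team member works on its own slot
}

void sum(long *partial) {
  // argc == 1: a single shared argument is forwarded to every thread.
  __kmpc_fork_call(&fork_loc, 1, (kmpc_micro)sum_body, partial);
}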
346 
347 #if OMP_40_ENABLED
348 
359 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
360  kmp_int32 num_teams, kmp_int32 num_threads) {
361  KA_TRACE(20,
362  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
363  global_tid, num_teams, num_threads));
364 
365  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
366 }
367 
378 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
379  ...) {
380  int gtid = __kmp_entry_gtid();
381  kmp_info_t *this_thr = __kmp_threads[gtid];
382  va_list ap;
383  va_start(ap, microtask);
384 
385 #if KMP_STATS_ENABLED
386  KMP_COUNT_BLOCK(OMP_TEAMS);
387  stats_state_e previous_state = KMP_GET_THREAD_STATE();
388  if (previous_state == stats_state_e::SERIAL_REGION) {
389  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
390  } else {
391  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
392  }
393 #endif
394 
395  // remember teams entry point and nesting level
396  this_thr->th.th_teams_microtask = microtask;
397  this_thr->th.th_teams_level =
398  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
399 
400 #if OMPT_SUPPORT
401  kmp_team_t *parent_team = this_thr->th.th_team;
402  int tid = __kmp_tid_from_gtid(gtid);
403  if (ompt_enabled.enabled) {
404  parent_team->t.t_implicit_task_taskdata[tid]
405  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
406  }
407  OMPT_STORE_RETURN_ADDRESS(gtid);
408 #endif
409 
410  // check if __kmpc_push_num_teams was called; set the default number of
411  // teams otherwise
412  if (this_thr->th.th_teams_size.nteams == 0) {
413  __kmp_push_num_teams(loc, gtid, 0, 0);
414  }
415  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
416  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
417  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
418 
419  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
420  VOLATILE_CAST(microtask_t)
421  __kmp_teams_master, // "wrapped" task
422  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
423 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
424  &ap
425 #else
426  ap
427 #endif
428  );
429  __kmp_join_call(loc, gtid
430 #if OMPT_SUPPORT
431  ,
432  fork_context_intel
433 #endif
434  );
435 
436  // Pop current CG root off list
437  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
438  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
439  this_thr->th.th_cg_roots = tmp->up;
440  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
441  " to node %p. cg_nthreads was %d\n",
442  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
443  __kmp_free(tmp);
444  // Restore current task's thread_limit from CG root
445  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
446  this_thr->th.th_current_task->td_icvs.thread_limit =
447  this_thr->th.th_cg_roots->cg_thread_limit;
448 
449  this_thr->th.th_teams_microtask = NULL;
450  this_thr->th.th_teams_level = 0;
451  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
452  va_end(ap);
453 #if KMP_STATS_ENABLED
454  if (previous_state == stats_state_e::SERIAL_REGION) {
455  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
456  } else {
457  KMP_POP_PARTITIONED_TIMER();
458  }
459 #endif // KMP_STATS_ENABLED
460 }
461 #endif /* OMP_40_ENABLED */
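
A hedged sketch of the lowering of `#pragma omp teams num_teams(2) thread_limit(8)`: an explicit __kmpc_push_num_teams call is what makes the nteams check in __kmpc_fork_teams see a non-zero value; without it the default 0/0 push above applies. Names are invented.

#include "kmp.h"

static ident_t teams_loc = {0, KMP_IDENT_KMPC, 0, 0, ";teams.c;run_teams;5;9;;"};

static void teams_body(kmp_int32 *gtid, kmp_int32 *btid, int *arg) {
  (void)gtid; (void)btid; (void)arg; // hypothetical teams-region body elided
}

void run_teams(int *arg) {
  kmp_int32 gtid = __kmpc_global_thread_num(&teams_loc);
  __kmpc_push_num_teams(&teams_loc, gtid, 2, 8); // num_teams(2) thread_limit(8)
  __kmpc_fork_teams(&teams_loc, 1, (kmpc_micro)teams_body, arg);
}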
462 
463 // I don't think this function should ever have been exported.
464 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
465 // openmp code ever called it, but it's been exported from the RTL for so
466 // long that I'm afraid to remove the definition.
467 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
468 
481 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
482 // The implementation is now in kmp_runtime.cpp so that it can share static
483 // functions with kmp_fork_call since the tasks to be done are similar in
484 // each case.
485 #if OMPT_SUPPORT
486  OMPT_STORE_RETURN_ADDRESS(global_tid);
487 #endif
488  __kmp_serialized_parallel(loc, global_tid);
489 }
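
A hedged sketch of how a parallel region whose `if` clause evaluates to false could be lowered: the body runs inline on the encountering thread between the serialized begin/end calls. Names are invented.

#include "kmp.h"

static ident_t ser_loc = {0, KMP_IDENT_KMPC, 0, 0, ";ser.c;maybe_parallel;7;11;;"};

void maybe_parallel(int go_parallel, int *x) {
  kmp_int32 gtid = __kmpc_global_thread_num(&ser_loc);
  if (!go_parallel) {                           // the "if(0)" path
    __kmpc_serialized_parallel(&ser_loc, gtid);
    (*x)++;                                     // region body, executed inline
    __kmpc_end_serialized_parallel(&ser_loc, gtid);
  }
  // otherwise the compiler emits the usual __kmpc_fork_call path
}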
490 
498 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
499  kmp_internal_control_t *top;
500  kmp_info_t *this_thr;
501  kmp_team_t *serial_team;
502 
503  KC_TRACE(10,
504  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
505 
506  /* skip all this code for autopar serialized loops since it results in
507  unacceptable overhead */
508  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
509  return;
510 
511  // Not autopar code
512  if (!TCR_4(__kmp_init_parallel))
513  __kmp_parallel_initialize();
514 
515 #if OMP_50_ENABLED
516  __kmp_resume_if_soft_paused();
517 #endif
518 
519  this_thr = __kmp_threads[global_tid];
520  serial_team = this_thr->th.th_serial_team;
521 
522 #if OMP_45_ENABLED
523  kmp_task_team_t *task_team = this_thr->th.th_task_team;
524 
525  // we need to wait for the proxy tasks before finishing the thread
526  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
527  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
528 #endif
529 
530  KMP_MB();
531  KMP_DEBUG_ASSERT(serial_team);
532  KMP_ASSERT(serial_team->t.t_serialized);
533  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
534  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
535  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
536  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
537 
538 #if OMPT_SUPPORT
539  if (ompt_enabled.enabled &&
540  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
541  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
542  if (ompt_enabled.ompt_callback_implicit_task) {
543  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
544  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
545  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
546  }
547 
548  // reset/clear the task id only after unlinking the task
549  ompt_data_t *parent_task_data;
550  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
551 
552  if (ompt_enabled.ompt_callback_parallel_end) {
553  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
554  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
555  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
556  }
557  __ompt_lw_taskteam_unlink(this_thr);
558  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
559  }
560 #endif
561 
562  /* If necessary, pop the internal control stack values and replace the team
563  * values */
564  top = serial_team->t.t_control_stack_top;
565  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
566  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
567  serial_team->t.t_control_stack_top = top->next;
568  __kmp_free(top);
569  }
570 
571  // if( serial_team -> t.t_serialized > 1 )
572  serial_team->t.t_level--;
573 
574  /* pop dispatch buffers stack */
575  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
576  {
577  dispatch_private_info_t *disp_buffer =
578  serial_team->t.t_dispatch->th_disp_buffer;
579  serial_team->t.t_dispatch->th_disp_buffer =
580  serial_team->t.t_dispatch->th_disp_buffer->next;
581  __kmp_free(disp_buffer);
582  }
583 #if OMP_50_ENABLED
584  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
585 #endif
586 
587  --serial_team->t.t_serialized;
588  if (serial_team->t.t_serialized == 0) {
589 
590 /* return to the parallel section */
591 
592 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
593  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
594  __kmp_clear_x87_fpu_status_word();
595  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
596  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
597  }
598 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
599 
600  this_thr->th.th_team = serial_team->t.t_parent;
601  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
602 
603  /* restore values cached in the thread */
604  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
605  this_thr->th.th_team_master =
606  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
607  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
608 
609  /* TODO the below shouldn't need to be adjusted for serialized teams */
610  this_thr->th.th_dispatch =
611  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
612 
613  __kmp_pop_current_task_from_thread(this_thr);
614 
615  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
616  this_thr->th.th_current_task->td_flags.executing = 1;
617 
618  if (__kmp_tasking_mode != tskm_immediate_exec) {
619  // Copy the task team from the new child / old parent team to the thread.
620  this_thr->th.th_task_team =
621  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
622  KA_TRACE(20,
623  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
624  "team %p\n",
625  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
626  }
627  } else {
628  if (__kmp_tasking_mode != tskm_immediate_exec) {
629  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
630  "depth of serial team %p to %d\n",
631  global_tid, serial_team, serial_team->t.t_serialized));
632  }
633  }
634 
635  if (__kmp_env_consistency_check)
636  __kmp_pop_parallel(global_tid, NULL);
637 #if OMPT_SUPPORT
638  if (ompt_enabled.enabled)
639  this_thr->th.ompt_thread_info.state =
640  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
641  : ompt_state_work_parallel);
642 #endif
643 }
644 
653 void __kmpc_flush(ident_t *loc) {
654  KC_TRACE(10, ("__kmpc_flush: called\n"));
655 
656  /* need an explicit __mf() here since we use volatile instead in the library */
657  KMP_MB(); /* Flush all pending memory write invalidates. */
658 
659 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
660 #if KMP_MIC
661 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
662 // We shouldn't need it, though, since the ABI rules require that
663 // * If the compiler generates NGO stores it also generates the fence
664 // * If users hand-code NGO stores they should insert the fence
665 // therefore no incomplete unordered stores should be visible.
666 #else
667  // C74404
668  // This is to address non-temporal store instructions (sfence needed).
669  // The clflush instruction is also addressed (mfence needed).
670  // Probably the non-temporal load instruction movntdqa should also be
671  // addressed.
672  // mfence is an SSE2 instruction. Do not execute it if CPU is not SSE2.
673  if (!__kmp_cpuinfo.initialized) {
674  __kmp_query_cpuid(&__kmp_cpuinfo);
675  }
676  if (!__kmp_cpuinfo.sse2) {
677  // CPU cannot execute SSE2 instructions.
678  } else {
679 #if KMP_COMPILER_ICC
680  _mm_mfence();
681 #elif KMP_COMPILER_MSVC
682  MemoryBarrier();
683 #else
684  __sync_synchronize();
685 #endif // KMP_COMPILER_ICC
686  }
687 #endif // KMP_MIC
688 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
689 // Nothing to see here, move along.
690 #elif KMP_ARCH_PPC64
691 // Nothing needed here (we have a real MB above).
692 #if KMP_OS_CNK
693  // The flushing thread needs to yield here; this prevents a
694  // busy-waiting thread from saturating the pipeline. flush is
695  // often used in loops like this:
696  // while (!flag) {
697  // #pragma omp flush(flag)
698  // }
699  // and adding the yield here is good for at least a 10x speedup
700  // when running >2 threads per core (on the NAS LU benchmark).
701  __kmp_yield();
702 #endif
703 #else
704 #error Unknown or unsupported architecture
705 #endif
706 
707 #if OMPT_SUPPORT && OMPT_OPTIONAL
708  if (ompt_enabled.ompt_callback_flush) {
709  ompt_callbacks.ompt_callback(ompt_callback_flush)(
710  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
711  }
712 #endif
713 }
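
A minimal sketch of the lowering of `#pragma omp flush` in a flag-polling loop (the very pattern the PPC64 comment above mentions); the ident_t is an invented placeholder.

#include "kmp.h"

static ident_t flush_loc = {0, KMP_IDENT_KMPC, 0, 0, ";flag.c;wait_flag;3;5;;"};

void wait_flag(volatile int *flag) {
  while (!*flag) {
    __kmpc_flush(&flush_loc); // #pragma omp flush(flag)
  }
}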
714 
715 /* -------------------------------------------------------------------------- */
723 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
724  KMP_COUNT_BLOCK(OMP_BARRIER);
725  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
726 
727  if (!TCR_4(__kmp_init_parallel))
728  __kmp_parallel_initialize();
729 
730 #if OMP_50_ENABLED
731  __kmp_resume_if_soft_paused();
732 #endif
733 
734  if (__kmp_env_consistency_check) {
735  if (loc == 0) {
736  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
737  }
738 
739  __kmp_check_barrier(global_tid, ct_barrier, loc);
740  }
741 
742 #if OMPT_SUPPORT
743  ompt_frame_t *ompt_frame;
744  if (ompt_enabled.enabled) {
745  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
746  if (ompt_frame->enter_frame.ptr == NULL)
747  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
748  OMPT_STORE_RETURN_ADDRESS(global_tid);
749  }
750 #endif
751  __kmp_threads[global_tid]->th.th_ident = loc;
752  // TODO: explicit barrier_wait_id:
753  // this function is called when the 'barrier' directive is present or for the
754  // implicit barrier at the end of a worksharing construct.
755  // 1) better to add a per-thread barrier counter to a thread data structure
756  // 2) set to 0 when a new team is created
757  // 3) no sync is required
758 
759  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
760 #if OMPT_SUPPORT && OMPT_OPTIONAL
761  if (ompt_enabled.enabled) {
762  ompt_frame->enter_frame = ompt_data_none;
763  }
764 #endif
765 }
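
A minimal sketch of an explicit `#pragma omp barrier` inside an already-outlined parallel body (names invented):

#include "kmp.h"

static ident_t bar_loc = {0, KMP_IDENT_KMPC, 0, 0, ";phase.c;phase_body;12;16;;"};

static void phase_body(kmp_int32 *gtid, kmp_int32 *btid, int *data) {
  data[*btid] = 1;                 // phase 1: every thread writes its slot
  __kmpc_barrier(&bar_loc, *gtid); // #pragma omp barrier
  // phase 2 may now read any slot written in phase 1
}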
766 
767 /* The BARRIER for a MASTER section is always explicit */
774 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
775  int status = 0;
776 
777  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
778 
779  if (!TCR_4(__kmp_init_parallel))
780  __kmp_parallel_initialize();
781 
782 #if OMP_50_ENABLED
783  __kmp_resume_if_soft_paused();
784 #endif
785 
786  if (KMP_MASTER_GTID(global_tid)) {
787  KMP_COUNT_BLOCK(OMP_MASTER);
788  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
789  status = 1;
790  }
791 
792 #if OMPT_SUPPORT && OMPT_OPTIONAL
793  if (status) {
794  if (ompt_enabled.ompt_callback_master) {
795  kmp_info_t *this_thr = __kmp_threads[global_tid];
796  kmp_team_t *team = this_thr->th.th_team;
797 
798  int tid = __kmp_tid_from_gtid(global_tid);
799  ompt_callbacks.ompt_callback(ompt_callback_master)(
800  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
801  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
802  OMPT_GET_RETURN_ADDRESS(0));
803  }
804  }
805 #endif
806 
807  if (__kmp_env_consistency_check) {
808 #if KMP_USE_DYNAMIC_LOCK
809  if (status)
810  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
811  else
812  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
813 #else
814  if (status)
815  __kmp_push_sync(global_tid, ct_master, loc, NULL);
816  else
817  __kmp_check_sync(global_tid, ct_master, loc, NULL);
818 #endif
819  }
820 
821  return status;
822 }
823 
832 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
833  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
834 
835  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
836  KMP_POP_PARTITIONED_TIMER();
837 
838 #if OMPT_SUPPORT && OMPT_OPTIONAL
839  kmp_info_t *this_thr = __kmp_threads[global_tid];
840  kmp_team_t *team = this_thr->th.th_team;
841  if (ompt_enabled.ompt_callback_master) {
842  int tid = __kmp_tid_from_gtid(global_tid);
843  ompt_callbacks.ompt_callback(ompt_callback_master)(
844  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
845  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
846  OMPT_GET_RETURN_ADDRESS(0));
847  }
848 #endif
849 
850  if (__kmp_env_consistency_check) {
851  if (global_tid < 0)
852  KMP_WARNING(ThreadIdentInvalid);
853 
854  if (KMP_MASTER_GTID(global_tid))
855  __kmp_pop_sync(global_tid, ct_master, loc);
856  }
857 }
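
A hedged sketch of the `#pragma omp master` lowering inside an outlined parallel body; __kmpc_end_master is reached only by the thread for which __kmpc_master returned 1, and there is no implied barrier.

#include "kmp.h"

static ident_t master_loc = {0, KMP_IDENT_KMPC, 0, 0, ";io.c;report_body;9;12;;"};

static void report_body(kmp_int32 *gtid, kmp_int32 *btid, int *done) {
  if (__kmpc_master(&master_loc, *gtid)) { // only the master thread enters
    *done = 1;                             // master-only work
    __kmpc_end_master(&master_loc, *gtid);
  }
  // other threads fall through immediately
}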
858 
866 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
867  int cid = 0;
868  kmp_info_t *th;
869  KMP_DEBUG_ASSERT(__kmp_init_serial);
870 
871  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
872 
873  if (!TCR_4(__kmp_init_parallel))
874  __kmp_parallel_initialize();
875 
876 #if OMP_50_ENABLED
877  __kmp_resume_if_soft_paused();
878 #endif
879 
880 #if USE_ITT_BUILD
881  __kmp_itt_ordered_prep(gtid);
882 // TODO: ordered_wait_id
883 #endif /* USE_ITT_BUILD */
884 
885  th = __kmp_threads[gtid];
886 
887 #if OMPT_SUPPORT && OMPT_OPTIONAL
888  kmp_team_t *team;
889  ompt_wait_id_t lck;
890  void *codeptr_ra;
891  if (ompt_enabled.enabled) {
892  OMPT_STORE_RETURN_ADDRESS(gtid);
893  team = __kmp_team_from_gtid(gtid);
894  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
895  /* OMPT state update */
896  th->th.ompt_thread_info.wait_id = lck;
897  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
898 
899  /* OMPT event callback */
900  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
901  if (ompt_enabled.ompt_callback_mutex_acquire) {
902  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
903  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
904  codeptr_ra);
905  }
906  }
907 #endif
908 
909  if (th->th.th_dispatch->th_deo_fcn != 0)
910  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
911  else
912  __kmp_parallel_deo(&gtid, &cid, loc);
913 
914 #if OMPT_SUPPORT && OMPT_OPTIONAL
915  if (ompt_enabled.enabled) {
916  /* OMPT state update */
917  th->th.ompt_thread_info.state = ompt_state_work_parallel;
918  th->th.ompt_thread_info.wait_id = 0;
919 
920  /* OMPT event callback */
921  if (ompt_enabled.ompt_callback_mutex_acquired) {
922  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
923  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
924  }
925  }
926 #endif
927 
928 #if USE_ITT_BUILD
929  __kmp_itt_ordered_start(gtid);
930 #endif /* USE_ITT_BUILD */
931 }
932 
940 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
941  int cid = 0;
942  kmp_info_t *th;
943 
944  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
945 
946 #if USE_ITT_BUILD
947  __kmp_itt_ordered_end(gtid);
948 // TODO: ordered_wait_id
949 #endif /* USE_ITT_BUILD */
950 
951  th = __kmp_threads[gtid];
952 
953  if (th->th.th_dispatch->th_dxo_fcn != 0)
954  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
955  else
956  __kmp_parallel_dxo(&gtid, &cid, loc);
957 
958 #if OMPT_SUPPORT && OMPT_OPTIONAL
959  OMPT_STORE_RETURN_ADDRESS(gtid);
960  if (ompt_enabled.ompt_callback_mutex_released) {
961  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
962  ompt_mutex_ordered,
963  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
964  ->t.t_ordered.dt.t_value,
965  OMPT_LOAD_RETURN_ADDRESS(gtid));
966  }
967 #endif
968 }
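
A hedged sketch of an `ordered` block inside one iteration of an ordered loop; the loop-dispatch setup that normally precedes this lives elsewhere in the RTL and is elided, so only the placement of the begin/end calls is the point here.

#include <stdio.h>
#include "kmp.h"

static ident_t ord_loc = {0, KMP_IDENT_KMPC, 0, 0, ";ord.c;print_in_order;4;7;;"};

// Body of one iteration of "#pragma omp for ordered" (dispatch elided).
static void iteration(kmp_int32 gtid, int i) {
  // ... work that may run out of order ...
  __kmpc_ordered(&ord_loc, gtid);     // wait for this iteration's turn
  printf("iteration %d\n", i);        // #pragma omp ordered body
  __kmpc_end_ordered(&ord_loc, gtid); // release the next iteration
}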
969 
970 #if KMP_USE_DYNAMIC_LOCK
971 
972 static __forceinline void
973 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
974  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
975  // Pointer to the allocated indirect lock is written to crit, while indexing
976  // is ignored.
977  void *idx;
978  kmp_indirect_lock_t **lck;
979  lck = (kmp_indirect_lock_t **)crit;
980  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
981  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
982  KMP_SET_I_LOCK_LOCATION(ilk, loc);
983  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
984  KA_TRACE(20,
985  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
986 #if USE_ITT_BUILD
987  __kmp_itt_critical_creating(ilk->lock, loc);
988 #endif
989  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
990  if (status == 0) {
991 #if USE_ITT_BUILD
992  __kmp_itt_critical_destroyed(ilk->lock);
993 #endif
994  // We don't really need to destroy the unclaimed lock here since it will be
995  // cleaned up at program exit.
996  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
997  }
998  KMP_DEBUG_ASSERT(*lck != NULL);
999 }
1000 
1001 // Fast-path acquire tas lock
1002 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1003  { \
1004  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1005  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1006  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1007  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1008  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1009  kmp_uint32 spins; \
1010  KMP_FSYNC_PREPARE(l); \
1011  KMP_INIT_YIELD(spins); \
1012  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1013  do { \
1014  if (TCR_4(__kmp_nth) > \
1015  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1016  KMP_YIELD(TRUE); \
1017  } else { \
1018  KMP_YIELD_SPIN(spins); \
1019  } \
1020  __kmp_spin_backoff(&backoff); \
1021  } while ( \
1022  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1023  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1024  } \
1025  KMP_FSYNC_ACQUIRED(l); \
1026  }
1027 
1028 // Fast-path test tas lock
1029 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1030  { \
1031  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1032  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1033  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1034  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1035  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1036  }
1037 
1038 // Fast-path release tas lock
1039 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1040  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
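
The three macros above are the inlined fast path for the test-and-set lock: lk.poll holds KMP_LOCK_FREE(tas) while the lock is free and a value derived from gtid+1 while it is held. As an analogy only (not the RTL's implementation), here is a self-contained sketch of the same idea with std::atomic, with the yield/backoff policy reduced to a bare spin.

#include <atomic>

struct tas_lock_sketch {
  std::atomic<int> poll{0}; // 0 == free, owner_gtid + 1 == busy

  bool try_acquire(int gtid) {
    int expected = 0;
    // relaxed pre-check, then an acquire CAS -- mirrors KMP_TEST_TAS_LOCK
    return poll.load(std::memory_order_relaxed) == 0 &&
           poll.compare_exchange_strong(expected, gtid + 1,
                                        std::memory_order_acquire);
  }
  void acquire(int gtid) {
    while (!try_acquire(gtid)) {
      // the real macro yields and applies exponential backoff here
    }
  }
  void release() { // mirrors KMP_RELEASE_TAS_LOCK
    poll.store(0, std::memory_order_release);
  }
};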
1041 
1042 #if KMP_USE_FUTEX
1043 
1044 #include <sys/syscall.h>
1045 #include <unistd.h>
1046 #ifndef FUTEX_WAIT
1047 #define FUTEX_WAIT 0
1048 #endif
1049 #ifndef FUTEX_WAKE
1050 #define FUTEX_WAKE 1
1051 #endif
1052 
1053 // Fast-path acquire futex lock
1054 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1055  { \
1056  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1057  kmp_int32 gtid_code = (gtid + 1) << 1; \
1058  KMP_MB(); \
1059  KMP_FSYNC_PREPARE(ftx); \
1060  kmp_int32 poll_val; \
1061  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1062  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1063  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1064  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1065  if (!cond) { \
1066  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1067  poll_val | \
1068  KMP_LOCK_BUSY(1, futex))) { \
1069  continue; \
1070  } \
1071  poll_val |= KMP_LOCK_BUSY(1, futex); \
1072  } \
1073  kmp_int32 rc; \
1074  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1075  NULL, NULL, 0)) != 0) { \
1076  continue; \
1077  } \
1078  gtid_code |= 1; \
1079  } \
1080  KMP_FSYNC_ACQUIRED(ftx); \
1081  }
1082 
1083 // Fast-path test futex lock
1084 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1085  { \
1086  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1087  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1088  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1089  KMP_FSYNC_ACQUIRED(ftx); \
1090  rc = TRUE; \
1091  } else { \
1092  rc = FALSE; \
1093  } \
1094  }
1095 
1096 // Fast-path release futex lock
1097 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1098  { \
1099  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1100  KMP_MB(); \
1101  KMP_FSYNC_RELEASING(ftx); \
1102  kmp_int32 poll_val = \
1103  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1104  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1105  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1106  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1107  } \
1108  KMP_MB(); \
1109  KMP_YIELD_OVERSUB(); \
1110  }
1111 
1112 #endif // KMP_USE_FUTEX
1113 
1114 #else // KMP_USE_DYNAMIC_LOCK
1115 
1116 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1117  ident_t const *loc,
1118  kmp_int32 gtid) {
1119  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1120 
1121  // Because of the double-check, the following load doesn't need to be volatile
1122  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1123 
1124  if (lck == NULL) {
1125  void *idx;
1126 
1127  // Allocate & initialize the lock.
1128  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1129  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1130  __kmp_init_user_lock_with_checks(lck);
1131  __kmp_set_user_lock_location(lck, loc);
1132 #if USE_ITT_BUILD
1133  __kmp_itt_critical_creating(lck);
1134 // __kmp_itt_critical_creating() should be called *before* the first usage
1135 // of the underlying lock. It is the only place where we can guarantee it.
1136 // There is a chance the lock will be destroyed without ever being used, but
1137 // that is not a problem, because this is not a real event seen by the user
1138 // but rather sets a name for the object (lock). See more details in kmp_itt.h.
1139 #endif /* USE_ITT_BUILD */
1140 
1141  // Use a cmpxchg instruction to slam the start of the critical section with
1142  // the lock pointer. If another thread beat us to it, deallocate the lock,
1143  // and use the lock that the other thread allocated.
1144  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1145 
1146  if (status == 0) {
1147 // Deallocate the lock and reload the value.
1148 #if USE_ITT_BUILD
1149  __kmp_itt_critical_destroyed(lck);
1150 // Let ITT know the lock is destroyed and the same memory location may be reused
1151 // for another purpose.
1152 #endif /* USE_ITT_BUILD */
1153  __kmp_destroy_user_lock_with_checks(lck);
1154  __kmp_user_lock_free(&idx, gtid, lck);
1155  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1156  KMP_DEBUG_ASSERT(lck != NULL);
1157  }
1158  }
1159  return lck;
1160 }
1161 
1162 #endif // KMP_USE_DYNAMIC_LOCK
1163 
1174 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1175  kmp_critical_name *crit) {
1176 #if KMP_USE_DYNAMIC_LOCK
1177 #if OMPT_SUPPORT && OMPT_OPTIONAL
1178  OMPT_STORE_RETURN_ADDRESS(global_tid);
1179 #endif // OMPT_SUPPORT
1180  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1181 #else
1182  KMP_COUNT_BLOCK(OMP_CRITICAL);
1183 #if OMPT_SUPPORT && OMPT_OPTIONAL
1184  ompt_state_t prev_state = ompt_state_undefined;
1185  ompt_thread_info_t ti;
1186 #endif
1187  kmp_user_lock_p lck;
1188 
1189  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1190 
1191  // TODO: add THR_OVHD_STATE
1192 
1193  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1194  KMP_CHECK_USER_LOCK_INIT();
1195 
1196  if ((__kmp_user_lock_kind == lk_tas) &&
1197  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1198  lck = (kmp_user_lock_p)crit;
1199  }
1200 #if KMP_USE_FUTEX
1201  else if ((__kmp_user_lock_kind == lk_futex) &&
1202  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1203  lck = (kmp_user_lock_p)crit;
1204  }
1205 #endif
1206  else { // ticket, queuing or drdpa
1207  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1208  }
1209 
1210  if (__kmp_env_consistency_check)
1211  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1212 
1213 // Since the critical directive binds to all threads, not just the current
1214 // team, we have to check this even if we are in a serialized team.
1215 // Also, even if we are the uber thread, we still have to acquire the lock,
1216 // as we have to contend with sibling threads.
1217 
1218 #if USE_ITT_BUILD
1219  __kmp_itt_critical_acquiring(lck);
1220 #endif /* USE_ITT_BUILD */
1221 #if OMPT_SUPPORT && OMPT_OPTIONAL
1222  OMPT_STORE_RETURN_ADDRESS(global_tid);
1223  void *codeptr_ra = NULL;
1224  if (ompt_enabled.enabled) {
1225  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1226  /* OMPT state update */
1227  prev_state = ti.state;
1228  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1229  ti.state = ompt_state_wait_critical;
1230 
1231  /* OMPT event callback */
1232  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1233  if (ompt_enabled.ompt_callback_mutex_acquire) {
1234  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1235  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1236  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1237  }
1238  }
1239 #endif
1240  // Value of 'crit' should be good for using as a critical_id of the critical
1241  // section directive.
1242  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1243 
1244 #if USE_ITT_BUILD
1245  __kmp_itt_critical_acquired(lck);
1246 #endif /* USE_ITT_BUILD */
1247 #if OMPT_SUPPORT && OMPT_OPTIONAL
1248  if (ompt_enabled.enabled) {
1249  /* OMPT state update */
1250  ti.state = prev_state;
1251  ti.wait_id = 0;
1252 
1253  /* OMPT event callback */
1254  if (ompt_enabled.ompt_callback_mutex_acquired) {
1255  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1256  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1257  }
1258  }
1259 #endif
1260  KMP_POP_PARTITIONED_TIMER();
1261 
1262  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1263  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1264 #endif // KMP_USE_DYNAMIC_LOCK
1265 }
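
A hedged sketch of the lowering of a named critical section: kmp_critical_name is a small static, zero-initialized blob whose storage the RTL reuses to cache the underlying lock, so the same object must be passed to the matching begin/end calls. The other names are invented.

#include "kmp.h"

static ident_t crit_loc = {0, KMP_IDENT_KMPC, 0, 0, ";acct.c;deposit;6;9;;"};
static kmp_critical_name crit_update; // one per critical name, zero-initialized

void deposit(long *balance, long amount) {
  kmp_int32 gtid = __kmpc_global_thread_num(&crit_loc);
  __kmpc_critical(&crit_loc, gtid, &crit_update); // #pragma omp critical(update)
  *balance += amount;
  __kmpc_end_critical(&crit_loc, gtid, &crit_update);
}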
1266 
1267 #if KMP_USE_DYNAMIC_LOCK
1268 
1269 // Converts the given hint to an internal lock implementation
1270 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1271 #if KMP_USE_TSX
1272 #define KMP_TSX_LOCK(seq) lockseq_##seq
1273 #else
1274 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1275 #endif
1276 
1277 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1278 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1279 #else
1280 #define KMP_CPUINFO_RTM 0
1281 #endif
1282 
1283  // Hints that do not require further logic
1284  if (hint & kmp_lock_hint_hle)
1285  return KMP_TSX_LOCK(hle);
1286  if (hint & kmp_lock_hint_rtm)
1287  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1288  if (hint & kmp_lock_hint_adaptive)
1289  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1290 
1291  // Rule out conflicting hints first by returning the default lock
1292  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1293  return __kmp_user_lock_seq;
1294  if ((hint & omp_lock_hint_speculative) &&
1295  (hint & omp_lock_hint_nonspeculative))
1296  return __kmp_user_lock_seq;
1297 
1298  // Do not even consider speculation when it appears to be contended
1299  if (hint & omp_lock_hint_contended)
1300  return lockseq_queuing;
1301 
1302  // Uncontended lock without speculation
1303  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1304  return lockseq_tas;
1305 
1306  // HLE lock for speculation
1307  if (hint & omp_lock_hint_speculative)
1308  return KMP_TSX_LOCK(hle);
1309 
1310  return __kmp_user_lock_seq;
1311 }
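
From the user side these hints typically arrive through omp_init_lock_with_hint (or the critical `hint` clause). A short sketch of what the mapping above implies; the comments state the expected internal choice, which is inferred from the code above rather than guaranteed.

#include <omp.h>

omp_lock_t l_spec, l_cont, l_uncont;

void init_hinted_locks(void) {
  omp_init_lock_with_hint(&l_spec, omp_lock_hint_speculative);    // HLE if TSX is present, else the default lock
  omp_init_lock_with_hint(&l_cont, omp_lock_hint_contended);      // queuing lock
  omp_init_lock_with_hint(&l_uncont, omp_lock_hint_uncontended);  // test-and-set lock
}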
1312 
1313 #if OMPT_SUPPORT && OMPT_OPTIONAL
1314 #if KMP_USE_DYNAMIC_LOCK
1315 static kmp_mutex_impl_t
1316 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1317  if (user_lock) {
1318  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1319  case 0:
1320  break;
1321 #if KMP_USE_FUTEX
1322  case locktag_futex:
1323  return kmp_mutex_impl_queuing;
1324 #endif
1325  case locktag_tas:
1326  return kmp_mutex_impl_spin;
1327 #if KMP_USE_TSX
1328  case locktag_hle:
1329  return kmp_mutex_impl_speculative;
1330 #endif
1331  default:
1332  return kmp_mutex_impl_none;
1333  }
1334  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1335  }
1336  KMP_ASSERT(ilock);
1337  switch (ilock->type) {
1338 #if KMP_USE_TSX
1339  case locktag_adaptive:
1340  case locktag_rtm:
1341  return kmp_mutex_impl_speculative;
1342 #endif
1343  case locktag_nested_tas:
1344  return kmp_mutex_impl_spin;
1345 #if KMP_USE_FUTEX
1346  case locktag_nested_futex:
1347 #endif
1348  case locktag_ticket:
1349  case locktag_queuing:
1350  case locktag_drdpa:
1351  case locktag_nested_ticket:
1352  case locktag_nested_queuing:
1353  case locktag_nested_drdpa:
1354  return kmp_mutex_impl_queuing;
1355  default:
1356  return kmp_mutex_impl_none;
1357  }
1358 }
1359 #else
1360 // For locks without dynamic binding
1361 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1362  switch (__kmp_user_lock_kind) {
1363  case lk_tas:
1364  return kmp_mutex_impl_spin;
1365 #if KMP_USE_FUTEX
1366  case lk_futex:
1367 #endif
1368  case lk_ticket:
1369  case lk_queuing:
1370  case lk_drdpa:
1371  return kmp_mutex_impl_queuing;
1372 #if KMP_USE_TSX
1373  case lk_hle:
1374  case lk_rtm:
1375  case lk_adaptive:
1376  return kmp_mutex_impl_speculative;
1377 #endif
1378  default:
1379  return kmp_mutex_impl_none;
1380  }
1381 }
1382 #endif // KMP_USE_DYNAMIC_LOCK
1383 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1384 
1398 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1399  kmp_critical_name *crit, uint32_t hint) {
1400  KMP_COUNT_BLOCK(OMP_CRITICAL);
1401  kmp_user_lock_p lck;
1402 #if OMPT_SUPPORT && OMPT_OPTIONAL
1403  ompt_state_t prev_state = ompt_state_undefined;
1404  ompt_thread_info_t ti;
1405  // This is the case if called from __kmpc_critical:
1406  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1407  if (!codeptr)
1408  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1409 #endif
1410 
1411  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1412 
1413  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1414  // Check if it is initialized.
1415  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1416  if (*lk == 0) {
1417  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1418  if (KMP_IS_D_LOCK(lckseq)) {
1419  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1420  KMP_GET_D_TAG(lckseq));
1421  } else {
1422  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1423  }
1424  }
1425  // Branch for accessing the actual lock object and set operation. This
1426  // branching is inevitable since this lock initialization does not follow the
1427  // normal dispatch path (lock table is not used).
1428  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1429  lck = (kmp_user_lock_p)lk;
1430  if (__kmp_env_consistency_check) {
1431  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1432  __kmp_map_hint_to_lock(hint));
1433  }
1434 #if USE_ITT_BUILD
1435  __kmp_itt_critical_acquiring(lck);
1436 #endif
1437 #if OMPT_SUPPORT && OMPT_OPTIONAL
1438  if (ompt_enabled.enabled) {
1439  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1440  /* OMPT state update */
1441  prev_state = ti.state;
1442  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1443  ti.state = ompt_state_wait_critical;
1444 
1445  /* OMPT event callback */
1446  if (ompt_enabled.ompt_callback_mutex_acquire) {
1447  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1448  ompt_mutex_critical, (unsigned int)hint,
1449  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1450  codeptr);
1451  }
1452  }
1453 #endif
1454 #if KMP_USE_INLINED_TAS
1455  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1456  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1457  } else
1458 #elif KMP_USE_INLINED_FUTEX
1459  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1460  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1461  } else
1462 #endif
1463  {
1464  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1465  }
1466  } else {
1467  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1468  lck = ilk->lock;
1469  if (__kmp_env_consistency_check) {
1470  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1471  __kmp_map_hint_to_lock(hint));
1472  }
1473 #if USE_ITT_BUILD
1474  __kmp_itt_critical_acquiring(lck);
1475 #endif
1476 #if OMPT_SUPPORT && OMPT_OPTIONAL
1477  if (ompt_enabled.enabled) {
1478  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1479  /* OMPT state update */
1480  prev_state = ti.state;
1481  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1482  ti.state = ompt_state_wait_critical;
1483 
1484  /* OMPT event callback */
1485  if (ompt_enabled.ompt_callback_mutex_acquire) {
1486  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1487  ompt_mutex_critical, (unsigned int)hint,
1488  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1489  codeptr);
1490  }
1491  }
1492 #endif
1493  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1494  }
1495  KMP_POP_PARTITIONED_TIMER();
1496 
1497 #if USE_ITT_BUILD
1498  __kmp_itt_critical_acquired(lck);
1499 #endif /* USE_ITT_BUILD */
1500 #if OMPT_SUPPORT && OMPT_OPTIONAL
1501  if (ompt_enabled.enabled) {
1502  /* OMPT state update */
1503  ti.state = prev_state;
1504  ti.wait_id = 0;
1505 
1506  /* OMPT event callback */
1507  if (ompt_enabled.ompt_callback_mutex_acquired) {
1508  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1509  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1510  }
1511  }
1512 #endif
1513 
1514  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1515  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1516 } // __kmpc_critical_with_hint
1517 
1518 #endif // KMP_USE_DYNAMIC_LOCK
1519 
1529 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1530  kmp_critical_name *crit) {
1531  kmp_user_lock_p lck;
1532 
1533  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1534 
1535 #if KMP_USE_DYNAMIC_LOCK
1536  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1537  lck = (kmp_user_lock_p)crit;
1538  KMP_ASSERT(lck != NULL);
1539  if (__kmp_env_consistency_check) {
1540  __kmp_pop_sync(global_tid, ct_critical, loc);
1541  }
1542 #if USE_ITT_BUILD
1543  __kmp_itt_critical_releasing(lck);
1544 #endif
1545 #if KMP_USE_INLINED_TAS
1546  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1547  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1548  } else
1549 #elif KMP_USE_INLINED_FUTEX
1550  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1551  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1552  } else
1553 #endif
1554  {
1555  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1556  }
1557  } else {
1558  kmp_indirect_lock_t *ilk =
1559  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1560  KMP_ASSERT(ilk != NULL);
1561  lck = ilk->lock;
1562  if (__kmp_env_consistency_check) {
1563  __kmp_pop_sync(global_tid, ct_critical, loc);
1564  }
1565 #if USE_ITT_BUILD
1566  __kmp_itt_critical_releasing(lck);
1567 #endif
1568  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1569  }
1570 
1571 #else // KMP_USE_DYNAMIC_LOCK
1572 
1573  if ((__kmp_user_lock_kind == lk_tas) &&
1574  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1575  lck = (kmp_user_lock_p)crit;
1576  }
1577 #if KMP_USE_FUTEX
1578  else if ((__kmp_user_lock_kind == lk_futex) &&
1579  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1580  lck = (kmp_user_lock_p)crit;
1581  }
1582 #endif
1583  else { // ticket, queuing or drdpa
1584  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1585  }
1586 
1587  KMP_ASSERT(lck != NULL);
1588 
1589  if (__kmp_env_consistency_check)
1590  __kmp_pop_sync(global_tid, ct_critical, loc);
1591 
1592 #if USE_ITT_BUILD
1593  __kmp_itt_critical_releasing(lck);
1594 #endif /* USE_ITT_BUILD */
1595  // Value of 'crit' should be good for using as a critical_id of the critical
1596  // section directive.
1597  __kmp_release_user_lock_with_checks(lck, global_tid);
1598 
1599 #endif // KMP_USE_DYNAMIC_LOCK
1600 
1601 #if OMPT_SUPPORT && OMPT_OPTIONAL
1602  /* OMPT release event triggers after lock is released; place here to trigger
1603  * for all #if branches */
1604  OMPT_STORE_RETURN_ADDRESS(global_tid);
1605  if (ompt_enabled.ompt_callback_mutex_released) {
1606  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1607  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1608  OMPT_LOAD_RETURN_ADDRESS(0));
1609  }
1610 #endif
1611 
1612  KMP_POP_PARTITIONED_TIMER();
1613  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1614 }
1615 
1625 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1626  int status;
1627 
1628  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1629 
1630  if (!TCR_4(__kmp_init_parallel))
1631  __kmp_parallel_initialize();
1632 
1633 #if OMP_50_ENABLED
1634  __kmp_resume_if_soft_paused();
1635 #endif
1636 
1637  if (__kmp_env_consistency_check)
1638  __kmp_check_barrier(global_tid, ct_barrier, loc);
1639 
1640 #if OMPT_SUPPORT
1641  ompt_frame_t *ompt_frame;
1642  if (ompt_enabled.enabled) {
1643  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1644  if (ompt_frame->enter_frame.ptr == NULL)
1645  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1646  OMPT_STORE_RETURN_ADDRESS(global_tid);
1647  }
1648 #endif
1649 #if USE_ITT_NOTIFY
1650  __kmp_threads[global_tid]->th.th_ident = loc;
1651 #endif
1652  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1653 #if OMPT_SUPPORT && OMPT_OPTIONAL
1654  if (ompt_enabled.enabled) {
1655  ompt_frame->enter_frame = ompt_data_none;
1656  }
1657 #endif
1658 
1659  return (status != 0) ? 0 : 1;
1660 }
1661 
1671 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1672  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1673 
1674  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1675 }
1676 
1687 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1688  kmp_int32 ret;
1689 
1690  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1691 
1692  if (!TCR_4(__kmp_init_parallel))
1693  __kmp_parallel_initialize();
1694 
1695 #if OMP_50_ENABLED
1696  __kmp_resume_if_soft_paused();
1697 #endif
1698 
1699  if (__kmp_env_consistency_check) {
1700  if (loc == 0) {
1701  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1702  }
1703  __kmp_check_barrier(global_tid, ct_barrier, loc);
1704  }
1705 
1706 #if OMPT_SUPPORT
1707  ompt_frame_t *ompt_frame;
1708  if (ompt_enabled.enabled) {
1709  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1710  if (ompt_frame->enter_frame.ptr == NULL)
1711  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1712  OMPT_STORE_RETURN_ADDRESS(global_tid);
1713  }
1714 #endif
1715 #if USE_ITT_NOTIFY
1716  __kmp_threads[global_tid]->th.th_ident = loc;
1717 #endif
1718  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1719 #if OMPT_SUPPORT && OMPT_OPTIONAL
1720  if (ompt_enabled.enabled) {
1721  ompt_frame->enter_frame = ompt_data_none;
1722  }
1723 #endif
1724 
1725  ret = __kmpc_master(loc, global_tid);
1726 
1727  if (__kmp_env_consistency_check) {
1728  /* there's no __kmpc_end_master called; so the (stats) */
1729  /* actions of __kmpc_end_master are done here */
1730 
1731  if (global_tid < 0) {
1732  KMP_WARNING(ThreadIdentInvalid);
1733  }
1734  if (ret) {
1735  /* only one thread should do the pop since only */
1736  /* one did the push (see __kmpc_master()) */
1737 
1738  __kmp_pop_sync(global_tid, ct_master, loc);
1739  }
1740  }
1741 
1742  return (ret);
1743 }
1744 
1745 /* The BARRIER for a SINGLE process section is always explicit */
1757 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1758  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1759 
1760  if (rc) {
1761  // We are going to execute the single statement, so we should count it.
1762  KMP_COUNT_BLOCK(OMP_SINGLE);
1763  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1764  }
1765 
1766 #if OMPT_SUPPORT && OMPT_OPTIONAL
1767  kmp_info_t *this_thr = __kmp_threads[global_tid];
1768  kmp_team_t *team = this_thr->th.th_team;
1769  int tid = __kmp_tid_from_gtid(global_tid);
1770 
1771  if (ompt_enabled.enabled) {
1772  if (rc) {
1773  if (ompt_enabled.ompt_callback_work) {
1774  ompt_callbacks.ompt_callback(ompt_callback_work)(
1775  ompt_work_single_executor, ompt_scope_begin,
1776  &(team->t.ompt_team_info.parallel_data),
1777  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1778  1, OMPT_GET_RETURN_ADDRESS(0));
1779  }
1780  } else {
1781  if (ompt_enabled.ompt_callback_work) {
1782  ompt_callbacks.ompt_callback(ompt_callback_work)(
1783  ompt_work_single_other, ompt_scope_begin,
1784  &(team->t.ompt_team_info.parallel_data),
1785  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1786  1, OMPT_GET_RETURN_ADDRESS(0));
1787  ompt_callbacks.ompt_callback(ompt_callback_work)(
1788  ompt_work_single_other, ompt_scope_end,
1789  &(team->t.ompt_team_info.parallel_data),
1790  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1791  1, OMPT_GET_RETURN_ADDRESS(0));
1792  }
1793  }
1794  }
1795 #endif
1796 
1797  return rc;
1798 }
1799 
1809 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1810  __kmp_exit_single(global_tid);
1811  KMP_POP_PARTITIONED_TIMER();
1812 
1813 #if OMPT_SUPPORT && OMPT_OPTIONAL
1814  kmp_info_t *this_thr = __kmp_threads[global_tid];
1815  kmp_team_t *team = this_thr->th.th_team;
1816  int tid = __kmp_tid_from_gtid(global_tid);
1817 
1818  if (ompt_enabled.ompt_callback_work) {
1819  ompt_callbacks.ompt_callback(ompt_callback_work)(
1820  ompt_work_single_executor, ompt_scope_end,
1821  &(team->t.ompt_team_info.parallel_data),
1822  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1823  OMPT_GET_RETURN_ADDRESS(0));
1824  }
1825 #endif
1826 }
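
A hedged sketch of the `#pragma omp single` lowering inside an outlined parallel body: exactly one thread gets a nonzero return from __kmpc_single, runs the block, and calls __kmpc_end_single; without a nowait clause the compiler also emits the trailing barrier.

#include "kmp.h"

static ident_t single_loc = {0, KMP_IDENT_KMPC, 0, 0, ";init.c;init_body;8;12;;"};

static void init_body(kmp_int32 *gtid, kmp_int32 *btid, int *shared) {
  if (__kmpc_single(&single_loc, *gtid)) { // one winner per team
    *shared = 42;                          // single block
    __kmpc_end_single(&single_loc, *gtid);
  }
  __kmpc_barrier(&single_loc, *gtid);      // implicit barrier (no nowait)
}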
1827 
1835 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1836  KMP_POP_PARTITIONED_TIMER();
1837  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1838 
1839 #if OMPT_SUPPORT && OMPT_OPTIONAL
1840  if (ompt_enabled.ompt_callback_work) {
1841  ompt_work_t ompt_work_type = ompt_work_loop;
1842  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1843  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1844  // Determine workshare type
1845  if (loc != NULL) {
1846  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1847  ompt_work_type = ompt_work_loop;
1848  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1849  ompt_work_type = ompt_work_sections;
1850  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1851  ompt_work_type = ompt_work_distribute;
1852  } else {
1853  // use default set above.
1854  // a warning about this case is provided in __kmpc_for_static_init
1855  }
1856  KMP_DEBUG_ASSERT(ompt_work_type);
1857  }
1858  ompt_callbacks.ompt_callback(ompt_callback_work)(
1859  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1860  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1861  }
1862 #endif
1863  if (__kmp_env_consistency_check)
1864  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1865 }
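
For context, a hedged sketch of the static-loop lowering that ends in the __kmpc_for_static_fini call above. __kmpc_for_static_init_4 and the kmp_sch_static schedule constant live elsewhere in the RTL; their use here is an assumption of the sketch, as are all other names.

#include "kmp.h"

static ident_t loop_loc = {0, KMP_IDENT_KMPC, 0, 0, ";axpy.c;axpy_body;15;19;;"};

// Outlined body of "#pragma omp for schedule(static)" over i = 0..n-1.
static void axpy_body(kmp_int32 gtid, int n, double a, double *x, double *y) {
  kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
  __kmpc_for_static_init_4(&loop_loc, gtid, kmp_sch_static, &last, &lower,
                           &upper, &stride, /*incr=*/1, /*chunk=*/0);
  for (kmp_int32 i = lower; i <= upper; ++i)
    y[i] = a * x[i] + y[i];
  __kmpc_for_static_fini(&loop_loc, gtid); // the entry point defined above
  // ... followed by __kmpc_barrier unless the loop has a nowait clause ...
}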
1866 
1867 // User routines which take C-style arguments (call by value)
1868 // different from the Fortran equivalent routines
1869 
1870 void ompc_set_num_threads(int arg) {
1871  // !!!!! TODO: check the per-task binding
1872  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1873 }
1874 
1875 void ompc_set_dynamic(int flag) {
1876  kmp_info_t *thread;
1877 
1878  /* For the thread-private implementation of the internal controls */
1879  thread = __kmp_entry_thread();
1880 
1881  __kmp_save_internal_controls(thread);
1882 
1883  set__dynamic(thread, flag ? TRUE : FALSE);
1884 }
1885 
1886 void ompc_set_nested(int flag) {
1887  kmp_info_t *thread;
1888 
1889  /* For the thread-private internal controls implementation */
1890  thread = __kmp_entry_thread();
1891 
1892  __kmp_save_internal_controls(thread);
1893 
1894  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1895 }
1896 
1897 void ompc_set_max_active_levels(int max_active_levels) {
1898  /* TO DO */
1899  /* we want per-task implementation of this internal control */
1900 
1901  /* For the per-thread internal controls implementation */
1902  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1903 }
1904 
1905 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1906  // !!!!! TODO: check the per-task binding
1907  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1908 }
1909 
1910 int ompc_get_ancestor_thread_num(int level) {
1911  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1912 }
1913 
1914 int ompc_get_team_size(int level) {
1915  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1916 }
1917 
1918 #if OMP_50_ENABLED
1919 /* OpenMP 5.0 Affinity Format API */
1920 
1921 void ompc_set_affinity_format(char const *format) {
1922  if (!__kmp_init_serial) {
1923  __kmp_serial_initialize();
1924  }
1925  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1926  format, KMP_STRLEN(format) + 1);
1927 }
1928 
1929 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1930  size_t format_size;
1931  if (!__kmp_init_serial) {
1932  __kmp_serial_initialize();
1933  }
1934  format_size = KMP_STRLEN(__kmp_affinity_format);
1935  if (buffer && size) {
1936  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1937  format_size + 1);
1938  }
1939  return format_size;
1940 }
1941 
1942 void ompc_display_affinity(char const *format) {
1943  int gtid;
1944  if (!TCR_4(__kmp_init_middle)) {
1945  __kmp_middle_initialize();
1946  }
1947  gtid = __kmp_get_gtid();
1948  __kmp_aux_display_affinity(gtid, format);
1949 }
1950 
1951 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1952  char const *format) {
1953  int gtid;
1954  size_t num_required;
1955  kmp_str_buf_t capture_buf;
1956  if (!TCR_4(__kmp_init_middle)) {
1957  __kmp_middle_initialize();
1958  }
1959  gtid = __kmp_get_gtid();
1960  __kmp_str_buf_init(&capture_buf);
1961  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1962  if (buffer && buf_size) {
1963  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1964  capture_buf.used + 1);
1965  }
1966  __kmp_str_buf_free(&capture_buf);
1967  return num_required;
1968 }
1969 #endif /* OMP_50_ENABLED */
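
// A minimal usage sketch of the OpenMP 5.0 affinity-format entry points above,
// assuming the standard omp_* declarations from omp.h; the function name is
// hypothetical. Passing NULL as the format selects the current
// affinity-format ICV, and the return value of omp_capture_affinity() is the
// number of characters needed, which allows a "probe then resize" pattern.
static void example_affinity_format(void) {
  omp_set_affinity_format("host=%H pid=%P tid=%n aff={%A}");
  char out[256];
  size_t need = omp_capture_affinity(out, sizeof(out), NULL);
  if (need >= sizeof(out)) {
    // out was truncated; a buffer of need + 1 bytes would hold the full string
  }
  omp_display_affinity(NULL); // print the calling thread's affinity
}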
1970 
1971 void kmpc_set_stacksize(int arg) {
1972  // __kmp_aux_set_stacksize initializes the library if needed
1973  __kmp_aux_set_stacksize(arg);
1974 }
1975 
1976 void kmpc_set_stacksize_s(size_t arg) {
1977  // __kmp_aux_set_stacksize initializes the library if needed
1978  __kmp_aux_set_stacksize(arg);
1979 }
1980 
1981 void kmpc_set_blocktime(int arg) {
1982  int gtid, tid;
1983  kmp_info_t *thread;
1984 
1985  gtid = __kmp_entry_gtid();
1986  tid = __kmp_tid_from_gtid(gtid);
1987  thread = __kmp_thread_from_gtid(gtid);
1988 
1989  __kmp_aux_set_blocktime(arg, thread, tid);
1990 }
1991 
1992 void kmpc_set_library(int arg) {
1993  // __kmp_user_set_library initializes the library if needed
1994  __kmp_user_set_library((enum library_type)arg);
1995 }
1996 
1997 void kmpc_set_defaults(char const *str) {
1998  // __kmp_aux_set_defaults initializes the library if needed
1999  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2000 }
2001 
2002 void kmpc_set_disp_num_buffers(int arg) {
2003  // ignore after initialization because some teams have already
2004  // allocated dispatch buffers
2005  if (__kmp_init_serial == 0 && arg > 0)
2006  __kmp_dispatch_num_buffers = arg;
2007 }
2008 
2009 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2010 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2011  return -1;
2012 #else
2013  if (!TCR_4(__kmp_init_middle)) {
2014  __kmp_middle_initialize();
2015  }
2016  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2017 #endif
2018 }
2019 
2020 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2021 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2022  return -1;
2023 #else
2024  if (!TCR_4(__kmp_init_middle)) {
2025  __kmp_middle_initialize();
2026  }
2027  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2028 #endif
2029 }
2030 
2031 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2032 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2033  return -1;
2034 #else
2035  if (!TCR_4(__kmp_init_middle)) {
2036  __kmp_middle_initialize();
2037  }
2038  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2039 #endif
2040 }
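
// A minimal usage sketch for the affinity-mask entry points above, assuming
// the kmp_* extension API declared in this runtime's omp.h (the function name
// and the choice of processor 2 are hypothetical). On stub builds, or when
// affinity is unsupported, these calls report failure, mirroring the -1 path
// above.
static void example_affinity_mask(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(2, &mask) == 0 && // backed by kmpc_set_affinity_mask_proc
      kmp_set_affinity(&mask) == 0) {
    // logical processor 2 is now in the calling thread's affinity mask
  }
  kmp_destroy_affinity_mask(&mask);
}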
2041 
2042 /* -------------------------------------------------------------------------- */
2087 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2088  void *cpy_data, void (*cpy_func)(void *, void *),
2089  kmp_int32 didit) {
2090  void **data_ptr;
2091 
2092  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2093 
2094  KMP_MB();
2095 
2096  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2097 
2098  if (__kmp_env_consistency_check) {
2099  if (loc == 0) {
2100  KMP_WARNING(ConstructIdentInvalid);
2101  }
2102  }
2103 
2104  // ToDo: Optimize the following two barriers into some kind of split barrier
2105 
2106  if (didit)
2107  *data_ptr = cpy_data;
2108 
2109 #if OMPT_SUPPORT
2110  ompt_frame_t *ompt_frame;
2111  if (ompt_enabled.enabled) {
2112  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2113  if (ompt_frame->enter_frame.ptr == NULL)
2114  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2115  OMPT_STORE_RETURN_ADDRESS(gtid);
2116  }
2117 #endif
2118 /* This barrier is not a barrier region boundary */
2119 #if USE_ITT_NOTIFY
2120  __kmp_threads[gtid]->th.th_ident = loc;
2121 #endif
2122  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2123 
2124  if (!didit)
2125  (*cpy_func)(cpy_data, *data_ptr);
2126 
2127 // Consider next barrier a user-visible barrier for barrier region boundaries
2128 // Nesting checks are already handled by the single construct checks
2129 
2130 #if OMPT_SUPPORT
2131  if (ompt_enabled.enabled) {
2132  OMPT_STORE_RETURN_ADDRESS(gtid);
2133  }
2134 #endif
2135 #if USE_ITT_NOTIFY
2136  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2137 // tasks can overwrite the location)
2138 #endif
2139  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2140 #if OMPT_SUPPORT && OMPT_OPTIONAL
2141  if (ompt_enabled.enabled) {
2142  ompt_frame->enter_frame = ompt_data_none;
2143  }
2144 #endif
2145 }
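
// A minimal sketch of the call pattern a compiler typically emits for
// "#pragma omp single copyprivate(x)"; the helper names (example_copy_int,
// example_single_copyprivate) are hypothetical, while __kmpc_single,
// __kmpc_end_single and __kmpc_copyprivate are the real entry points.
static void example_copy_int(void *dst, void *src) { *(int *)dst = *(int *)src; }

static void example_single_copyprivate(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 did_it = 0;
  if (__kmpc_single(loc, gtid)) { // one thread executes the single region
    *x = 42;                      // body of the single construct
    did_it = 1;
    __kmpc_end_single(loc, gtid);
  }
  // Every thread in the team calls this: the executor publishes its data,
  // the others copy from it via the supplied copy function.
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, example_copy_int, did_it);
}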
2146 
2147 /* -------------------------------------------------------------------------- */
2148 
2149 #define INIT_LOCK __kmp_init_user_lock_with_checks
2150 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2151 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2152 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2153 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2154 #define ACQUIRE_NESTED_LOCK_TIMED \
2155  __kmp_acquire_nested_user_lock_with_checks_timed
2156 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2157 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2158 #define TEST_LOCK __kmp_test_user_lock_with_checks
2159 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2160 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2161 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2162 
2163 // TODO: Make check abort messages use location info & pass it into
2164 // with_checks routines
2165 
2166 #if KMP_USE_DYNAMIC_LOCK
2167 
2168 // internal lock initializer
2169 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2170  kmp_dyna_lockseq_t seq) {
2171  if (KMP_IS_D_LOCK(seq)) {
2172  KMP_INIT_D_LOCK(lock, seq);
2173 #if USE_ITT_BUILD
2174  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2175 #endif
2176  } else {
2177  KMP_INIT_I_LOCK(lock, seq);
2178 #if USE_ITT_BUILD
2179  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2180  __kmp_itt_lock_creating(ilk->lock, loc);
2181 #endif
2182  }
2183 }
2184 
2185 // internal nest lock initializer
2186 static __forceinline void
2187 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2188  kmp_dyna_lockseq_t seq) {
2189 #if KMP_USE_TSX
2190  // Don't have nested lock implementation for speculative locks
2191  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2192  seq = __kmp_user_lock_seq;
2193 #endif
2194  switch (seq) {
2195  case lockseq_tas:
2196  seq = lockseq_nested_tas;
2197  break;
2198 #if KMP_USE_FUTEX
2199  case lockseq_futex:
2200  seq = lockseq_nested_futex;
2201  break;
2202 #endif
2203  case lockseq_ticket:
2204  seq = lockseq_nested_ticket;
2205  break;
2206  case lockseq_queuing:
2207  seq = lockseq_nested_queuing;
2208  break;
2209  case lockseq_drdpa:
2210  seq = lockseq_nested_drdpa;
2211  break;
2212  default:
2213  seq = lockseq_nested_queuing;
2214  }
2215  KMP_INIT_I_LOCK(lock, seq);
2216 #if USE_ITT_BUILD
2217  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2218  __kmp_itt_lock_creating(ilk->lock, loc);
2219 #endif
2220 }
2221 
2222 /* initialize the lock with a hint */
2223 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2224  uintptr_t hint) {
2225  KMP_DEBUG_ASSERT(__kmp_init_serial);
2226  if (__kmp_env_consistency_check && user_lock == NULL) {
2227  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2228  }
2229 
2230  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2231 
2232 #if OMPT_SUPPORT && OMPT_OPTIONAL
2233  // This is the case, if called from omp_init_lock_with_hint:
2234  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2235  if (!codeptr)
2236  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2237  if (ompt_enabled.ompt_callback_lock_init) {
2238  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2239  ompt_mutex_lock, (omp_lock_hint_t)hint,
2240  __ompt_get_mutex_impl_type(user_lock),
2241  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2242  }
2243 #endif
2244 }
2245 
2246 /* initialize the nested lock with a hint */
2247 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2248  void **user_lock, uintptr_t hint) {
2249  KMP_DEBUG_ASSERT(__kmp_init_serial);
2250  if (__kmp_env_consistency_check && user_lock == NULL) {
2251  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2252  }
2253 
2254  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2255 
2256 #if OMPT_SUPPORT && OMPT_OPTIONAL
2257  // This is the case, if called from omp_init_lock_with_hint:
2258  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2259  if (!codeptr)
2260  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2261  if (ompt_enabled.ompt_callback_lock_init) {
2262  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2263  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2264  __ompt_get_mutex_impl_type(user_lock),
2265  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2266  }
2267 #endif
2268 }
2269 
2270 #endif // KMP_USE_DYNAMIC_LOCK
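
// A minimal usage sketch for the hinted initializers above, assuming the
// OpenMP 4.5 omp_init_lock_with_hint() API from omp.h; the function name is
// hypothetical. The hint is advisory: __kmp_map_hint_to_lock() picks the
// closest lock kind the build supports (e.g. a speculative lock only when
// TSX-style locks are available).
static void example_lock_with_hint(void) {
  static omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
  omp_set_lock(&l);
  // ... critical work ...
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}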
2271 
2272 /* initialize the lock */
2273 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2274 #if KMP_USE_DYNAMIC_LOCK
2275 
2276  KMP_DEBUG_ASSERT(__kmp_init_serial);
2277  if (__kmp_env_consistency_check && user_lock == NULL) {
2278  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2279  }
2280  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2281 
2282 #if OMPT_SUPPORT && OMPT_OPTIONAL
2283  // This is the case, if called from omp_init_lock_with_hint:
2284  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2285  if (!codeptr)
2286  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2287  if (ompt_enabled.ompt_callback_lock_init) {
2288  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2289  ompt_mutex_lock, omp_lock_hint_none,
2290  __ompt_get_mutex_impl_type(user_lock),
2291  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2292  }
2293 #endif
2294 
2295 #else // KMP_USE_DYNAMIC_LOCK
2296 
2297  static char const *const func = "omp_init_lock";
2298  kmp_user_lock_p lck;
2299  KMP_DEBUG_ASSERT(__kmp_init_serial);
2300 
2301  if (__kmp_env_consistency_check) {
2302  if (user_lock == NULL) {
2303  KMP_FATAL(LockIsUninitialized, func);
2304  }
2305  }
2306 
2307  KMP_CHECK_USER_LOCK_INIT();
2308 
2309  if ((__kmp_user_lock_kind == lk_tas) &&
2310  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2311  lck = (kmp_user_lock_p)user_lock;
2312  }
2313 #if KMP_USE_FUTEX
2314  else if ((__kmp_user_lock_kind == lk_futex) &&
2315  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2316  lck = (kmp_user_lock_p)user_lock;
2317  }
2318 #endif
2319  else {
2320  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2321  }
2322  INIT_LOCK(lck);
2323  __kmp_set_user_lock_location(lck, loc);
2324 
2325 #if OMPT_SUPPORT && OMPT_OPTIONAL
2326  // This is the case, if called from omp_init_lock_with_hint:
2327  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2328  if (!codeptr)
2329  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2330  if (ompt_enabled.ompt_callback_lock_init) {
2331  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2332  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2333  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2334  }
2335 #endif
2336 
2337 #if USE_ITT_BUILD
2338  __kmp_itt_lock_creating(lck);
2339 #endif /* USE_ITT_BUILD */
2340 
2341 #endif // KMP_USE_DYNAMIC_LOCK
2342 } // __kmpc_init_lock
2343 
2344 /* initialize the nested lock */
2345 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2346 #if KMP_USE_DYNAMIC_LOCK
2347 
2348  KMP_DEBUG_ASSERT(__kmp_init_serial);
2349  if (__kmp_env_consistency_check && user_lock == NULL) {
2350  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2351  }
2352  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2353 
2354 #if OMPT_SUPPORT && OMPT_OPTIONAL
2355  // This is the case, if called from omp_init_lock_with_hint:
2356  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2357  if (!codeptr)
2358  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2359  if (ompt_enabled.ompt_callback_lock_init) {
2360  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2361  ompt_mutex_nest_lock, omp_lock_hint_none,
2362  __ompt_get_mutex_impl_type(user_lock),
2363  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2364  }
2365 #endif
2366 
2367 #else // KMP_USE_DYNAMIC_LOCK
2368 
2369  static char const *const func = "omp_init_nest_lock";
2370  kmp_user_lock_p lck;
2371  KMP_DEBUG_ASSERT(__kmp_init_serial);
2372 
2373  if (__kmp_env_consistency_check) {
2374  if (user_lock == NULL) {
2375  KMP_FATAL(LockIsUninitialized, func);
2376  }
2377  }
2378 
2379  KMP_CHECK_USER_LOCK_INIT();
2380 
2381  if ((__kmp_user_lock_kind == lk_tas) &&
2382  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2383  OMP_NEST_LOCK_T_SIZE)) {
2384  lck = (kmp_user_lock_p)user_lock;
2385  }
2386 #if KMP_USE_FUTEX
2387  else if ((__kmp_user_lock_kind == lk_futex) &&
2388  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2389  OMP_NEST_LOCK_T_SIZE)) {
2390  lck = (kmp_user_lock_p)user_lock;
2391  }
2392 #endif
2393  else {
2394  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2395  }
2396 
2397  INIT_NESTED_LOCK(lck);
2398  __kmp_set_user_lock_location(lck, loc);
2399 
2400 #if OMPT_SUPPORT && OMPT_OPTIONAL
2401  // This is the case, if called from omp_init_lock_with_hint:
2402  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2403  if (!codeptr)
2404  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2405  if (ompt_enabled.ompt_callback_lock_init) {
2406  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2407  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2408  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2409  }
2410 #endif
2411 
2412 #if USE_ITT_BUILD
2413  __kmp_itt_lock_creating(lck);
2414 #endif /* USE_ITT_BUILD */
2415 
2416 #endif // KMP_USE_DYNAMIC_LOCK
2417 } // __kmpc_init_nest_lock
2418 
2419 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2420 #if KMP_USE_DYNAMIC_LOCK
2421 
2422 #if USE_ITT_BUILD
2423  kmp_user_lock_p lck;
2424  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2425  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2426  } else {
2427  lck = (kmp_user_lock_p)user_lock;
2428  }
2429  __kmp_itt_lock_destroyed(lck);
2430 #endif
2431 #if OMPT_SUPPORT && OMPT_OPTIONAL
2432  // This is the case, if called from omp_init_lock_with_hint:
2433  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2434  if (!codeptr)
2435  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2436  if (ompt_enabled.ompt_callback_lock_destroy) {
2437  kmp_user_lock_p lck;
2438  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2439  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2440  } else {
2441  lck = (kmp_user_lock_p)user_lock;
2442  }
2443  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2444  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2445  }
2446 #endif
2447  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2448 #else
2449  kmp_user_lock_p lck;
2450 
2451  if ((__kmp_user_lock_kind == lk_tas) &&
2452  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2453  lck = (kmp_user_lock_p)user_lock;
2454  }
2455 #if KMP_USE_FUTEX
2456  else if ((__kmp_user_lock_kind == lk_futex) &&
2457  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2458  lck = (kmp_user_lock_p)user_lock;
2459  }
2460 #endif
2461  else {
2462  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2463  }
2464 
2465 #if OMPT_SUPPORT && OMPT_OPTIONAL
2466  // This is the case, if called from omp_init_lock_with_hint:
2467  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2468  if (!codeptr)
2469  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2470  if (ompt_enabled.ompt_callback_lock_destroy) {
2471  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2472  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2473  }
2474 #endif
2475 
2476 #if USE_ITT_BUILD
2477  __kmp_itt_lock_destroyed(lck);
2478 #endif /* USE_ITT_BUILD */
2479  DESTROY_LOCK(lck);
2480 
2481  if ((__kmp_user_lock_kind == lk_tas) &&
2482  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2483  ;
2484  }
2485 #if KMP_USE_FUTEX
2486  else if ((__kmp_user_lock_kind == lk_futex) &&
2487  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2488  ;
2489  }
2490 #endif
2491  else {
2492  __kmp_user_lock_free(user_lock, gtid, lck);
2493  }
2494 #endif // KMP_USE_DYNAMIC_LOCK
2495 } // __kmpc_destroy_lock
2496 
2497 /* destroy the nested lock */
2498 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2499 #if KMP_USE_DYNAMIC_LOCK
2500 
2501 #if USE_ITT_BUILD
2502  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2503  __kmp_itt_lock_destroyed(ilk->lock);
2504 #endif
2505 #if OMPT_SUPPORT && OMPT_OPTIONAL
2506  // This is the case, if called from omp_init_lock_with_hint:
2507  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2508  if (!codeptr)
2509  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2510  if (ompt_enabled.ompt_callback_lock_destroy) {
2511  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2512  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2513  }
2514 #endif
2515  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2516 
2517 #else // KMP_USE_DYNAMIC_LOCK
2518 
2519  kmp_user_lock_p lck;
2520 
2521  if ((__kmp_user_lock_kind == lk_tas) &&
2522  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2523  OMP_NEST_LOCK_T_SIZE)) {
2524  lck = (kmp_user_lock_p)user_lock;
2525  }
2526 #if KMP_USE_FUTEX
2527  else if ((__kmp_user_lock_kind == lk_futex) &&
2528  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2529  OMP_NEST_LOCK_T_SIZE)) {
2530  lck = (kmp_user_lock_p)user_lock;
2531  }
2532 #endif
2533  else {
2534  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2535  }
2536 
2537 #if OMPT_SUPPORT && OMPT_OPTIONAL
2538  // This is the case, if called from omp_init_lock_with_hint:
2539  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2540  if (!codeptr)
2541  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2542  if (ompt_enabled.ompt_callback_lock_destroy) {
2543  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2544  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2545  }
2546 #endif
2547 
2548 #if USE_ITT_BUILD
2549  __kmp_itt_lock_destroyed(lck);
2550 #endif /* USE_ITT_BUILD */
2551 
2552  DESTROY_NESTED_LOCK(lck);
2553 
2554  if ((__kmp_user_lock_kind == lk_tas) &&
2555  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2556  OMP_NEST_LOCK_T_SIZE)) {
2557  ;
2558  }
2559 #if KMP_USE_FUTEX
2560  else if ((__kmp_user_lock_kind == lk_futex) &&
2561  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2562  OMP_NEST_LOCK_T_SIZE)) {
2563  ;
2564  }
2565 #endif
2566  else {
2567  __kmp_user_lock_free(user_lock, gtid, lck);
2568  }
2569 #endif // KMP_USE_DYNAMIC_LOCK
2570 } // __kmpc_destroy_nest_lock
2571 
2572 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2573  KMP_COUNT_BLOCK(OMP_set_lock);
2574 #if KMP_USE_DYNAMIC_LOCK
2575  int tag = KMP_EXTRACT_D_TAG(user_lock);
2576 #if USE_ITT_BUILD
2577  __kmp_itt_lock_acquiring(
2578  (kmp_user_lock_p)
2579  user_lock); // itt function will get to the right lock object.
2580 #endif
2581 #if OMPT_SUPPORT && OMPT_OPTIONAL
2582  // This is the case, if called from omp_init_lock_with_hint:
2583  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2584  if (!codeptr)
2585  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2586  if (ompt_enabled.ompt_callback_mutex_acquire) {
2587  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2588  ompt_mutex_lock, omp_lock_hint_none,
2589  __ompt_get_mutex_impl_type(user_lock),
2590  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2591  }
2592 #endif
2593 #if KMP_USE_INLINED_TAS
2594  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2595  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2596  } else
2597 #elif KMP_USE_INLINED_FUTEX
2598  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2599  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2600  } else
2601 #endif
2602  {
2603  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2604  }
2605 #if USE_ITT_BUILD
2606  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2607 #endif
2608 #if OMPT_SUPPORT && OMPT_OPTIONAL
2609  if (ompt_enabled.ompt_callback_mutex_acquired) {
2610  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2611  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2612  }
2613 #endif
2614 
2615 #else // KMP_USE_DYNAMIC_LOCK
2616 
2617  kmp_user_lock_p lck;
2618 
2619  if ((__kmp_user_lock_kind == lk_tas) &&
2620  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2621  lck = (kmp_user_lock_p)user_lock;
2622  }
2623 #if KMP_USE_FUTEX
2624  else if ((__kmp_user_lock_kind == lk_futex) &&
2625  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2626  lck = (kmp_user_lock_p)user_lock;
2627  }
2628 #endif
2629  else {
2630  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2631  }
2632 
2633 #if USE_ITT_BUILD
2634  __kmp_itt_lock_acquiring(lck);
2635 #endif /* USE_ITT_BUILD */
2636 #if OMPT_SUPPORT && OMPT_OPTIONAL
2637  // This is the case, if called from omp_init_lock_with_hint:
2638  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2639  if (!codeptr)
2640  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2641  if (ompt_enabled.ompt_callback_mutex_acquire) {
2642  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2643  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2644  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2645  }
2646 #endif
2647 
2648  ACQUIRE_LOCK(lck, gtid);
2649 
2650 #if USE_ITT_BUILD
2651  __kmp_itt_lock_acquired(lck);
2652 #endif /* USE_ITT_BUILD */
2653 
2654 #if OMPT_SUPPORT && OMPT_OPTIONAL
2655  if (ompt_enabled.ompt_callback_mutex_acquired) {
2656  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2657  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2658  }
2659 #endif
2660 
2661 #endif // KMP_USE_DYNAMIC_LOCK
2662 }
2663 
2664 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2665 #if KMP_USE_DYNAMIC_LOCK
2666 
2667 #if USE_ITT_BUILD
2668  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2669 #endif
2670 #if OMPT_SUPPORT && OMPT_OPTIONAL
2671  // This is the case, if called from omp_init_lock_with_hint:
2672  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2673  if (!codeptr)
2674  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2675  if (ompt_enabled.enabled) {
2676  if (ompt_enabled.ompt_callback_mutex_acquire) {
2677  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2678  ompt_mutex_nest_lock, omp_lock_hint_none,
2679  __ompt_get_mutex_impl_type(user_lock),
2680  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2681  }
2682  }
2683 #endif
2684  int acquire_status =
2685  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2686  (void) acquire_status;
2687 #if USE_ITT_BUILD
2688  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2689 #endif
2690 
2691 #if OMPT_SUPPORT && OMPT_OPTIONAL
2692  if (ompt_enabled.enabled) {
2693  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2694  if (ompt_enabled.ompt_callback_mutex_acquired) {
2695  // lock_first
2696  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2697  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2698  codeptr);
2699  }
2700  } else {
2701  if (ompt_enabled.ompt_callback_nest_lock) {
2702  // lock_next
2703  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2704  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2705  }
2706  }
2707  }
2708 #endif
2709 
2710 #else // KMP_USE_DYNAMIC_LOCK
2711  int acquire_status;
2712  kmp_user_lock_p lck;
2713 
2714  if ((__kmp_user_lock_kind == lk_tas) &&
2715  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2716  OMP_NEST_LOCK_T_SIZE)) {
2717  lck = (kmp_user_lock_p)user_lock;
2718  }
2719 #if KMP_USE_FUTEX
2720  else if ((__kmp_user_lock_kind == lk_futex) &&
2721  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2722  OMP_NEST_LOCK_T_SIZE)) {
2723  lck = (kmp_user_lock_p)user_lock;
2724  }
2725 #endif
2726  else {
2727  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2728  }
2729 
2730 #if USE_ITT_BUILD
2731  __kmp_itt_lock_acquiring(lck);
2732 #endif /* USE_ITT_BUILD */
2733 #if OMPT_SUPPORT && OMPT_OPTIONAL
2734  // This is the case, if called from omp_init_lock_with_hint:
2735  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2736  if (!codeptr)
2737  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2738  if (ompt_enabled.enabled) {
2739  if (ompt_enabled.ompt_callback_mutex_acquire) {
2740  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2741  ompt_mutex_nest_lock, omp_lock_hint_none,
2742  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2743  codeptr);
2744  }
2745  }
2746 #endif
2747 
2748  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2749 
2750 #if USE_ITT_BUILD
2751  __kmp_itt_lock_acquired(lck);
2752 #endif /* USE_ITT_BUILD */
2753 
2754 #if OMPT_SUPPORT && OMPT_OPTIONAL
2755  if (ompt_enabled.enabled) {
2756  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2757  if (ompt_enabled.ompt_callback_mutex_acquired) {
2758  // lock_first
2759  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2760  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2761  }
2762  } else {
2763  if (ompt_enabled.ompt_callback_nest_lock) {
2764  // lock_next
2765  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2766  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2767  }
2768  }
2769  }
2770 #endif
2771 
2772 #endif // KMP_USE_DYNAMIC_LOCK
2773 }
2774 
2775 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2776 #if KMP_USE_DYNAMIC_LOCK
2777 
2778  int tag = KMP_EXTRACT_D_TAG(user_lock);
2779 #if USE_ITT_BUILD
2780  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2781 #endif
2782 #if KMP_USE_INLINED_TAS
2783  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2784  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2785  } else
2786 #elif KMP_USE_INLINED_FUTEX
2787  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2788  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2789  } else
2790 #endif
2791  {
2792  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2793  }
2794 
2795 #if OMPT_SUPPORT && OMPT_OPTIONAL
2796  // This is the case, if called from omp_init_lock_with_hint:
2797  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2798  if (!codeptr)
2799  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2800  if (ompt_enabled.ompt_callback_mutex_released) {
2801  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2802  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2803  }
2804 #endif
2805 
2806 #else // KMP_USE_DYNAMIC_LOCK
2807 
2808  kmp_user_lock_p lck;
2809 
2810  /* Can't use serial interval since not block structured */
2811  /* release the lock */
2812 
2813  if ((__kmp_user_lock_kind == lk_tas) &&
2814  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2815 #if KMP_OS_LINUX && \
2816  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2817 // "fast" path implemented to fix customer performance issue
2818 #if USE_ITT_BUILD
2819  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2820 #endif /* USE_ITT_BUILD */
2821  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2822  KMP_MB();
2823 
2824 #if OMPT_SUPPORT && OMPT_OPTIONAL
2825  // This is the case, if called from omp_init_lock_with_hint:
2826  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2827  if (!codeptr)
2828  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2829  if (ompt_enabled.ompt_callback_mutex_released) {
2830  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2831  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2832  }
2833 #endif
2834 
2835  return;
2836 #else
2837  lck = (kmp_user_lock_p)user_lock;
2838 #endif
2839  }
2840 #if KMP_USE_FUTEX
2841  else if ((__kmp_user_lock_kind == lk_futex) &&
2842  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2843  lck = (kmp_user_lock_p)user_lock;
2844  }
2845 #endif
2846  else {
2847  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2848  }
2849 
2850 #if USE_ITT_BUILD
2851  __kmp_itt_lock_releasing(lck);
2852 #endif /* USE_ITT_BUILD */
2853 
2854  RELEASE_LOCK(lck, gtid);
2855 
2856 #if OMPT_SUPPORT && OMPT_OPTIONAL
2857  // This is the case, if called from omp_init_lock_with_hint:
2858  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2859  if (!codeptr)
2860  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2861  if (ompt_enabled.ompt_callback_mutex_released) {
2862  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2863  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2864  }
2865 #endif
2866 
2867 #endif // KMP_USE_DYNAMIC_LOCK
2868 }
2869 
2870 /* release the nested lock */
2871 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2872 #if KMP_USE_DYNAMIC_LOCK
2873 
2874 #if USE_ITT_BUILD
2875  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2876 #endif
2877  int release_status =
2878  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2879  (void) release_status;
2880 
2881 #if OMPT_SUPPORT && OMPT_OPTIONAL
2882  // This is the case, if called from omp_init_lock_with_hint:
2883  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2884  if (!codeptr)
2885  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2886  if (ompt_enabled.enabled) {
2887  if (release_status == KMP_LOCK_RELEASED) {
2888  if (ompt_enabled.ompt_callback_mutex_released) {
2889  // release_lock_last
2890  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2891  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2892  codeptr);
2893  }
2894  } else if (ompt_enabled.ompt_callback_nest_lock) {
2895  // release_lock_prev
2896  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2897  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2898  }
2899  }
2900 #endif
2901 
2902 #else // KMP_USE_DYNAMIC_LOCK
2903 
2904  kmp_user_lock_p lck;
2905 
2906  /* Can't use serial interval since not block structured */
2907 
2908  if ((__kmp_user_lock_kind == lk_tas) &&
2909  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2910  OMP_NEST_LOCK_T_SIZE)) {
2911 #if KMP_OS_LINUX && \
2912  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2913  // "fast" path implemented to fix customer performance issue
2914  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2915 #if USE_ITT_BUILD
2916  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2917 #endif /* USE_ITT_BUILD */
2918 
2919 #if OMPT_SUPPORT && OMPT_OPTIONAL
2920  int release_status = KMP_LOCK_STILL_HELD;
2921 #endif
2922 
2923  if (--(tl->lk.depth_locked) == 0) {
2924  TCW_4(tl->lk.poll, 0);
2925 #if OMPT_SUPPORT && OMPT_OPTIONAL
2926  release_status = KMP_LOCK_RELEASED;
2927 #endif
2928  }
2929  KMP_MB();
2930 
2931 #if OMPT_SUPPORT && OMPT_OPTIONAL
2932  // This is the case, if called from omp_init_lock_with_hint:
2933  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2934  if (!codeptr)
2935  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2936  if (ompt_enabled.enabled) {
2937  if (release_status == KMP_LOCK_RELEASED) {
2938  if (ompt_enabled.ompt_callback_mutex_released) {
2939  // release_lock_last
2940  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2941  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2942  }
2943  } else if (ompt_enabled.ompt_callback_nest_lock) {
2944  // release_lock_previous
2945  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2946  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2947  }
2948  }
2949 #endif
2950 
2951  return;
2952 #else
2953  lck = (kmp_user_lock_p)user_lock;
2954 #endif
2955  }
2956 #if KMP_USE_FUTEX
2957  else if ((__kmp_user_lock_kind == lk_futex) &&
2958  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2959  OMP_NEST_LOCK_T_SIZE)) {
2960  lck = (kmp_user_lock_p)user_lock;
2961  }
2962 #endif
2963  else {
2964  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2965  }
2966 
2967 #if USE_ITT_BUILD
2968  __kmp_itt_lock_releasing(lck);
2969 #endif /* USE_ITT_BUILD */
2970 
2971  int release_status;
2972  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2973 #if OMPT_SUPPORT && OMPT_OPTIONAL
2974  // This is the case, if called from omp_init_lock_with_hint:
2975  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2976  if (!codeptr)
2977  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2978  if (ompt_enabled.enabled) {
2979  if (release_status == KMP_LOCK_RELEASED) {
2980  if (ompt_enabled.ompt_callback_mutex_released) {
2981  // release_lock_last
2982  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2983  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2984  }
2985  } else if (ompt_enabled.ompt_callback_nest_lock) {
2986  // release_lock_previous
2987  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2988  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2989  }
2990  }
2991 #endif
2992 
2993 #endif // KMP_USE_DYNAMIC_LOCK
2994 }
2995 
2996 /* try to acquire the lock */
2997 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2998  KMP_COUNT_BLOCK(OMP_test_lock);
2999 
3000 #if KMP_USE_DYNAMIC_LOCK
3001  int rc;
3002  int tag = KMP_EXTRACT_D_TAG(user_lock);
3003 #if USE_ITT_BUILD
3004  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3005 #endif
3006 #if OMPT_SUPPORT && OMPT_OPTIONAL
3007  // This is the case, if called from omp_init_lock_with_hint:
3008  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3009  if (!codeptr)
3010  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3011  if (ompt_enabled.ompt_callback_mutex_acquire) {
3012  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3013  ompt_mutex_lock, omp_lock_hint_none,
3014  __ompt_get_mutex_impl_type(user_lock),
3015  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3016  }
3017 #endif
3018 #if KMP_USE_INLINED_TAS
3019  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3020  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3021  } else
3022 #elif KMP_USE_INLINED_FUTEX
3023  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3024  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3025  } else
3026 #endif
3027  {
3028  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3029  }
3030  if (rc) {
3031 #if USE_ITT_BUILD
3032  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3033 #endif
3034 #if OMPT_SUPPORT && OMPT_OPTIONAL
3035  if (ompt_enabled.ompt_callback_mutex_acquired) {
3036  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3037  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3038  }
3039 #endif
3040  return FTN_TRUE;
3041  } else {
3042 #if USE_ITT_BUILD
3043  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3044 #endif
3045  return FTN_FALSE;
3046  }
3047 
3048 #else // KMP_USE_DYNAMIC_LOCK
3049 
3050  kmp_user_lock_p lck;
3051  int rc;
3052 
3053  if ((__kmp_user_lock_kind == lk_tas) &&
3054  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3055  lck = (kmp_user_lock_p)user_lock;
3056  }
3057 #if KMP_USE_FUTEX
3058  else if ((__kmp_user_lock_kind == lk_futex) &&
3059  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3060  lck = (kmp_user_lock_p)user_lock;
3061  }
3062 #endif
3063  else {
3064  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3065  }
3066 
3067 #if USE_ITT_BUILD
3068  __kmp_itt_lock_acquiring(lck);
3069 #endif /* USE_ITT_BUILD */
3070 #if OMPT_SUPPORT && OMPT_OPTIONAL
3071  // This is the case, if called from omp_init_lock_with_hint:
3072  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3073  if (!codeptr)
3074  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3075  if (ompt_enabled.ompt_callback_mutex_acquire) {
3076  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3077  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3078  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3079  }
3080 #endif
3081 
3082  rc = TEST_LOCK(lck, gtid);
3083 #if USE_ITT_BUILD
3084  if (rc) {
3085  __kmp_itt_lock_acquired(lck);
3086  } else {
3087  __kmp_itt_lock_cancelled(lck);
3088  }
3089 #endif /* USE_ITT_BUILD */
3090 #if OMPT_SUPPORT && OMPT_OPTIONAL
3091  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3092  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3093  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3094  }
3095 #endif
3096 
3097  return (rc ? FTN_TRUE : FTN_FALSE);
3098 
3099 /* Can't use serial interval since not block structured */
3100 
3101 #endif // KMP_USE_DYNAMIC_LOCK
3102 }
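
// A minimal sketch of the non-blocking use of __kmpc_test_lock() through
// omp_test_lock(); the function name is hypothetical. Unlike omp_set_lock(),
// a failed test returns immediately, so the caller can do other work and
// retry.
static void example_test_lock_polling(omp_lock_t *l) {
  while (!omp_test_lock(l)) {
    // lock is busy: do some other useful work, then retry
  }
  // lock is held here
  omp_unset_lock(l);
}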
3103 
3104 /* try to acquire the nested lock */
3105 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3106 #if KMP_USE_DYNAMIC_LOCK
3107  int rc;
3108 #if USE_ITT_BUILD
3109  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3110 #endif
3111 #if OMPT_SUPPORT && OMPT_OPTIONAL
3112  // This is the case, if called from omp_init_lock_with_hint:
3113  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3114  if (!codeptr)
3115  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3116  if (ompt_enabled.ompt_callback_mutex_acquire) {
3117  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3118  ompt_mutex_nest_lock, omp_lock_hint_none,
3119  __ompt_get_mutex_impl_type(user_lock),
3120  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3121  }
3122 #endif
3123  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3124 #if USE_ITT_BUILD
3125  if (rc) {
3126  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3127  } else {
3128  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3129  }
3130 #endif
3131 #if OMPT_SUPPORT && OMPT_OPTIONAL
3132  if (ompt_enabled.enabled && rc) {
3133  if (rc == 1) {
3134  if (ompt_enabled.ompt_callback_mutex_acquired) {
3135  // lock_first
3136  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3137  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3138  codeptr);
3139  }
3140  } else {
3141  if (ompt_enabled.ompt_callback_nest_lock) {
3142  // lock_next
3143  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3144  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3145  }
3146  }
3147  }
3148 #endif
3149  return rc;
3150 
3151 #else // KMP_USE_DYNAMIC_LOCK
3152 
3153  kmp_user_lock_p lck;
3154  int rc;
3155 
3156  if ((__kmp_user_lock_kind == lk_tas) &&
3157  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3158  OMP_NEST_LOCK_T_SIZE)) {
3159  lck = (kmp_user_lock_p)user_lock;
3160  }
3161 #if KMP_USE_FUTEX
3162  else if ((__kmp_user_lock_kind == lk_futex) &&
3163  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3164  OMP_NEST_LOCK_T_SIZE)) {
3165  lck = (kmp_user_lock_p)user_lock;
3166  }
3167 #endif
3168  else {
3169  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3170  }
3171 
3172 #if USE_ITT_BUILD
3173  __kmp_itt_lock_acquiring(lck);
3174 #endif /* USE_ITT_BUILD */
3175 
3176 #if OMPT_SUPPORT && OMPT_OPTIONAL
3177  // This is the case, if called from omp_init_lock_with_hint:
3178  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3179  if (!codeptr)
3180  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3181  if (ompt_enabled.enabled &&
3182  ompt_enabled.ompt_callback_mutex_acquire) {
3183  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3184  ompt_mutex_nest_lock, omp_lock_hint_none,
3185  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3186  codeptr);
3187  }
3188 #endif
3189 
3190  rc = TEST_NESTED_LOCK(lck, gtid);
3191 #if USE_ITT_BUILD
3192  if (rc) {
3193  __kmp_itt_lock_acquired(lck);
3194  } else {
3195  __kmp_itt_lock_cancelled(lck);
3196  }
3197 #endif /* USE_ITT_BUILD */
3198 #if OMPT_SUPPORT && OMPT_OPTIONAL
3199  if (ompt_enabled.enabled && rc) {
3200  if (rc == 1) {
3201  if (ompt_enabled.ompt_callback_mutex_acquired) {
3202  // lock_first
3203  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3204  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3205  }
3206  } else {
3207  if (ompt_enabled.ompt_callback_nest_lock) {
3208  // lock_next
3209  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3210  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3211  }
3212  }
3213  }
3214 #endif
3215  return rc;
3216 
3217 /* Can't use serial interval since not block structured */
3218 
3219 #endif // KMP_USE_DYNAMIC_LOCK
3220 }
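
// A minimal sketch for omp_test_nest_lock(), whose return value is the
// nesting count after a successful acquisition (1 on the first acquisition,
// exactly the rc == 1 case distinguished in the OMPT code above) and 0 on
// failure; the function name is hypothetical.
static void example_test_nest_lock(void) {
  static omp_nest_lock_t nl;
  omp_init_nest_lock(&nl);
  int depth1 = omp_test_nest_lock(&nl); // 1: first acquisition by this thread
  int depth2 = omp_test_nest_lock(&nl); // 2: re-acquisition by the owner
  omp_unset_nest_lock(&nl);
  omp_unset_nest_lock(&nl);
  omp_destroy_nest_lock(&nl);
  (void)depth1;
  (void)depth2;
}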
3221 
3222 // Interface to fast scalable reduce methods routines
3223 
3224 // Keep the selected method in a thread-local structure for cross-function
3225 // use: it will be read in the __kmpc_end_reduce* functions.
3226 // Another solution would be to re-determine the method in the
3227 // __kmpc_end_reduce* functions (a new prototype would be required then).
3228 // AT: which solution is better?
3229 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3230  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3231 
3232 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3233  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3234 
3235 // description of the packed_reduction_method variable: look at the macros in
3236 // kmp.h
3237 
3238 // used in a critical section reduce block
3239 static __forceinline void
3240 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3241  kmp_critical_name *crit) {
3242 
3243  // this lock was visible to a customer and to the threading profile tool as a
3244  // serial overhead span (although it is used for an internal purpose only)
3245  // why was it visible in the previous implementation?
3246  // should we keep it visible in the new reduce block?
3247  kmp_user_lock_p lck;
3248 
3249 #if KMP_USE_DYNAMIC_LOCK
3250 
3251  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3252  // Check if it is initialized.
3253  if (*lk == 0) {
3254  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3255  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3256  KMP_GET_D_TAG(__kmp_user_lock_seq));
3257  } else {
3258  __kmp_init_indirect_csptr(crit, loc, global_tid,
3259  KMP_GET_I_TAG(__kmp_user_lock_seq));
3260  }
3261  }
3262  // Branch for accessing the actual lock object and set operation. This
3263  // branching is inevitable since this lock initialization does not follow the
3264  // normal dispatch path (lock table is not used).
3265  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3266  lck = (kmp_user_lock_p)lk;
3267  KMP_DEBUG_ASSERT(lck != NULL);
3268  if (__kmp_env_consistency_check) {
3269  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3270  }
3271  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3272  } else {
3273  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3274  lck = ilk->lock;
3275  KMP_DEBUG_ASSERT(lck != NULL);
3276  if (__kmp_env_consistency_check) {
3277  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3278  }
3279  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3280  }
3281 
3282 #else // KMP_USE_DYNAMIC_LOCK
3283 
3284  // We know that the fast reduction code is only emitted by Intel compilers
3285  // with 32 byte critical sections. If there isn't enough space, then we
3286  // have to use a pointer.
3287  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3288  lck = (kmp_user_lock_p)crit;
3289  } else {
3290  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3291  }
3292  KMP_DEBUG_ASSERT(lck != NULL);
3293 
3294  if (__kmp_env_consistency_check)
3295  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3296 
3297  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3298 
3299 #endif // KMP_USE_DYNAMIC_LOCK
3300 }
3301 
3302 // used in a critical section reduce block
3303 static __forceinline void
3304 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3305  kmp_critical_name *crit) {
3306 
3307  kmp_user_lock_p lck;
3308 
3309 #if KMP_USE_DYNAMIC_LOCK
3310 
3311  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3312  lck = (kmp_user_lock_p)crit;
3313  if (__kmp_env_consistency_check)
3314  __kmp_pop_sync(global_tid, ct_critical, loc);
3315  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3316  } else {
3317  kmp_indirect_lock_t *ilk =
3318  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3319  if (__kmp_env_consistency_check)
3320  __kmp_pop_sync(global_tid, ct_critical, loc);
3321  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3322  }
3323 
3324 #else // KMP_USE_DYNAMIC_LOCK
3325 
3326  // We know that the fast reduction code is only emitted by Intel compilers
3327  // with 32 byte critical sections. If there isn't enough space, then we have
3328  // to use a pointer.
3329  if (__kmp_base_user_lock_size > 32) {
3330  lck = *((kmp_user_lock_p *)crit);
3331  KMP_ASSERT(lck != NULL);
3332  } else {
3333  lck = (kmp_user_lock_p)crit;
3334  }
3335 
3336  if (__kmp_env_consistency_check)
3337  __kmp_pop_sync(global_tid, ct_critical, loc);
3338 
3339  __kmp_release_user_lock_with_checks(lck, global_tid);
3340 
3341 #endif // KMP_USE_DYNAMIC_LOCK
3342 } // __kmp_end_critical_section_reduce_block
3343 
3344 #if OMP_40_ENABLED
3345 static __forceinline int
3346 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3347  int *task_state) {
3348  kmp_team_t *team;
3349 
3350  // Check whether we are inside a teams construct.
3351  if (th->th.th_teams_microtask) {
3352  *team_p = team = th->th.th_team;
3353  if (team->t.t_level == th->th.th_teams_level) {
3354  // This is reduction at teams construct.
3355  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3356  // Let's swap teams temporarily for the reduction.
3357  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3358  th->th.th_team = team->t.t_parent;
3359  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3360  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3361  *task_state = th->th.th_task_state;
3362  th->th.th_task_state = 0;
3363 
3364  return 1;
3365  }
3366  }
3367  return 0;
3368 }
3369 
3370 static __forceinline void
3371 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3372  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3373  th->th.th_info.ds.ds_tid = 0;
3374  th->th.th_team = team;
3375  th->th.th_team_nproc = team->t.t_nproc;
3376  th->th.th_task_team = team->t.t_task_team[task_state];
3377  th->th.th_task_state = task_state;
3378 }
3379 #endif
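
// A scenario sketch (not from the upstream sources): the swap above is
// exercised by a reduction on a teams construct, e.g.
//
//   #pragma omp teams num_teams(4) reduction(+ : sum)
//   sum += team_partial(); // team_partial() is hypothetical
//
// The initial thread of each team reaches __kmpc_reduce*() at the teams
// level and is temporarily re-attached to the parent team, so the cross-team
// combination uses the parent team's barrier and task-team state; the swap is
// undone by __kmp_restore_swapped_teams() once the reduction completes.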
3380 
3381 /* 2.a.i. Reduce Block without a terminating barrier */
3397 kmp_int32
3398 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3399  size_t reduce_size, void *reduce_data,
3400  void (*reduce_func)(void *lhs_data, void *rhs_data),
3401  kmp_critical_name *lck) {
3402 
3403  KMP_COUNT_BLOCK(REDUCE_nowait);
3404  int retval = 0;
3405  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3406 #if OMP_40_ENABLED
3407  kmp_info_t *th;
3408  kmp_team_t *team;
3409  int teams_swapped = 0, task_state;
3410 #endif
3411  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3412 
3413  // why do we need this initialization here at all?
3414  // Reduction clause can not be used as a stand-alone directive.
3415 
3416  // do not call __kmp_serial_initialize(), it will be called by
3417  // __kmp_parallel_initialize() if needed
3418  // possible detection of false-positive race by the threadchecker ???
3419  if (!TCR_4(__kmp_init_parallel))
3420  __kmp_parallel_initialize();
3421 
3422 #if OMP_50_ENABLED
3423  __kmp_resume_if_soft_paused();
3424 #endif
3425 
3426 // check correctness of reduce block nesting
3427 #if KMP_USE_DYNAMIC_LOCK
3428  if (__kmp_env_consistency_check)
3429  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3430 #else
3431  if (__kmp_env_consistency_check)
3432  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3433 #endif
3434 
3435 #if OMP_40_ENABLED
3436  th = __kmp_thread_from_gtid(global_tid);
3437  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3438 #endif // OMP_40_ENABLED
3439 
3440  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
3441  // functions, so the value must be kept in a variable.
3442  // The variable should be either a construct-specific or thread-specific
3443  // property, not a team-specific property
3444  // (a thread can reach the next reduce block on the next construct, and the
3445  // reduce method may differ on that construct).
3446  // An ident_t "loc" parameter could be used as a construct-specific property
3447  // (but what if loc == 0?).
3448  // (If both construct-specific and team-specific variables were shared,
3449  // unnecessary extra syncs would be needed.)
3450  // A thread-specific variable is better with respect to the two issues above
3451  // (next construct and extra syncs);
3452  // a thread-specific "th_local.reduction_method" variable is used currently.
3453  // Each thread executes the 'determine' and 'set' lines (there is no need to
3454  // have only one thread execute them; doing so would require extra syncs).
3455 
3456  packed_reduction_method = __kmp_determine_reduction_method(
3457  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3458  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3459 
3460  if (packed_reduction_method == critical_reduce_block) {
3461 
3462  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3463  retval = 1;
3464 
3465  } else if (packed_reduction_method == empty_reduce_block) {
3466 
3467  // usage: if team size == 1, no synchronization is required ( Intel
3468  // platforms only )
3469  retval = 1;
3470 
3471  } else if (packed_reduction_method == atomic_reduce_block) {
3472 
3473  retval = 2;
3474 
3475  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3476  // won't be called by the code gen)
3477  // (this is not ideal, because the checking block is closed by this 'pop'
3478  // before the atomic operation has actually executed;
3479  // the atomic operation will run slightly later, literally on the next
3480  // instruction)
3481  if (__kmp_env_consistency_check)
3482  __kmp_pop_sync(global_tid, ct_reduce, loc);
3483 
3484  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3485  tree_reduce_block)) {
3486 
3487 // AT: performance issue: a real barrier here
3488 // AT: (if the master is slow, other threads are blocked here waiting for the
3489 // master to arrive and release them)
3490 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3491 // AT: (specifying NOWAIT won't improve performance here, which will be
3492 // confusing to a customer)
3493 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3494 // might be faster and more in line with the spirit of NOWAIT
3495 // AT: TO DO: do epcc test and compare times
3496 
3497 // this barrier should be invisible to a customer and to the threading profile
3498 // tool (it's neither a terminating barrier nor customer's code, it's
3499 // used for an internal purpose)
3500 #if OMPT_SUPPORT
3501  // JP: can this barrier potentially lead to task scheduling?
3502  // JP: as long as there is a barrier in the implementation, OMPT should and
3503  // will provide the barrier events,
3504  // so we set up the necessary frame/return addresses.
3505  ompt_frame_t *ompt_frame;
3506  if (ompt_enabled.enabled) {
3507  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3508  if (ompt_frame->enter_frame.ptr == NULL)
3509  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3510  OMPT_STORE_RETURN_ADDRESS(global_tid);
3511  }
3512 #endif
3513 #if USE_ITT_NOTIFY
3514  __kmp_threads[global_tid]->th.th_ident = loc;
3515 #endif
3516  retval =
3517  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3518  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3519  retval = (retval != 0) ? (0) : (1);
3520 #if OMPT_SUPPORT && OMPT_OPTIONAL
3521  if (ompt_enabled.enabled) {
3522  ompt_frame->enter_frame = ompt_data_none;
3523  }
3524 #endif
3525 
3526  // all workers except the master should do this pop here
3527  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3528  if (__kmp_env_consistency_check) {
3529  if (retval == 0) {
3530  __kmp_pop_sync(global_tid, ct_reduce, loc);
3531  }
3532  }
3533 
3534  } else {
3535 
3536  // should never reach this block
3537  KMP_ASSERT(0); // "unexpected method"
3538  }
3539 #if OMP_40_ENABLED
3540  if (teams_swapped) {
3541  __kmp_restore_swapped_teams(th, team, task_state);
3542  }
3543 #endif
3544  KA_TRACE(
3545  10,
3546  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3547  global_tid, packed_reduction_method, retval));
3548 
3549  return retval;
3550 }
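
// A minimal sketch (hypothetical names, GCC/Clang atomic builtin assumed) of
// how generated code typically consumes the return value above: 1 means this
// thread combines under the critical/tree method and must call
// __kmpc_end_reduce_nowait(); 2 means every thread combines with atomics and
// no end call is made; 0 means another thread already did the combination.
static void example_reduce_nowait_dispatch(ident_t *loc, kmp_int32 gtid,
                                           kmp_critical_name *crit,
                                           int *shared, int my_partial,
                                           void (*reducer)(void *, void *)) {
  int ret = __kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(int),
                                 &my_partial, reducer, crit);
  if (ret == 1) {
    *shared += my_partial; // combination guarded by the selected method
    __kmpc_end_reduce_nowait(loc, gtid, crit);
  } else if (ret == 2) {
    __sync_fetch_and_add(shared, my_partial); // atomic combination
  }
}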
3551 
3560 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3561  kmp_critical_name *lck) {
3562 
3563  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3564 
3565  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3566 
3567  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3568 
3569  if (packed_reduction_method == critical_reduce_block) {
3570 
3571  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3572 
3573  } else if (packed_reduction_method == empty_reduce_block) {
3574 
3575  // usage: if team size == 1, no synchronization is required ( on Intel
3576  // platforms only )
3577 
3578  } else if (packed_reduction_method == atomic_reduce_block) {
3579 
3580  // neither master nor other workers should get here
3581  // (code gen does not generate this call in case 2: atomic reduce block)
3582  // actually it would be better to remove this else-if entirely;
3583  // after removal this value would be checked by the 'else' branch and assert
3584 
3585  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3586  tree_reduce_block)) {
3587 
3588  // only master gets here
3589 
3590  } else {
3591 
3592  // should never reach this block
3593  KMP_ASSERT(0); // "unexpected method"
3594  }
3595 
3596  if (__kmp_env_consistency_check)
3597  __kmp_pop_sync(global_tid, ct_reduce, loc);
3598 
3599  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3600  global_tid, packed_reduction_method));
3601 
3602  return;
3603 }
3604 
3605 /* 2.a.ii. Reduce Block with a terminating barrier */
3606 
3622 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3623  size_t reduce_size, void *reduce_data,
3624  void (*reduce_func)(void *lhs_data, void *rhs_data),
3625  kmp_critical_name *lck) {
3626  KMP_COUNT_BLOCK(REDUCE_wait);
3627  int retval = 0;
3628  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3629 #if OMP_40_ENABLED
3630  kmp_info_t *th;
3631  kmp_team_t *team;
3632  int teams_swapped = 0, task_state;
3633 #endif
3634 
3635  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3636 
3637  // why do we need this initialization here at all?
3638  // Reduction clause can not be a stand-alone directive.
3639 
3640  // do not call __kmp_serial_initialize(), it will be called by
3641  // __kmp_parallel_initialize() if needed
3642  // possible detection of false-positive race by the threadchecker ???
3643  if (!TCR_4(__kmp_init_parallel))
3644  __kmp_parallel_initialize();
3645 
3646 #if OMP_50_ENABLED
3647  __kmp_resume_if_soft_paused();
3648 #endif
3649 
3650 // check correctness of reduce block nesting
3651 #if KMP_USE_DYNAMIC_LOCK
3652  if (__kmp_env_consistency_check)
3653  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3654 #else
3655  if (__kmp_env_consistency_check)
3656  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3657 #endif
3658 
3659 #if OMP_40_ENABLED
3660  th = __kmp_thread_from_gtid(global_tid);
3661  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3662 #endif // OMP_40_ENABLED
3663 
3664  packed_reduction_method = __kmp_determine_reduction_method(
3665  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3666  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3667 
3668  if (packed_reduction_method == critical_reduce_block) {
3669 
3670  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3671  retval = 1;
3672 
3673  } else if (packed_reduction_method == empty_reduce_block) {
3674 
3675  // usage: if team size == 1, no synchronization is required ( Intel
3676  // platforms only )
3677  retval = 1;
3678 
3679  } else if (packed_reduction_method == atomic_reduce_block) {
3680 
3681  retval = 2;
3682 
3683  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3684  tree_reduce_block)) {
3685 
3686 // case tree_reduce_block:
3687 // this barrier should be visible to a customer and to the threading profile
3688 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3689 #if OMPT_SUPPORT
3690  ompt_frame_t *ompt_frame;
3691  if (ompt_enabled.enabled) {
3692  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3693  if (ompt_frame->enter_frame.ptr == NULL)
3694  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3695  OMPT_STORE_RETURN_ADDRESS(global_tid);
3696  }
3697 #endif
3698 #if USE_ITT_NOTIFY
3699  __kmp_threads[global_tid]->th.th_ident =
3700  loc; // needed for correct notification of frames
3701 #endif
3702  retval =
3703  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3704  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3705  retval = (retval != 0) ? (0) : (1);
3706 #if OMPT_SUPPORT && OMPT_OPTIONAL
3707  if (ompt_enabled.enabled) {
3708  ompt_frame->enter_frame = ompt_data_none;
3709  }
3710 #endif
3711 
3712  // all workers except the master should do this pop here
3713  // (workers other than the master will not enter __kmpc_end_reduce())
3714  if (__kmp_env_consistency_check) {
3715  if (retval == 0) { // 0: all other workers; 1: master
3716  __kmp_pop_sync(global_tid, ct_reduce, loc);
3717  }
3718  }
3719 
3720  } else {
3721 
3722  // should never reach this block
3723  KMP_ASSERT(0); // "unexpected method"
3724  }
3725 #if OMP_40_ENABLED
3726  if (teams_swapped) {
3727  __kmp_restore_swapped_teams(th, team, task_state);
3728  }
3729 #endif
3730 
3731  KA_TRACE(10,
3732  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3733  global_tid, packed_reduction_method, retval));
3734 
3735  return retval;
3736 }
3737 
3748 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3749  kmp_critical_name *lck) {
3750 
3751  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3752 #if OMP_40_ENABLED
3753  kmp_info_t *th;
3754  kmp_team_t *team;
3755  int teams_swapped = 0, task_state;
3756 #endif
3757 
3758  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3759 
3760 #if OMP_40_ENABLED
3761  th = __kmp_thread_from_gtid(global_tid);
3762  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3763 #endif // OMP_40_ENABLED
3764 
3765  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3766 
3767  // this barrier should be visible to a customer and to the threading profile
3768  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3769 
3770  if (packed_reduction_method == critical_reduce_block) {
3771 
3772  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3773 
3774 // TODO: implicit barrier: should be exposed
3775 #if OMPT_SUPPORT
3776  ompt_frame_t *ompt_frame;
3777  if (ompt_enabled.enabled) {
3778  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3779  if (ompt_frame->enter_frame.ptr == NULL)
3780  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3781  OMPT_STORE_RETURN_ADDRESS(global_tid);
3782  }
3783 #endif
3784 #if USE_ITT_NOTIFY
3785  __kmp_threads[global_tid]->th.th_ident = loc;
3786 #endif
3787  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3788 #if OMPT_SUPPORT && OMPT_OPTIONAL
3789  if (ompt_enabled.enabled) {
3790  ompt_frame->enter_frame = ompt_data_none;
3791  }
3792 #endif
3793 
3794  } else if (packed_reduction_method == empty_reduce_block) {
3795 
3796 // usage: if team size==1, no synchronization is required (Intel platforms only)
3797 
3798 // TODO: implicit barrier: should be exposed
3799 #if OMPT_SUPPORT
3800  ompt_frame_t *ompt_frame;
3801  if (ompt_enabled.enabled) {
3802  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3803  if (ompt_frame->enter_frame.ptr == NULL)
3804  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3805  OMPT_STORE_RETURN_ADDRESS(global_tid);
3806  }
3807 #endif
3808 #if USE_ITT_NOTIFY
3809  __kmp_threads[global_tid]->th.th_ident = loc;
3810 #endif
3811  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3812 #if OMPT_SUPPORT && OMPT_OPTIONAL
3813  if (ompt_enabled.enabled) {
3814  ompt_frame->enter_frame = ompt_data_none;
3815  }
3816 #endif
3817 
3818  } else if (packed_reduction_method == atomic_reduce_block) {
3819 
3820 #if OMPT_SUPPORT
3821  ompt_frame_t *ompt_frame;
3822  if (ompt_enabled.enabled) {
3823  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3824  if (ompt_frame->enter_frame.ptr == NULL)
3825  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3826  OMPT_STORE_RETURN_ADDRESS(global_tid);
3827  }
3828 #endif
3829 // TODO: implicit barrier: should be exposed
3830 #if USE_ITT_NOTIFY
3831  __kmp_threads[global_tid]->th.th_ident = loc;
3832 #endif
3833  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3834 #if OMPT_SUPPORT && OMPT_OPTIONAL
3835  if (ompt_enabled.enabled) {
3836  ompt_frame->enter_frame = ompt_data_none;
3837  }
3838 #endif
3839 
3840  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3841  tree_reduce_block)) {
3842 
3843  // only master executes here (master releases all other workers)
3844  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3845  global_tid);
3846 
3847  } else {
3848 
3849  // should never reach this block
3850  KMP_ASSERT(0); // "unexpected method"
3851  }
3852 #if OMP_40_ENABLED
3853  if (teams_swapped) {
3854  __kmp_restore_swapped_teams(th, team, task_state);
3855  }
3856 #endif
3857 
3858  if (__kmp_env_consistency_check)
3859  __kmp_pop_sync(global_tid, ct_reduce, loc);
3860 
3861  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3862  global_tid, packed_reduction_method));
3863 
3864  return;
3865 }
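By contrast with the nowait form, a rough sketch of the blocking reduction (again hypothetical, with placeholder names assumed to be provided by the compiler): here the atomic path also calls __kmpc_end_reduce, so every thread reaches the terminating barrier executed by the branches above.

/* approximate codegen for: #pragma omp parallel for reduction(+ : sum) */
switch (__kmpc_reduce(&loc, gtid, 1, sizeof(sum_private), &sum_private,
                      reduce_func, &crit_name)) {
case 1: // reducing thread: combine, then release the lock / barrier
  sum += sum_private;
  __kmpc_end_reduce(&loc, gtid, &crit_name);
  break;
case 2: // atomic path: atomic update, then still call __kmpc_end_reduce
  /* atomic sum += sum_private */
  __kmpc_end_reduce(&loc, gtid, &crit_name);
  break;
default:
  break;
}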
3866 
3867 #undef __KMP_GET_REDUCTION_METHOD
3868 #undef __KMP_SET_REDUCTION_METHOD
3869 
3870 /* end of interface to fast scalable reduce routines */
3871 
3872 kmp_uint64 __kmpc_get_taskid() {
3873 
3874  kmp_int32 gtid;
3875  kmp_info_t *thread;
3876 
3877  gtid = __kmp_get_gtid();
3878  if (gtid < 0) {
3879  return 0;
3880  }
3881  thread = __kmp_thread_from_gtid(gtid);
3882  return thread->th.th_current_task->td_task_id;
3883 
3884 } // __kmpc_get_taskid
3885 
3886 kmp_uint64 __kmpc_get_parent_taskid() {
3887 
3888  kmp_int32 gtid;
3889  kmp_info_t *thread;
3890  kmp_taskdata_t *parent_task;
3891 
3892  gtid = __kmp_get_gtid();
3893  if (gtid < 0) {
3894  return 0;
3895  }
3896  thread = __kmp_thread_from_gtid(gtid);
3897  parent_task = thread->th.th_current_task->td_parent;
3898  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3899 
3900 } // __kmpc_get_parent_taskid
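These two helpers are plain C-linkage entry points, so they can be called directly from instrumentation or debugging code to identify the task a thread is currently executing; both return 0 for a thread the runtime does not know about. A minimal sketch, assuming hand-written extern declarations (they are not taken from a public header):

#include <stdio.h>

extern unsigned long long __kmpc_get_taskid(void);
extern unsigned long long __kmpc_get_parent_taskid(void);

static void log_current_task(void) {
  /* prints 0 for both ids if called outside the OpenMP runtime */
  printf("task id %llu, parent id %llu\n",
         __kmpc_get_taskid(), __kmpc_get_parent_taskid());
}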
3901 
3902 #if OMP_45_ENABLED
3903 
3914 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3915  const struct kmp_dim *dims) {
3916  int j, idx;
3917  kmp_int64 last, trace_count;
3918  kmp_info_t *th = __kmp_threads[gtid];
3919  kmp_team_t *team = th->th.th_team;
3920  kmp_uint32 *flags;
3921  kmp_disp_t *pr_buf = th->th.th_dispatch;
3922  dispatch_shared_info_t *sh_buf;
3923 
3924  KA_TRACE(
3925  20,
3926  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3927  gtid, num_dims, !team->t.t_serialized));
3928  KMP_DEBUG_ASSERT(dims != NULL);
3929  KMP_DEBUG_ASSERT(num_dims > 0);
3930 
3931  if (team->t.t_serialized) {
3932  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3933  return; // no dependencies if team is serialized
3934  }
3935  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3936  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3937  // the next loop
3938  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3939 
3940  // Save bounds info into allocated private buffer
3941  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3942  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3943  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3944  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3945  pr_buf->th_doacross_info[0] =
3946  (kmp_int64)num_dims; // first element is number of dimensions
3947  // Also save the address of num_done so it can be accessed later without
3948  // knowing the buffer index
3949  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3950  pr_buf->th_doacross_info[2] = dims[0].lo;
3951  pr_buf->th_doacross_info[3] = dims[0].up;
3952  pr_buf->th_doacross_info[4] = dims[0].st;
3953  last = 5;
3954  for (j = 1; j < num_dims; ++j) {
3955  kmp_int64
3956  range_length; // keeps the range of each dimension except the first, dims[0]
3957  if (dims[j].st == 1) { // most common case
3958  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3959  range_length = dims[j].up - dims[j].lo + 1;
3960  } else {
3961  if (dims[j].st > 0) {
3962  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3963  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3964  } else { // negative increment
3965  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3966  range_length =
3967  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3968  }
3969  }
3970  pr_buf->th_doacross_info[last++] = range_length;
3971  pr_buf->th_doacross_info[last++] = dims[j].lo;
3972  pr_buf->th_doacross_info[last++] = dims[j].up;
3973  pr_buf->th_doacross_info[last++] = dims[j].st;
3974  }
3975 
3976  // Compute total trip count.
3977  // Start with range of dims[0] which we don't need to keep in the buffer.
3978  if (dims[0].st == 1) { // most common case
3979  trace_count = dims[0].up - dims[0].lo + 1;
3980  } else if (dims[0].st > 0) {
3981  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3982  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3983  } else { // negative increment
3984  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3985  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3986  }
3987  for (j = 1; j < num_dims; ++j) {
3988  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3989  }
3990  KMP_DEBUG_ASSERT(trace_count > 0);
3991 
3992  // Check whether the shared buffer is still occupied by a previous loop
3993  // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
3994  if (idx != sh_buf->doacross_buf_idx) {
3995  // Shared buffer is occupied, wait for it to be free
3996  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3997  __kmp_eq_4, NULL);
3998  }
3999 #if KMP_32_BIT_ARCH
4000  // Check if we are the first thread. After the CAS the first thread gets 0,
4001  // others get 1 if initialization is in progress, allocated pointer otherwise.
4002  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4003  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4004  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4005 #else
4006  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4007  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4008 #endif
4009  if (flags == NULL) {
4010  // we are the first thread, allocate the array of flags
4011  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
4012  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4013  KMP_MB();
4014  sh_buf->doacross_flags = flags;
4015  } else if (flags == (kmp_uint32 *)1) {
4016 #if KMP_32_BIT_ARCH
4017  // initialization is still in progress, need to wait
4018  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4019 #else
4020  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4021 #endif
4022  KMP_YIELD(TRUE);
4023  KMP_MB();
4024  } else {
4025  KMP_MB();
4026  }
4027  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4028  pr_buf->th_doacross_flags =
4029  sh_buf->doacross_flags; // save a private copy so we do not touch the
4030  // shared buffer on each iteration
4031  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4032 }
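A short note on the private buffer built above, derived directly from the assignments in this function; the iteration count in the sizing example is made up for illustration.

/* th_doacross_info layout:
     [0]             num_dims
     [1]             (kmp_int64)&sh_buf->doacross_num_done
     [2],[3],[4]     lo, up, st of dimension 0
     [4*j+1..4*j+4]  range_length, lo, up, st of dimension j (j >= 1)
   Flag array: one bit per iteration, e.g. trace_count = 1,000,000
   iterations -> size = 1,000,000 / 8 + 8 = 125,008 bytes, zero-initialized
   by __kmp_thread_calloc(). */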
4033 
4034 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4035  kmp_int32 shft, num_dims, i;
4036  kmp_uint32 flag;
4037  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4038  kmp_info_t *th = __kmp_threads[gtid];
4039  kmp_team_t *team = th->th.th_team;
4040  kmp_disp_t *pr_buf;
4041  kmp_int64 lo, up, st;
4042 
4043  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4044  if (team->t.t_serialized) {
4045  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4046  return; // no dependencies if team is serialized
4047  }
4048 
4049  // calculate sequential iteration number and check out-of-bounds condition
4050  pr_buf = th->th.th_dispatch;
4051  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4052  num_dims = pr_buf->th_doacross_info[0];
4053  lo = pr_buf->th_doacross_info[2];
4054  up = pr_buf->th_doacross_info[3];
4055  st = pr_buf->th_doacross_info[4];
4056  if (st == 1) { // most common case
4057  if (vec[0] < lo || vec[0] > up) {
4058  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4059  "bounds [%lld,%lld]\n",
4060  gtid, vec[0], lo, up));
4061  return;
4062  }
4063  iter_number = vec[0] - lo;
4064  } else if (st > 0) {
4065  if (vec[0] < lo || vec[0] > up) {
4066  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4067  "bounds [%lld,%lld]\n",
4068  gtid, vec[0], lo, up));
4069  return;
4070  }
4071  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4072  } else { // negative increment
4073  if (vec[0] > lo || vec[0] < up) {
4074  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4075  "bounds [%lld,%lld]\n",
4076  gtid, vec[0], lo, up));
4077  return;
4078  }
4079  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4080  }
4081  for (i = 1; i < num_dims; ++i) {
4082  kmp_int64 iter, ln;
4083  kmp_int32 j = i * 4;
4084  ln = pr_buf->th_doacross_info[j + 1];
4085  lo = pr_buf->th_doacross_info[j + 2];
4086  up = pr_buf->th_doacross_info[j + 3];
4087  st = pr_buf->th_doacross_info[j + 4];
4088  if (st == 1) {
4089  if (vec[i] < lo || vec[i] > up) {
4090  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4091  "bounds [%lld,%lld]\n",
4092  gtid, vec[i], lo, up));
4093  return;
4094  }
4095  iter = vec[i] - lo;
4096  } else if (st > 0) {
4097  if (vec[i] < lo || vec[i] > up) {
4098  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4099  "bounds [%lld,%lld]\n",
4100  gtid, vec[i], lo, up));
4101  return;
4102  }
4103  iter = (kmp_uint64)(vec[i] - lo) / st;
4104  } else { // st < 0
4105  if (vec[i] > lo || vec[i] < up) {
4106  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4107  "bounds [%lld,%lld]\n",
4108  gtid, vec[i], lo, up));
4109  return;
4110  }
4111  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4112  }
4113  iter_number = iter + ln * iter_number;
4114  }
4115  shft = iter_number % 32; // use 32-bit granularity
4116  iter_number >>= 5; // divided by 32
4117  flag = 1 << shft;
4118  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4119  KMP_YIELD(TRUE);
4120  }
4121  KMP_MB();
4122  KA_TRACE(20,
4123  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4124  gtid, (iter_number << 5) + shft));
4125 }
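A worked example of the linearization above, using a hypothetical two-dimensional nest with dims[0] = {lo 0, up 99, st 1}, dims[1] = {lo 0, up 9, st 1} (so the stored range_length of dimension 1 is 10) and the wait vector vec = {12, 3}:

/* iter_number = (vec[0] - lo0)           = 12
   iter_number = (vec[1] - lo1) + 10 * 12 = 123
   shft        = 123 % 32                 = 27
   word index  = 123 >> 5                 = 3
   => the wait spins until bit 27 of th_doacross_flags[3] is set,
      i.e. until iteration (12, 3) has been posted. */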
4126 
4127 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4128  kmp_int32 shft, num_dims, i;
4129  kmp_uint32 flag;
4130  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4131  kmp_info_t *th = __kmp_threads[gtid];
4132  kmp_team_t *team = th->th.th_team;
4133  kmp_disp_t *pr_buf;
4134  kmp_int64 lo, st;
4135 
4136  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4137  if (team->t.t_serialized) {
4138  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4139  return; // no dependencies if team is serialized
4140  }
4141 
4142  // calculate sequential iteration number (same as in "wait" but no
4143  // out-of-bounds checks)
4144  pr_buf = th->th.th_dispatch;
4145  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4146  num_dims = pr_buf->th_doacross_info[0];
4147  lo = pr_buf->th_doacross_info[2];
4148  st = pr_buf->th_doacross_info[4];
4149  if (st == 1) { // most common case
4150  iter_number = vec[0] - lo;
4151  } else if (st > 0) {
4152  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4153  } else { // negative increment
4154  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4155  }
4156  for (i = 1; i < num_dims; ++i) {
4157  kmp_int64 iter, ln;
4158  kmp_int32 j = i * 4;
4159  ln = pr_buf->th_doacross_info[j + 1];
4160  lo = pr_buf->th_doacross_info[j + 2];
4161  st = pr_buf->th_doacross_info[j + 4];
4162  if (st == 1) {
4163  iter = vec[i] - lo;
4164  } else if (st > 0) {
4165  iter = (kmp_uint64)(vec[i] - lo) / st;
4166  } else { // st < 0
4167  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4168  }
4169  iter_number = iter + ln * iter_number;
4170  }
4171  shft = iter_number % 32; // use 32-bit granularity
4172  iter_number >>= 5; // divided by 32
4173  flag = 1 << shft;
4174  KMP_MB();
4175  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4176  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4177  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4178  (iter_number << 5) + shft));
4179 }
4180 
4181 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4182  kmp_int32 num_done;
4183  kmp_info_t *th = __kmp_threads[gtid];
4184  kmp_team_t *team = th->th.th_team;
4185  kmp_disp_t *pr_buf = th->th.th_dispatch;
4186 
4187  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4188  if (team->t.t_serialized) {
4189  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4190  return; // nothing to do
4191  }
4192  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4193  if (num_done == th->th.th_team_nproc) {
4194  // we are the last thread, need to free shared resources
4195  int idx = pr_buf->th_doacross_buf_idx - 1;
4196  dispatch_shared_info_t *sh_buf =
4197  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4198  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4199  (kmp_int64)&sh_buf->doacross_num_done);
4200  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4201  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4202  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4203  sh_buf->doacross_flags = NULL;
4204  sh_buf->doacross_num_done = 0;
4205  sh_buf->doacross_buf_idx +=
4206  __kmp_dispatch_num_buffers; // free buffer for future re-use
4207  }
4208  // free private resources (the buffer index itself must persist across loops)
4209  pr_buf->th_doacross_flags = NULL;
4210  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4211  pr_buf->th_doacross_info = NULL;
4212  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4213 }
4214 #endif
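For orientation, a minimal sketch (placeholder names such as loc, gtid, n, my_lb and my_ub are assumed, not actual clang output) of the entry-point sequence a compiler emits for a one-dimensional doacross loop such as #pragma omp for ordered(1) with ordered depend(sink : i-1) / depend(source) inside the body:

/* illustrative lowering of a 1-D doacross loop */
struct kmp_dim dim = {0, n - 1, 1};          // lo, up, st
__kmpc_doacross_init(&loc, gtid, 1, &dim);   // once per thread, before the loop
for (kmp_int64 i = my_lb; i <= my_ub; ++i) { // this thread's chunk
  kmp_int64 sink = i - 1;
  __kmpc_doacross_wait(&loc, gtid, &sink);   // ordered depend(sink : i-1)
  /* ... loop body ... */
  kmp_int64 src = i;
  __kmpc_doacross_post(&loc, gtid, &src);    // ordered depend(source)
}
__kmpc_doacross_fini(&loc, gtid);            // once per thread, after the loop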
4215 
4216 #if OMP_50_ENABLED
4217 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */
4218 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4219  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4220 }
4221 
4222 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4223  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4224 }
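These thin wrappers forward to the internal allocator entry points using the caller's gtid. From user code the same functionality is reached through the standard OpenMP 5.0 API; a minimal sketch using the predefined omp_default_mem_alloc allocator (buffer size chosen arbitrarily):

#include <omp.h>
#include <stddef.h>

void allocator_example(void) {
  double *buf =
      (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
  if (buf != NULL) {
    /* ... use buf ... */
    omp_free(buf, omp_default_mem_alloc);
  }
}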
4225 
4226 int __kmpc_get_target_offload(void) {
4227  if (!__kmp_init_serial) {
4228  __kmp_serial_initialize();
4229  }
4230  return __kmp_target_offload;
4231 }
4232 
4233 int __kmpc_pause_resource(kmp_pause_status_t level) {
4234  if (!__kmp_init_serial) {
4235  return 1; // Can't pause if runtime is not initialized
4236  }
4237  return __kmp_pause_resource(level);
4238 }
4239 #endif // OMP_50_ENABLED
4240 
4241 // end of file //