/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

enum flag_type {
  flag32, /**< 32-bit flag */
  flag64, /**< 64-bit flag */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

// Base class for wait/release flags that use a volatile flag location.
template <typename P> class kmp_flag_native {
  volatile P *loc; // pointer to the flag storage, modified by another thread
  flag_type t; // "type" of the flag stored in loc

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

// Base class for wait/release flags that use an std::atomic flag location.
template <typename P> class kmp_flag {
  std::atomic<P> *loc; // pointer to the flag storage, modified by another thread
  flag_type t; // "type" of the flag stored in loc

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  std::atomic<P> *get() { return loc; }
  void *get_void_p() { return RCAST(void *, loc); }
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return loc->load(std::memory_order_acquire); }
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t *get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained);
  */
};

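// Illustrative sketch (not part of the runtime): a minimal class derived from
// kmp_flag<kmp_uint32> that satisfies the contract listed above. The name
// my_simple_flag is hypothetical; the real derived classes (kmp_basic_flag,
// kmp_flag_32, ...) appear later in this file.
/*
class my_simple_flag : public kmp_flag<kmp_uint32> {
  kmp_uint32 checker; // value that signals "done"
public:
  my_simple_flag(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_flag<kmp_uint32>(p, flag32), checker(c) {}
  bool done_check() { return this->load() == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  // The remaining members (waiters, sleep state, suspend/resume,
  // execute_tasks) would follow the same pattern as kmp_basic_flag below.
};
*/
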
#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, int final_spin, bool cancellable = false,
          bool sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
   THIS function is called from
     __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
       these have join / fork behavior

       In these cases, we don't change the state or trigger events in THIS
       function. Events are triggered in the calling code (__kmp_barrier):

         state := ompt_state_overhead
         barrier-begin
         barrier-wait-begin
         state := ompt_state_wait_barrier
         call join-barrier-implementation (finally arrive here)
         {}
         call fork-barrier-implementation (finally arrive here)
         {}
         state := ompt_state_overhead
         barrier-wait-end
         barrier-end
         state := ompt_state_work_parallel

     __kmp_fork_barrier (after thread creation, before executing implicit task)
       call fork-barrier-implementation (finally arrive here)
       {} // worker arrives here with state = ompt_state_idle

     __kmp_join_barrier (implicit barrier at end of parallel region)
       state := ompt_state_barrier_implicit
       barrier-begin
       barrier-wait-begin
       call join-barrier-implementation (finally arrive here, final_spin=FALSE)
       {}
     __kmp_fork_barrier (implicit barrier at end of parallel region)
       call fork-barrier-implementation (finally arrive here, final_spin=TRUE)

       Worker after task-team is finished:
         barrier-wait-end
         barrier-end
         implicit-task-end
         idle-begin
         state := ompt_state_idle

       Before leaving, if state = ompt_state_idle
         idle-end
         state := ompt_state_overhead
*/
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
#if OMP_50_ENABLED
      || __kmp_pause_status == kmp_soft_paused
#endif
  ) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (
#if OMP_50_ENABLED
        __kmp_pause_status == kmp_soft_paused ||
#endif
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
#if OMP_50_ENABLED
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
#endif
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now; other cases should have been caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME
#if OMP_50_ENABLED
        && __kmp_pause_status != kmp_soft_paused
#endif
    )
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!sleepable)
      continue;

#if OMP_50_ENABLED
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread(s) if indicated by the sleep bit(s). A
   thread that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}

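// Illustrative sketch (not part of the runtime): how __kmp_wait_template and
// __kmp_release_template pair up. The waiting thread spins (and possibly
// sleeps) on a flag; some other thread must eventually release the same flag,
// or the waiter can deadlock. The variables waiter_thr, wait_flag and go_flag
// are hypothetical; kmp_flag_64 is defined later in this file.
/*
  // Waiting side (e.g., a worker parked in its fork/join barrier):
  kmp_flag_64 wait_flag(&waiter_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                        (kmp_uint64)KMP_BARRIER_STATE_BUMP);
  wait_flag.wait(waiter_thr, TRUE USE_ITT_BUILD_ARG(NULL));

  // Releasing side (e.g., the thread waking that worker):
  kmp_flag_64 go_flag(&waiter_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                      waiter_thr);
  go_flag.release(); // bumps b_go and resumes the waiter if it went to sleep
*/
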
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

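// Illustrative sketch (not part of the runtime): flag_traits lets code that is
// generic over the flag width pick the right atomic primitive at compile time.
// The function name sketch_bump_flag is hypothetical.
/*
template <typename FlagType> void sketch_bump_flag(volatile FlagType *f) {
  typedef flag_traits<FlagType> traits_type;
  // Read with the width-appropriate TCR_4/TCR_8 wrapper ...
  FlagType cur = traits_type::tcr(*f);
  (void)cur;
  // ... and bump the flag with the width-appropriate atomic add-4,
  // as kmp_basic_flag_native::internal_release() does below.
  (void)traits_type::test_then_add4(f);
}
*/
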
// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // value to compare flag against to check completion
  kmp_info_t *waiting_threads[1]; // array of threads sleeping on this thread
  kmp_uint32 num_waiting_threads; // number of threads sleeping on this thread

public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

// Basic flag that uses std::atomic for the flag location
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // value to compare flag against to check completion
  kmp_info_t *waiting_threads[1]; // array of threads sleeping on this thread
  kmp_uint32 num_waiting_threads; // number of threads sleeping on this thread

public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return this->load() == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

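// Illustrative sketch (not part of the runtime): how the sleep bit rides along
// in the same word as the flag value. KMP_BARRIER_SLEEP_STATE (defined in
// kmp.h) reserves a bit of the flag word, so a waiter can advertise "I am
// going to sleep" with set_sleeping() while the releaser still sees the
// underlying value, and is_sleeping_val()/done_check_val() on the old value
// returned by the fetch-op tell each side what actually happened. The
// variables word and f are hypothetical.
/*
  std::atomic<kmp_uint64> word(0);         // hypothetical flag storage
  kmp_basic_flag<kmp_uint64> f(&word, 8);  // done when the value reaches 8
  kmp_uint64 old_val = f.set_sleeping();   // OR in KMP_BARRIER_SLEEP_STATE
  if (f.done_check_val(old_val)) {
    // The flag was already released before we could sleep: back out.
    f.unset_sleeping();
  }
*/
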
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
                                int final_spin
                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    bool retval = false;
    if (final_spin)
      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    return retval;
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker; // value to compare the flag byte against
  kmp_info_t *waiting_threads[1]; // array of threads sleeping on this thread
  kmp_uint32 num_waiting_threads; // number of threads sleeping on this thread
  kmp_uint32 offset; // byte of the flag that this operation is interested in
  bool flag_switch; // indicates a switch in flag location
  enum barrier_type bt; // barrier type
  kmp_info_t *this_thr; // thread to redirect to a different flag location
#if USE_ITT_BUILD
  void *itt_sync_obj; // ITT object to pass to the new flag location
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};

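// Illustrative sketch (not part of the runtime): the on-core flag packs one
// byte per thread into a single 64-bit word, so several threads sharing a core
// can be released through writes to the same flag word; offset selects the
// byte. With an infinite blocktime the byte can be stored directly, otherwise
// internal_release() uses an atomic OR so other threads' bytes and the sleep
// bit survive. The variables core_word, f2 and this_thr are hypothetical.
/*
  volatile kmp_uint64 core_word = 0; // hypothetical shared on-core flag word
  kmp_flag_oncore f2(&core_word, (kmp_uint64)1, 2, bs_plain_barrier,
                     this_thr USE_ITT_BUILD_ARG(NULL));
  f2.internal_release(); // sets byte 2 of core_word to 1
  // f2.done_check() now sees byte 2 == 1 for the thread watching offset 2.
*/
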
// Used to wake up threads; the volatile void* flag is usually the th_sleep_loc
// associated with gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

#endif // KMP_WAIT_RELEASE_H