/build/source/openmp/runtime/src/kmp

Bug Summary

File:	build/source/openmp/runtime/src/kmp_tasking.cpp
Warning:	line 3715, column 30 Dereference of null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_tasking.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include 
-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ /build/source/openmp/runtime/src/kmp_tasking.cpp

/build/source/openmp/runtime/src/kmp_tasking.cpp

→

1/*
2 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
3 */
4 
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12 
13#include "kmp.h"
14#include "kmp_i18n.h"
15#include "kmp_itt.h"
16#include "kmp_stats.h"
17#include "kmp_wait_release.h"
18#include "kmp_taskdeps.h"
19 
20#if OMPT_SUPPORT1
21#include "ompt-specific.h"
22#endif
23 
24#if ENABLE_LIBOMPTARGET1
25static void (*tgt_target_nowait_query)(void **);
26 
27void __kmp_init_target_task() {
28  *(void **)(&tgt_target_nowait_query) = KMP_DLSYM("__tgt_target_nowait_query")dlsym(((void *) 0), "__tgt_target_nowait_query");
29}
30#endif
31 
32/* forward declaration */
33static void __kmp_enable_tasking(kmp_task_team_t *task_team,
34                                 kmp_info_t *this_thr);
35static void __kmp_alloc_task_deque(kmp_info_t *thread,
36                                   kmp_thread_data_t *thread_data);
37static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
38                                           kmp_task_team_t *task_team);
39static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
40 
41#ifdef BUILD_TIED_TASK_STACK
42 
43//  __kmp_trace_task_stack: print the tied tasks from the task stack in order
44//  from top do bottom
45//
46//  gtid: global thread identifier for thread containing stack
47//  thread_data: thread data for task team thread containing stack
48//  threshold: value above which the trace statement triggers
49//  location: string identifying call site of this function (for trace)
50static void __kmp_trace_task_stack(kmp_int32 gtid,
51                                   kmp_thread_data_t *thread_data,
52                                   int threshold, char *location) {
53  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
54  kmp_taskdata_t **stack_top = task_stack->ts_top;
55  kmp_int32 entries = task_stack->ts_entries;
56  kmp_taskdata_t *tied_task;
57 
58  KA_TRACE(if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
 "first_block = %p, stack_top = %p \n", location, gtid, entries
, task_stack->ts_first_block, stack_top); }
59      threshold,if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
 "first_block = %p, stack_top = %p \n", location, gtid, entries
, task_stack->ts_first_block, stack_top); }
60      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
 "first_block = %p, stack_top = %p \n", location, gtid, entries
, task_stack->ts_first_block, stack_top); }
61       "first_block = %p, stack_top = %p \n",if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
 "first_block = %p, stack_top = %p \n", location, gtid, entries
, task_stack->ts_first_block, stack_top); }
62       location, gtid, entries, task_stack->ts_first_block, stack_top))if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
 "first_block = %p, stack_top = %p \n", location, gtid, entries
, task_stack->ts_first_block, stack_top); };
63 
64  KMP_DEBUG_ASSERT(stack_top != NULL)if (!(stack_top != __null)) { __kmp_debug_assert("stack_top != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 64); };
65  KMP_DEBUG_ASSERT(entries > 0)if (!(entries > 0)) { __kmp_debug_assert("entries > 0",
 "openmp/runtime/src/kmp_tasking.cpp", 65); };
66 
67  while (entries != 0) {
68    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0])if (!(stack_top != &task_stack->ts_first_block.sb_block
[0])) { __kmp_debug_assert("stack_top != &task_stack->ts_first_block.sb_block[0]"
, "openmp/runtime/src/kmp_tasking.cpp", 68); };
69    // fix up ts_top if we need to pop from previous block
70    if (entries & TASK_STACK_INDEX_MASK == 0) {
71      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
72 
73      stack_block = stack_block->sb_prev;
74      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
75    }
76 
77    // finish bookkeeping
78    stack_top--;
79    entries--;
80 
81    tied_task = *stack_top;
82 
83    KMP_DEBUG_ASSERT(tied_task != NULL)if (!(tied_task != __null)) { __kmp_debug_assert("tied_task != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 83); };
84    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED)if (!(tied_task->td_flags.tasktype == 1)) { __kmp_debug_assert
("tied_task->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 84); };
85 
86    KA_TRACE(threshold,if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
 "stack_top=%p, tied_task=%p\n", location, gtid, entries, stack_top
, tied_task); }
87             ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
 "stack_top=%p, tied_task=%p\n", location, gtid, entries, stack_top
, tied_task); }
88              "stack_top=%p, tied_task=%p\n",if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
 "stack_top=%p, tied_task=%p\n", location, gtid, entries, stack_top
, tied_task); }
89              location, gtid, entries, stack_top, tied_task))if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
 "stack_top=%p, tied_task=%p\n", location, gtid, entries, stack_top
, tied_task); };
90  }
91  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0])if (!(stack_top == &task_stack->ts_first_block.sb_block
[0])) { __kmp_debug_assert("stack_top == &task_stack->ts_first_block.sb_block[0]"
, "openmp/runtime/src/kmp_tasking.cpp", 91); };
92 
93  KA_TRACE(threshold,if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n"
, location, gtid); }
94           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n"
, location, gtid); }
95            location, gtid))if (kmp_a_debug >= threshold) { __kmp_debug_printf ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n"
, location, gtid); };
96}
97 
98//  __kmp_init_task_stack: initialize the task stack for the first time
99//  after a thread_data structure is created.
100//  It should not be necessary to do this again (assuming the stack works).
101//
102//  gtid: global thread identifier of calling thread
103//  thread_data: thread data for task team thread containing stack
104static void __kmp_init_task_stack(kmp_int32 gtid,
105                                  kmp_thread_data_t *thread_data) {
106  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
107  kmp_stack_block_t *first_block;
108 
109  // set up the first block of the stack
110  first_block = &task_stack->ts_first_block;
111  task_stack->ts_top = (kmp_taskdata_t **)first_block;
112  memset((void *)first_block, '\0',
113         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
114 
115  // initialize the stack to be empty
116  task_stack->ts_entries = TASK_STACK_EMPTY;
117  first_block->sb_next = NULL__null;
118  first_block->sb_prev = NULL__null;
119}
120 
121//  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
122//
123//  gtid: global thread identifier for calling thread
124//  thread_data: thread info for thread containing stack
125static void __kmp_free_task_stack(kmp_int32 gtid,
126                                  kmp_thread_data_t *thread_data) {
127  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
128  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
129 
130  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY)if (!(task_stack->ts_entries == TASK_STACK_EMPTY)) { __kmp_debug_assert
("task_stack->ts_entries == TASK_STACK_EMPTY", "openmp/runtime/src/kmp_tasking.cpp"
, 130); };
131  // free from the second block of the stack
132  while (stack_block != NULL__null) {
133    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL__null;
134 
135    stack_block->sb_next = NULL__null;
136    stack_block->sb_prev = NULL__null;
137    if (stack_block != &task_stack->ts_first_block) {
138      __kmp_thread_free(thread,___kmp_thread_free((thread), (stack_block), "openmp/runtime/src/kmp_tasking.cpp"
, 139)
139                        stack_block)___kmp_thread_free((thread), (stack_block), "openmp/runtime/src/kmp_tasking.cpp"
, 139); // free the block, if not the first
140    }
141    stack_block = next_block;
142  }
143  // initialize the stack to be empty
144  task_stack->ts_entries = 0;
145  task_stack->ts_top = NULL__null;
146}
147 
148//  __kmp_push_task_stack: Push the tied task onto the task stack.
149//     Grow the stack if necessary by allocating another block.
150//
151//  gtid: global thread identifier for calling thread
152//  thread: thread info for thread containing stack
153//  tied_task: the task to push on the stack
154static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
155                                  kmp_taskdata_t *tied_task) {
156  // GEH - need to consider what to do if tt_threads_data not allocated yet
157  kmp_thread_data_t *thread_data =
158      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
159  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
160 
161  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
162    return; // Don't push anything on stack if team or team tasks are serialized
163  }
164 
165  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED)if (!(tied_task->td_flags.tasktype == 1)) { __kmp_debug_assert
("tied_task->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 165); };
166  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL)if (!(task_stack->ts_top != __null)) { __kmp_debug_assert(
"task_stack->ts_top != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 166); };
167 
168  KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n"
, gtid, thread, tied_task); }
169           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n"
, gtid, thread, tied_task); }
170            gtid, thread, tied_task))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n"
, gtid, thread, tied_task); };
171  // Store entry
172  *(task_stack->ts_top) = tied_task;
173 
174  // Do bookkeeping for next push
175  task_stack->ts_top++;
176  task_stack->ts_entries++;
177 
178  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
179    // Find beginning of this task block
180    kmp_stack_block_t *stack_block =
181        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
182 
183    // Check if we already have a block
184    if (stack_block->sb_next !=
185        NULL__null) { // reset ts_top to beginning of next block
186      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
187    } else { // Alloc new block and link it up
188      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
189          thread, sizeof(kmp_stack_block_t));
190 
191      task_stack->ts_top = &new_block->sb_block[0];
192      stack_block->sb_next = new_block;
193      new_block->sb_prev = stack_block;
194      new_block->sb_next = NULL__null;
195 
196      KA_TRACE(if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n"
, gtid, tied_task, new_block); }
197          30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n"
, gtid, tied_task, new_block); }
198          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n"
, gtid, tied_task, new_block); }
199           gtid, tied_task, new_block))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n"
, gtid, tied_task, new_block); };
200    }
201  }
202  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n"
, gtid, tied_task); }
203                tied_task))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n"
, gtid, tied_task); };
204}
205 
206//  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
207//  the task, just check to make sure it matches the ending task passed in.
208//
209//  gtid: global thread identifier for the calling thread
210//  thread: thread info structure containing stack
211//  tied_task: the task popped off the stack
212//  ending_task: the task that is ending (should match popped task)
213static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
214                                 kmp_taskdata_t *ending_task) {
215  // GEH - need to consider what to do if tt_threads_data not allocated yet
216  kmp_thread_data_t *thread_data =
217      &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)];
218  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
219  kmp_taskdata_t *tied_task;
220 
221  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
222    // Don't pop anything from stack if team or team tasks are serialized
223    return;
224  }
225 
226  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL)if (!(task_stack->ts_top != __null)) { __kmp_debug_assert(
"task_stack->ts_top != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 226); };
227  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0)if (!(task_stack->ts_entries > 0)) { __kmp_debug_assert
("task_stack->ts_entries > 0", "openmp/runtime/src/kmp_tasking.cpp"
, 227); };
228 
229  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n"
, gtid, thread); }
230                thread))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n"
, gtid, thread); };
231 
232  // fix up ts_top if we need to pop from previous block
233  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
234    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
235 
236    stack_block = stack_block->sb_prev;
237    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
238  }
239 
240  // finish bookkeeping
241  task_stack->ts_top--;
242  task_stack->ts_entries--;
243 
244  tied_task = *(task_stack->ts_top);
245 
246  KMP_DEBUG_ASSERT(tied_task != NULL)if (!(tied_task != __null)) { __kmp_debug_assert("tied_task != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 246); };
247  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED)if (!(tied_task->td_flags.tasktype == 1)) { __kmp_debug_assert
("tied_task->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 247); };
248  KMP_DEBUG_ASSERT(tied_task == ending_task)if (!(tied_task == ending_task)) { __kmp_debug_assert("tied_task == ending_task"
, "openmp/runtime/src/kmp_tasking.cpp", 248); }; // If we built the stack correctly
249 
250  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n"
, gtid, tied_task); }
251                tied_task))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n"
, gtid, tied_task); };
252  return;
253}
254#endif /* BUILD_TIED_TASK_STACK */
255 
256// returns 1 if new task is allowed to execute, 0 otherwise
257// checks Task Scheduling constraint (if requested) and
258// mutexinoutset dependencies if any
259static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
260                                  const kmp_taskdata_t *tasknew,
261                                  const kmp_taskdata_t *taskcurr) {
262  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED1)) {
263    // Check if the candidate obeys the Task Scheduling Constraints (TSC)
264    // only descendant of all deferred tied tasks can be scheduled, checking
265    // the last one is enough, as it in turn is the descendant of all others
266    kmp_taskdata_t *current = taskcurr->td_last_tied;
267    KMP_DEBUG_ASSERT(current != NULL)if (!(current != __null)) { __kmp_debug_assert("current != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 267); };
268    // check if the task is not suspended on barrier
269    if (current->td_flags.tasktype == TASK_EXPLICIT1 ||
270        current->td_taskwait_thread > 0) { // <= 0 on barrier
271      kmp_int32 level = current->td_level;
272      kmp_taskdata_t *parent = tasknew->td_parent;
273      while (parent != current && parent->td_level > level) {
274        // check generation up to the level of the current task
275        parent = parent->td_parent;
276        KMP_DEBUG_ASSERT(parent != NULL)if (!(parent != __null)) { __kmp_debug_assert("parent != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 276); };
277      }
278      if (parent != current)
279        return false;
280    }
281  }
282  // Check mutexinoutset dependencies, acquire locks
283  kmp_depnode_t *node = tasknew->td_depnode;
284  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))__builtin_expect(!!(node && (node->dn.mtx_num_locks
 > 0)), 0)) {
285    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
286      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL)if (!(node->dn.mtx_locks[i] != __null)) { __kmp_debug_assert
("node->dn.mtx_locks[i] != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 286); };
287      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
288        continue;
289      // could not get the lock, release previous locks
290      for (int j = i - 1; j >= 0; --j)
291        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
292      return false;
293    }
294    // negative num_locks means all locks acquired successfully
295    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
296  }
297  return true;
298}
299 
300// __kmp_realloc_task_deque:
301// Re-allocates a task deque for a particular thread, copies the content from
302// the old deque and adjusts the necessary data structures relating to the
303// deque. This operation must be done with the deque_lock being held
304static void __kmp_realloc_task_deque(kmp_info_t *thread,
305                                     kmp_thread_data_t *thread_data) {
306  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size);
307  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size)if (!((thread_data->td.td_deque_ntasks) == size)) { __kmp_debug_assert
("(thread_data->td.td_deque_ntasks) == size", "openmp/runtime/src/kmp_tasking.cpp"
, 307); };
308  kmp_int32 new_size = 2 * size;
309 
310  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
 "%d] for thread_data %p\n", __kmp_gtid_from_thread(thread), size
, new_size, thread_data); }
311                "%d] for thread_data %p\n",if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
 "%d] for thread_data %p\n", __kmp_gtid_from_thread(thread), size
, new_size, thread_data); }
312                __kmp_gtid_from_thread(thread), size, new_size, thread_data))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
 "%d] for thread_data %p\n", __kmp_gtid_from_thread(thread), size
, new_size, thread_data); };
313 
314  kmp_taskdata_t **new_deque =
315      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *))___kmp_allocate((new_size * sizeof(kmp_taskdata_t *)), "openmp/runtime/src/kmp_tasking.cpp"
, 315);
316 
317  int i, j;
318  for (i = thread_data->td.td_deque_head, j = 0; j < size;
319       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1), j++)
320    new_deque[j] = thread_data->td.td_deque[i];
321 
322  __kmp_free(thread_data->td.td_deque)___kmp_free((thread_data->td.td_deque), "openmp/runtime/src/kmp_tasking.cpp"
, 322);
323 
324  thread_data->td.td_deque_head = 0;
325  thread_data->td.td_deque_tail = size;
326  thread_data->td.td_deque = new_deque;
327  thread_data->td.td_deque_size = new_size;
328}
329 
330static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
331  kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t))___kmp_allocate((sizeof(kmp_task_pri_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 331);
332  kmp_thread_data_t *thread_data = &l->td;
333  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
334  thread_data->td.td_deque_last_stolen = -1;
335  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "if (kmp_e_debug >= 20) { __kmp_debug_printf ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
 "for thread_data %p\n", __kmp_get_global_thread_id(), (1 <<
 8), thread_data); }
336                "for thread_data %p\n",if (kmp_e_debug >= 20) { __kmp_debug_printf ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
 "for thread_data %p\n", __kmp_get_global_thread_id(), (1 <<
 8), thread_data); }
337                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data))if (kmp_e_debug >= 20) { __kmp_debug_printf ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
 "for thread_data %p\n", __kmp_get_global_thread_id(), (1 <<
 8), thread_data); };
338  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(___kmp_allocate(((1 << 8) * sizeof(kmp_taskdata_t *)), "openmp/runtime/src/kmp_tasking.cpp"
, 339)
339      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *))___kmp_allocate(((1 << 8) * sizeof(kmp_taskdata_t *)), "openmp/runtime/src/kmp_tasking.cpp"
, 339);
340  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE(1 << 8);
341  return l;
342}
343 
344// The function finds the deque of priority tasks with given priority, or
345// allocates a new deque and put it into sorted (high -> low) list of deques.
346// Deques of non-default priority tasks are shared between all threads in team,
347// as opposed to per-thread deques of tasks with default priority.
348// The function is called under the lock task_team->tt.tt_task_pri_lock.
349static kmp_thread_data_t *
350__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
351  kmp_thread_data_t *thread_data;
352  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
353  if (lst->priority == pri) {
354    // Found queue of tasks with given priority.
355    thread_data = &lst->td;
356  } else if (lst->priority < pri) {
357    // All current priority queues contain tasks with lower priority.
358    // Allocate new one for given priority tasks.
359    kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
360    thread_data = &list->td;
361    list->priority = pri;
362    list->next = lst;
363    task_team->tt.tt_task_pri_list = list;
364  } else { // task_team->tt.tt_task_pri_list->priority > pri
365    kmp_task_pri_t *next_queue = lst->next;
366    while (next_queue && next_queue->priority > pri) {
367      lst = next_queue;
368      next_queue = lst->next;
369    }
370    // lst->priority > pri && (next == NULL || pri >= next->priority)
371    if (next_queue == NULL__null) {
372      // No queue with pri priority, need to allocate new one.
373      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
374      thread_data = &list->td;
375      list->priority = pri;
376      list->next = NULL__null;
377      lst->next = list;
378    } else if (next_queue->priority == pri) {
379      // Found queue of tasks with given priority.
380      thread_data = &next_queue->td;
381    } else { // lst->priority > pri > next->priority
382      // insert newly allocated between existed queues
383      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
384      thread_data = &list->td;
385      list->priority = pri;
386      list->next = next_queue;
387      lst->next = list;
388    }
389  }
390  return thread_data;
391}
392 
393//  __kmp_push_priority_task: Add a task to the team's priority task deque
394static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
395                                          kmp_taskdata_t *taskdata,
396                                          kmp_task_team_t *task_team,
397                                          kmp_int32 pri) {
398  kmp_thread_data_t *thread_data = NULL__null;
399  KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n"
, gtid, taskdata, pri); }
400           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n"
, gtid, taskdata, pri); }
401            gtid, taskdata, pri))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n"
, gtid, taskdata, pri); };
402 
403  // Find task queue specific to priority value
404  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
405  if (UNLIKELY(lst == NULL)__builtin_expect(!!(lst == __null), 0)) {
406    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
407    if (task_team->tt.tt_task_pri_list == NULL__null) {
408      // List of queues is still empty, allocate one.
409      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
410      thread_data = &list->td;
411      list->priority = pri;
412      list->next = NULL__null;
413      task_team->tt.tt_task_pri_list = list;
414    } else {
415      // Other thread initialized a queue. Check if it fits and get thread_data.
416      thread_data = __kmp_get_priority_deque_data(task_team, pri);
417    }
418    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
419  } else {
420    if (lst->priority == pri) {
421      // Found queue of tasks with given priority.
422      thread_data = &lst->td;
423    } else {
424      __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
425      thread_data = __kmp_get_priority_deque_data(task_team, pri);
426      __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
427    }
428  }
429  KMP_DEBUG_ASSERT(thread_data)if (!(thread_data)) { __kmp_debug_assert("thread_data", "openmp/runtime/src/kmp_tasking.cpp"
, 429); };
430 
431  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
432  // Check if deque is full
433  if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) >=
434      TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size)) {
435    if (__kmp_enable_task_throttling &&
436        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
437                              thread->th.th_current_task)) {
438      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
439      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d deque is full; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
440                    "TASK_NOT_PUSHED for task %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d deque is full; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
441                    gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d deque is full; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); };
442      return TASK_NOT_PUSHED1;
443    } else {
444      // expand deque to push the task which is not allowed to execute
445      __kmp_realloc_task_deque(thread, thread_data);
446    }
447  }
448  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <if (!((thread_data->td.td_deque_ntasks) < ((thread_data
->td).td_deque_size))) { __kmp_debug_assert("(thread_data->td.td_deque_ntasks) < ((thread_data->td).td_deque_size)"
, "openmp/runtime/src/kmp_tasking.cpp", 449); }
449                   TASK_DEQUE_SIZE(thread_data->td))if (!((thread_data->td.td_deque_ntasks) < ((thread_data
->td).td_deque_size))) { __kmp_debug_assert("(thread_data->td.td_deque_ntasks) < ((thread_data->td).td_deque_size)"
, "openmp/runtime/src/kmp_tasking.cpp", 449); };
450  // Push taskdata.
451  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
452  // Wrap index.
453  thread_data->td.td_deque_tail =
454      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
455  TCW_4(thread_data->td.td_deque_ntasks,(thread_data->td.td_deque_ntasks) = ((thread_data->td.td_deque_ntasks
) + 1)
456        TCR_4(thread_data->td.td_deque_ntasks) + 1)(thread_data->td.td_deque_ntasks) = ((thread_data->td.td_deque_ntasks
) + 1); // Adjust task count
457  KMP_FSYNC_RELEASING(thread->th.th_current_task)(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(thread->th.th_current_task)); // releasing self
458  KMP_FSYNC_RELEASING(taskdata)(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(taskdata)); // releasing child
459  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d returning "
 "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n"
, gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data
->td.td_deque_head, thread_data->td.td_deque_tail); }
460                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d returning "
 "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n"
, gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data
->td.td_deque_head, thread_data->td.td_deque_tail); }
461                gtid, taskdata, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d returning "
 "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n"
, gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data
->td.td_deque_head, thread_data->td.td_deque_tail); }
462                thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_priority_task: T#%d returning "
 "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n"
, gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data
->td.td_deque_head, thread_data->td.td_deque_tail); };
463  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
464  task_team->tt.tt_num_task_pri++; // atomic inc
465  return TASK_SUCCESSFULLY_PUSHED0;
466}
467 
468//  __kmp_push_task: Add a task to the thread's deque
469static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
470  kmp_info_t *thread = __kmp_threads[gtid];
471  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
472 
473  // If we encounter a hidden helper task, and the current thread is not a
474  // hidden helper thread, we have to give the task to any hidden helper thread
475  // starting from its shadow one.
476  if (UNLIKELY(taskdata->td_flags.hidden_helper &&__builtin_expect(!!(taskdata->td_flags.hidden_helper &&
 !((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num
)), 0)
477               !KMP_HIDDEN_HELPER_THREAD(gtid))__builtin_expect(!!(taskdata->td_flags.hidden_helper &&
 !((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num
)), 0)) {
478    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid)((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2);
479    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
480    // Signal the hidden helper threads.
481    __kmp_hidden_helper_worker_thread_signal();
482    return TASK_SUCCESSFULLY_PUSHED0;
483  }
484 
485  kmp_task_team_t *task_team = thread->th.th_task_team;
486  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
487  kmp_thread_data_t *thread_data;
488 
489  KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d trying to push task %p.\n"
, gtid, taskdata); }
490           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d trying to push task %p.\n"
, gtid, taskdata); };
491 
492  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)__builtin_expect(!!(taskdata->td_flags.tiedness == 0), 0)) {
493    // untied task needs to increment counter so that the task structure is not
494    // freed prematurely
495    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count)(&taskdata->td_untied_count)->fetch_add(1, std::memory_order_acq_rel
);
496    KMP_DEBUG_USE_VAR(counter);
497    KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n"
, gtid, counter, taskdata); }
498        20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n"
, gtid, counter, taskdata); }
499        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n"
, gtid, counter, taskdata); }
500         gtid, counter, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n"
, gtid, counter, taskdata); };
501  }
502 
503  // The first check avoids building task_team thread data if serialized
504  if (UNLIKELY(taskdata->td_flags.task_serial)__builtin_expect(!!(taskdata->td_flags.task_serial), 0)) {
505    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d team serialized; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
506                  "TASK_NOT_PUSHED for task %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d team serialized; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
507                  gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d team serialized; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); };
508    return TASK_NOT_PUSHED1;
509  }
510 
511  // Now that serialized tasks have returned, we can assume that we are not in
512  // immediate exec mode
513  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 513); };
514  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))__builtin_expect(!!(!((!0) == ((task_team)->tt.tt_found_tasks
))), 0)) {
515    __kmp_enable_tasking(task_team, thread);
516  }
517  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE)if (!((task_team->tt.tt_found_tasks) == (!0))) { __kmp_debug_assert
("(task_team->tt.tt_found_tasks) == (!0)", "openmp/runtime/src/kmp_tasking.cpp"
, 517); };
518  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL)if (!(((void *)(task_team->tt.tt_threads_data)) != __null)
) { __kmp_debug_assert("((void *)(task_team->tt.tt_threads_data)) != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 518); };
519 
520  if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
521      __kmp_max_task_priority > 0) {
522    int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority)((task->data2.priority) < (__kmp_max_task_priority) ? (
task->data2.priority) : (__kmp_max_task_priority));
523    return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
524  }
525 
526  // Find tasking deque specific to encountering thread
527  thread_data = &task_team->tt.tt_threads_data[tid];
528 
529  // No lock needed since only owner can allocate. If the task is hidden_helper,
530  // we don't need it either because we have initialized the dequeue for hidden
531  // helper thread data.
532  if (UNLIKELY(thread_data->td.td_deque == NULL)__builtin_expect(!!(thread_data->td.td_deque == __null), 0
)) {
533    __kmp_alloc_task_deque(thread, thread_data);
534  }
535 
536  int locked = 0;
537  // Check if deque is full
538  if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) >=
539      TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size)) {
540    if (__kmp_enable_task_throttling &&
541        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
542                              thread->th.th_current_task)) {
543      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d deque is full; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
544                    "TASK_NOT_PUSHED for task %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d deque is full; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
545                    gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d deque is full; returning "
 "TASK_NOT_PUSHED for task %p\n", gtid, taskdata); };
546      return TASK_NOT_PUSHED1;
547    } else {
548      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
549      locked = 1;
550      if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) >=
551          TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size)) {
552        // expand deque to push the task which is not allowed to execute
553        __kmp_realloc_task_deque(thread, thread_data);
554      }
555    }
556  }
557  // Lock the deque for the task push operation
558  if (!locked) {
559    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
560    // Need to recheck as we can get a proxy task from thread outside of OpenMP
561    if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) >=
562        TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size)) {
563      if (__kmp_enable_task_throttling &&
564          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
565                                thread->th.th_current_task)) {
566        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
567        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d deque is full on 2nd check; "
 "returning TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
568                      "returning TASK_NOT_PUSHED for task %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d deque is full on 2nd check; "
 "returning TASK_NOT_PUSHED for task %p\n", gtid, taskdata); }
569                      gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d deque is full on 2nd check; "
 "returning TASK_NOT_PUSHED for task %p\n", gtid, taskdata); };
570        return TASK_NOT_PUSHED1;
571      } else {
572        // expand deque to push the task which is not allowed to execute
573        __kmp_realloc_task_deque(thread, thread_data);
574      }
575    }
576  }
577  // Must have room since no thread can add tasks but calling thread
578  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <if (!((thread_data->td.td_deque_ntasks) < ((thread_data
->td).td_deque_size))) { __kmp_debug_assert("(thread_data->td.td_deque_ntasks) < ((thread_data->td).td_deque_size)"
, "openmp/runtime/src/kmp_tasking.cpp", 579); }
579                   TASK_DEQUE_SIZE(thread_data->td))if (!((thread_data->td.td_deque_ntasks) < ((thread_data
->td).td_deque_size))) { __kmp_debug_assert("(thread_data->td.td_deque_ntasks) < ((thread_data->td).td_deque_size)"
, "openmp/runtime/src/kmp_tasking.cpp", 579); };
580 
581  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
582      taskdata; // Push taskdata
583  // Wrap index.
584  thread_data->td.td_deque_tail =
585      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
586  TCW_4(thread_data->td.td_deque_ntasks,(thread_data->td.td_deque_ntasks) = ((thread_data->td.td_deque_ntasks
) + 1)
587        TCR_4(thread_data->td.td_deque_ntasks) + 1)(thread_data->td.td_deque_ntasks) = ((thread_data->td.td_deque_ntasks
) + 1); // Adjust task count
588  KMP_FSYNC_RELEASING(thread->th.th_current_task)(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(thread->th.th_current_task)); // releasing self
589  KMP_FSYNC_RELEASING(taskdata)(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(taskdata)); // releasing child
590  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
 "task=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data
->td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
591                "task=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
 "task=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data
->td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
592                gtid, taskdata, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
 "task=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data
->td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
593                thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
 "task=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data
->td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); };
594 
595  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
596 
597  return TASK_SUCCESSFULLY_PUSHED0;
598}
599 
600// __kmp_pop_current_task_from_thread: set up current task from called thread
601// when team ends
602//
603// this_thr: thread structure to set current_task in.
604void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
605  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(enter): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
606                "this_thread=%p, curtask=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(enter): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
607                "curtask_parent=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(enter): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
608                0, this_thr, this_thr->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(enter): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
609                this_thr->th.th_current_task->td_parent))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(enter): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); };
610 
611  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
612 
613  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(exit): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
614                "this_thread=%p, curtask=%p, "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(exit): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
615                "curtask_parent=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(exit): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
616                0, this_thr, this_thr->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(exit): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); }
617                this_thr->th.th_current_task->td_parent))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_pop_current_task_from_thread(exit): T#%d "
 "this_thread=%p, curtask=%p, " "curtask_parent=%p\n", 0, this_thr
, this_thr->th.th_current_task, this_thr->th.th_current_task
->td_parent); };
618}
619 
620// __kmp_push_current_task_to_thread: set up current task in called thread for a
621// new team
622//
623// this_thr: thread structure to set up
624// team: team for implicit task data
625// tid: thread within team to set up
626void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
627                                       int tid) {
628  // current task of the thread is a parent of the new just created implicit
629  // tasks of new team
630  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
631                "curtask=%p "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
632                "parent_task=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
633                tid, this_thr, this_thr->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
634                team->t.t_implicit_task_taskdata[tid].td_parent))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); };
635 
636  KMP_DEBUG_ASSERT(this_thr != NULL)if (!(this_thr != __null)) { __kmp_debug_assert("this_thr != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 636); };
637 
638  if (tid == 0) {
639    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
640      team->t.t_implicit_task_taskdata[0].td_parent =
641          this_thr->th.th_current_task;
642      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
643    }
644  } else {
645    team->t.t_implicit_task_taskdata[tid].td_parent =
646        team->t.t_implicit_task_taskdata[0].td_parent;
647    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
648  }
649 
650  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
651                "curtask=%p "if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
652                "parent_task=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
653                tid, this_thr, this_thr->th.th_current_task,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); }
654                team->t.t_implicit_task_taskdata[tid].td_parent))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
 "curtask=%p " "parent_task=%p\n", tid, this_thr, this_thr->
th.th_current_task, team->t.t_implicit_task_taskdata[tid].
td_parent); };
655}
656 
657// __kmp_task_start: bookkeeping for a task starting execution
658//
659// GTID: global thread id of calling thread
660// task: task starting execution
661// current_task: task suspending
662static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
663                             kmp_taskdata_t *current_task) {
664  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
665  kmp_info_t *thread = __kmp_threads[gtid];
666 
667  KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n"
, gtid, taskdata, current_task); }
668           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n"
, gtid, taskdata, current_task); }
669            gtid, taskdata, current_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n"
, gtid, taskdata, current_task); };
670 
671  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT)if (!(taskdata->td_flags.tasktype == 1)) { __kmp_debug_assert
("taskdata->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 671); };
672 
673  // mark currently executing task as suspended
674  // TODO: GEH - make sure root team implicit task is initialized properly.
675  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
676  current_task->td_flags.executing = 0;
677 
678// Add task to stack if tied
679#ifdef BUILD_TIED_TASK_STACK
680  if (taskdata->td_flags.tiedness == TASK_TIED1) {
681    __kmp_push_task_stack(gtid, thread, taskdata);
682  }
683#endif /* BUILD_TIED_TASK_STACK */
684 
685  // mark starting task as executing and as current task
686  thread->th.th_current_task = taskdata;
687 
688  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||if (!(taskdata->td_flags.started == 0 || taskdata->td_flags
.tiedness == 0)) { __kmp_debug_assert("taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 689); }
689                   taskdata->td_flags.tiedness == TASK_UNTIED)if (!(taskdata->td_flags.started == 0 || taskdata->td_flags
.tiedness == 0)) { __kmp_debug_assert("taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 689); };
690  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||if (!(taskdata->td_flags.executing == 0 || taskdata->td_flags
.tiedness == 0)) { __kmp_debug_assert("taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 691); }
691                   taskdata->td_flags.tiedness == TASK_UNTIED)if (!(taskdata->td_flags.executing == 0 || taskdata->td_flags
.tiedness == 0)) { __kmp_debug_assert("taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 691); };
692  taskdata->td_flags.started = 1;
693  taskdata->td_flags.executing = 1;
694  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0)if (!(taskdata->td_flags.complete == 0)) { __kmp_debug_assert
("taskdata->td_flags.complete == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 694); };
695  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0)if (!(taskdata->td_flags.freed == 0)) { __kmp_debug_assert
("taskdata->td_flags.freed == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 695); };
696 
697  // GEH TODO: shouldn't we pass some sort of location identifier here?
698  // APT: yes, we will pass location here.
699  // need to store current thread state (in a thread or taskdata structure)
700  // before setting work_state, otherwise wrong state is set after end of task
701 
702  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_start(exit): T#%d task=%p\n"
, gtid, taskdata); };
703 
704  return;
705}
706 
707#if OMPT_SUPPORT1
708//------------------------------------------------------------------------------
709// __ompt_task_init:
710//   Initialize OMPT fields maintained by a task. This will only be called after
711//   ompt_start_tool, so we already know whether ompt is enabled or not.
712 
713static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
714  // The calls to __ompt_task_init already have the ompt_enabled condition.
715  task->ompt_task_info.task_data.value = 0;
716  task->ompt_task_info.frame.exit_frame = ompt_data_none{0};
717  task->ompt_task_info.frame.enter_frame = ompt_data_none{0};
718  task->ompt_task_info.frame.exit_frame_flags =
719      ompt_frame_runtime | ompt_frame_framepointer;
720  task->ompt_task_info.frame.enter_frame_flags =
721      ompt_frame_runtime | ompt_frame_framepointer;
722  task->ompt_task_info.dispatch_chunk.start = 0;
723  task->ompt_task_info.dispatch_chunk.iterations = 0;
724}
725 
726// __ompt_task_start:
727//   Build and trigger task-begin event
728static inline void __ompt_task_start(kmp_task_t *task,
729                                     kmp_taskdata_t *current_task,
730                                     kmp_int32 gtid) {
731  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
732  ompt_task_status_t status = ompt_task_switch;
733  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
734    status = ompt_task_yield;
735    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
736  }
737  /* let OMPT know that we're about to run this task */
738  if (ompt_enabled.ompt_callback_task_schedule) {
739    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)ompt_callback_task_schedule_callback(
740        &(current_task->ompt_task_info.task_data), status,
741        &(taskdata->ompt_task_info.task_data));
742  }
743  taskdata->ompt_task_info.scheduling_parent = current_task;
744}
745 
746// __ompt_task_finish:
747//   Build and trigger final task-schedule event
748static inline void __ompt_task_finish(kmp_task_t *task,
749                                      kmp_taskdata_t *resumed_task,
750                                      ompt_task_status_t status) {
751  if (ompt_enabled.ompt_callback_task_schedule) {
752    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
753    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
754        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
755      status = ompt_task_cancel;
756    }
757 
758    /* let OMPT know that we're returning to the callee task */
759    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)ompt_callback_task_schedule_callback(
760        &(taskdata->ompt_task_info.task_data), status,
761        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL__null));
762  }
763}
764#endif
765 
766template <bool ompt>
767static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
768                                               kmp_task_t *task,
769                                               void *frame_address,
770                                               void *return_address) {
771  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
772  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
773 
774  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
 "current_task=%p\n", gtid, loc_ref, taskdata, current_task);
 }
775                "current_task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
 "current_task=%p\n", gtid, loc_ref, taskdata, current_task);
 }
776                gtid, loc_ref, taskdata, current_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
 "current_task=%p\n", gtid, loc_ref, taskdata, current_task);
 };
777 
778  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)__builtin_expect(!!(taskdata->td_flags.tiedness == 0), 0)) {
779    // untied task needs to increment counter so that the task structure is not
780    // freed prematurely
781    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count)(&taskdata->td_untied_count)->fetch_add(1, std::memory_order_acq_rel
);
782    KMP_DEBUG_USE_VAR(counter);
783    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
 "incremented for task %p\n", gtid, counter, taskdata); }
784                  "incremented for task %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
 "incremented for task %p\n", gtid, counter, taskdata); }
785                  gtid, counter, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
 "incremented for task %p\n", gtid, counter, taskdata); };
786  }
787 
788  taskdata->td_flags.task_serial =
789      1; // Execute this task immediately, not deferred.
790  __kmp_task_start(gtid, task, current_task);
791 
792#if OMPT_SUPPORT1
793  if (ompt) {
794    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL__null) {
795      current_task->ompt_task_info.frame.enter_frame.ptr =
796          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
797      current_task->ompt_task_info.frame.enter_frame_flags =
798          taskdata->ompt_task_info.frame.exit_frame_flags =
799              ompt_frame_application | ompt_frame_framepointer;
800    }
801    if (ompt_enabled.ompt_callback_task_create) {
802      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
803      ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback(
804          &(parent_info->task_data), &(parent_info->frame),
805          &(taskdata->ompt_task_info.task_data),
806          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata)((taskdata->td_flags.task_serial || taskdata->td_flags.
tasking_ser) ? ompt_task_undeferred : 0x0) | ((!(taskdata->
td_flags.tiedness)) ? ompt_task_untied : 0x0) | (taskdata->
td_flags.final ? ompt_task_final : 0x0) | (taskdata->td_flags
.merged_if0 ? ompt_task_mergeable : 0x0), 0,
807          return_address);
808    }
809    __ompt_task_start(task, current_task, gtid);
810  }
811#endif // OMPT_SUPPORT
812 
813  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n"
, gtid, loc_ref, taskdata); }
814                loc_ref, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n"
, gtid, loc_ref, taskdata); };
815}
816 
#if OMPT_SUPPORT
// __kmpc_omp_task_begin_if0_ompt: out-of-line (OMPT_NOINLINE) entry used when
// OMPT is active. It forwards all arguments unchanged to the <true>
// instantiation of __kmpc_omp_task_begin_if0_template.
//
// loc_ref: source location information.
// gtid: global thread number.
// task: task thunk for the started task.
// frame_address: frame address passed through to the template.
// return_address: return address passed through to the template.
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT
827 
828// __kmpc_omp_task_begin_if0: report that a given serialized task has started
829// execution
830//
831// loc_ref: source location information; points to beginning of task block.
832// gtid: global thread number.
833// task: task thunk for the started task.
834void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
835                               kmp_task_t *task) {
836#if OMPT_SUPPORT1
837  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
838    OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};;
839    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
840                                   OMPT_GET_FRAME_ADDRESS(1)__builtin_frame_address(1),
841                                   OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid));
842    return;
843  }
844#endif
845  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL__null, NULL__null);
846}
847 
#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
//
// loc_ref: source location information (only used for tracing here).
// gtid: global thread number.
// task: task thunk for the started task.
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  // The task currently running on this thread is passed to __kmp_task_start
  // as the current task.
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED
866 
867// __kmp_free_task: free the current task space and the space for shareds
868//
869// gtid: Global thread ID of calling thread
870// taskdata: task to free
871// thread: thread data structure of caller
872static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
873                            kmp_info_t *thread) {
874  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_free_task: T#%d freeing data from task %p\n"
, gtid, taskdata); }
875                taskdata))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_free_task: T#%d freeing data from task %p\n"
, gtid, taskdata); };
876 
877  // Check to make sure all flags and counters have the correct values
878  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT)if (!(taskdata->td_flags.tasktype == 1)) { __kmp_debug_assert
("taskdata->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 878); };
879  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0)if (!(taskdata->td_flags.executing == 0)) { __kmp_debug_assert
("taskdata->td_flags.executing == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 879); };
880  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1)if (!(taskdata->td_flags.complete == 1)) { __kmp_debug_assert
("taskdata->td_flags.complete == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 880); };
881  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0)if (!(taskdata->td_flags.freed == 0)) { __kmp_debug_assert
("taskdata->td_flags.freed == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 881); };
882  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||if (!(taskdata->td_allocated_child_tasks == 0 || taskdata->
td_flags.task_serial == 1)) { __kmp_debug_assert("taskdata->td_allocated_child_tasks == 0 || taskdata->td_flags.task_serial == 1"
, "openmp/runtime/src/kmp_tasking.cpp", 883); }
883                   taskdata->td_flags.task_serial == 1)if (!(taskdata->td_allocated_child_tasks == 0 || taskdata->
td_flags.task_serial == 1)) { __kmp_debug_assert("taskdata->td_allocated_child_tasks == 0 || taskdata->td_flags.task_serial == 1"
, "openmp/runtime/src/kmp_tasking.cpp", 883); };
884  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0)if (!(taskdata->td_incomplete_child_tasks == 0)) { __kmp_debug_assert
("taskdata->td_incomplete_child_tasks == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 884); };
885  kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
886  // Clear data to not be re-used later by mistake.
887  task->data1.destructors = NULL__null;
888  task->data2.priority = 0;
889 
890  taskdata->td_flags.freed = 1;
891// deallocate the taskdata and shared variable blocks associated with this task
892#if USE_FAST_MEMORY3
893  __kmp_fast_free(thread, taskdata)___kmp_fast_free((thread), (taskdata), "openmp/runtime/src/kmp_tasking.cpp"
, 893);
894#else /* ! USE_FAST_MEMORY */
895  __kmp_thread_free(thread, taskdata)___kmp_thread_free((thread), (taskdata), "openmp/runtime/src/kmp_tasking.cpp"
, 895);
896#endif
897  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task: T#%d freed task %p\n"
, gtid, taskdata); };
898}
899 
900// __kmp_free_task_and_ancestors: free the current task and ancestors without
901// children
902//
903// gtid: Global thread ID of calling thread
904// taskdata: task to free
905// thread: thread data structure of caller
906static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
907                                          kmp_taskdata_t *taskdata,
908                                          kmp_info_t *thread) {
909  // Proxy tasks must always be allowed to free their parents
910  // because they can be run in background even in serial mode.
911  kmp_int32 team_serial =
912      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
913      !taskdata->td_flags.proxy;
914  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT)if (!(taskdata->td_flags.tasktype == 1)) { __kmp_debug_assert
("taskdata->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 914); };
915 
916  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks)(&taskdata->td_allocated_child_tasks)->fetch_sub(1,
 std::memory_order_acq_rel) - 1;
917  KMP_DEBUG_ASSERT(children >= 0)if (!(children >= 0)) { __kmp_debug_assert("children >= 0"
, "openmp/runtime/src/kmp_tasking.cpp", 917); };
918 
919  // Now, go up the ancestor tree to see if any ancestors can now be freed.
920  while (children == 0) {
921    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
922 
923    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
 "and freeing itself\n", gtid, taskdata); }
924                  "and freeing itself\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
 "and freeing itself\n", gtid, taskdata); }
925                  gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
 "and freeing itself\n", gtid, taskdata); };
926 
927    // --- Deallocate my ancestor task ---
928    __kmp_free_task(gtid, taskdata, thread);
929 
930    taskdata = parent_taskdata;
931 
932    if (team_serial)
933      return;
934    // Stop checking ancestors at implicit task instead of walking up ancestor
935    // tree to avoid premature deallocation of ancestors.
936    if (taskdata->td_flags.tasktype == TASK_IMPLICIT0) {
937      if (taskdata->td_dephash) { // do we need to cleanup dephash?
938        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks)(&taskdata->td_incomplete_child_tasks)->load(std::memory_order_acquire
);
939        kmp_tasking_flags_t flags_old = taskdata->td_flags;
940        if (children == 0 && flags_old.complete == 1) {
941          kmp_tasking_flags_t flags_new = flags_old;
942          flags_new.complete = 0;
943          if (KMP_COMPARE_AND_STORE_ACQ32(__sync_bool_compare_and_swap((volatile kmp_uint32 *)(reinterpret_cast
<kmp_int32 *>(&taskdata->td_flags)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_old)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_new)))
944                  RCAST(kmp_int32 *, &taskdata->td_flags),__sync_bool_compare_and_swap((volatile kmp_uint32 *)(reinterpret_cast
<kmp_int32 *>(&taskdata->td_flags)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_old)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_new)))
945                  *RCAST(kmp_int32 *, &flags_old),__sync_bool_compare_and_swap((volatile kmp_uint32 *)(reinterpret_cast
<kmp_int32 *>(&taskdata->td_flags)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_old)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_new)))
946                  *RCAST(kmp_int32 *, &flags_new))__sync_bool_compare_and_swap((volatile kmp_uint32 *)(reinterpret_cast
<kmp_int32 *>(&taskdata->td_flags)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_old)), (kmp_uint32
)(*reinterpret_cast<kmp_int32 *>(&flags_new)))) {
947            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_task_and_ancestors: T#%d cleans "
 "dephash of implicit task %p\n", gtid, taskdata); }
948                           "dephash of implicit task %p\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_task_and_ancestors: T#%d cleans "
 "dephash of implicit task %p\n", gtid, taskdata); }
949                           gtid, taskdata))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_free_task_and_ancestors: T#%d cleans "
 "dephash of implicit task %p\n", gtid, taskdata); };
950            // cleanup dephash of finished implicit task
951            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
952          }
953        }
954      }
955      return;
956    }
957    // Predecrement simulated by "- 1" calculation
958    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks)(&taskdata->td_allocated_child_tasks)->fetch_sub(1,
 std::memory_order_acq_rel) - 1;
959    KMP_DEBUG_ASSERT(children >= 0)if (!(children >= 0)) { __kmp_debug_assert("children >= 0"
, "openmp/runtime/src/kmp_tasking.cpp", 959); };
960  }
961 
962  KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
 "not freeing it yet\n", gtid, taskdata, children); }
963      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
 "not freeing it yet\n", gtid, taskdata, children); }
964           "not freeing it yet\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
 "not freeing it yet\n", gtid, taskdata, children); }
965           gtid, taskdata, children))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
 "not freeing it yet\n", gtid, taskdata, children); };
966}
967 
968// Only need to keep track of child task counts if any of the following:
969// 1. team parallel and tasking not serialized;
970// 2. it is a proxy or detachable or hidden helper task
971// 3. the children counter of its parent task is greater than 0.
972// The reason for the 3rd one is for serialized team that found detached task,
973// hidden helper task, T. In this case, the execution of T is still deferred,
974// and it is also possible that a regular task depends on T. In this case, if we
975// don't track the children, task synchronization will be broken.
976static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
977  kmp_tasking_flags_t flags = taskdata->td_flags;
978  bool ret = !(flags.team_serial || flags.tasking_ser);
979  ret = ret || flags.proxy == TASK_PROXY1 ||
980        flags.detachable == TASK_DETACHABLE1 || flags.hidden_helper;
981  ret = ret ||
982        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks)(&taskdata->td_parent->td_incomplete_child_tasks)->
load(std::memory_order_acquire) > 0;
983  return ret;
984}
985 
986// __kmp_task_finish: bookkeeping to do when a task finishes execution
987//
988// gtid: global thread ID for calling thread
989// task: task to be finished
990// resumed_task: task to be resumed.  (may be NULL if task is serialized)
991//
992// template<ompt>: effectively ompt_enabled.enabled!=0
993// the version with ompt=false is inlined, allowing to optimize away all ompt
994// code in this case
995template <bool ompt>
996static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
997                              kmp_taskdata_t *resumed_task) {
998  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
999  kmp_info_t *thread = __kmp_threads[gtid];
1000  kmp_task_team_t *task_team =
1001      thread->th.th_task_team; // might be NULL for serial teams...
1002#if KMP_DEBUG1
1003  kmp_int32 children = 0;
1004#endif
1005  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
 "task %p\n", gtid, taskdata, resumed_task); }
1006                "task %p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
 "task %p\n", gtid, taskdata, resumed_task); }
1007                gtid, taskdata, resumed_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
 "task %p\n", gtid, taskdata, resumed_task); };
1008 
1009  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT)if (!(taskdata->td_flags.tasktype == 1)) { __kmp_debug_assert
("taskdata->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 1009); };
1010 
1011// Pop task from stack if tied
1012#ifdef BUILD_TIED_TASK_STACK
1013  if (taskdata->td_flags.tiedness == TASK_TIED1) {
1014    __kmp_pop_task_stack(gtid, thread, taskdata);
1015  }
1016#endif /* BUILD_TIED_TASK_STACK */
1017 
1018  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)__builtin_expect(!!(taskdata->td_flags.tiedness == 0), 0)) {
1019    // untied task needs to check the counter so that the task structure is not
1020    // freed prematurely
1021    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count)(&taskdata->td_untied_count)->fetch_sub(1, std::memory_order_acq_rel
) - 1;
1022    KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n"
, gtid, counter, taskdata); }
1023        20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n"
, gtid, counter, taskdata); }
1024        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n"
, gtid, counter, taskdata); }
1025         gtid, counter, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n"
, gtid, counter, taskdata); };
1026    if (counter > 0) {
1027      // untied task is not done, to be continued possibly by other thread, do
1028      // not free it now
1029      if (resumed_task == NULL__null) {
1030        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial)if (!(taskdata->td_flags.task_serial)) { __kmp_debug_assert
("taskdata->td_flags.task_serial", "openmp/runtime/src/kmp_tasking.cpp"
, 1030); };
1031        resumed_task = taskdata->td_parent; // In a serialized task, the resumed
1032        // task is the parent
1033      }
1034      thread->th.th_current_task = resumed_task; // restore current_task
1035      resumed_task->td_flags.executing = 1; // resume previous task
1036      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d partially done task %p, "
 "resuming task %p\n", gtid, taskdata, resumed_task); }
1037                    "resuming task %p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d partially done task %p, "
 "resuming task %p\n", gtid, taskdata, resumed_task); }
1038                    gtid, taskdata, resumed_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d partially done task %p, "
 "resuming task %p\n", gtid, taskdata, resumed_task); };
1039      return;
1040    }
1041  }
1042 
1043  // bookkeeping for resuming task:
1044  // GEH - note tasking_ser => task_serial
1045  KMP_DEBUG_ASSERT(if (!((taskdata->td_flags.tasking_ser || taskdata->td_flags
.task_serial) == taskdata->td_flags.task_serial)) { __kmp_debug_assert
("(taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == taskdata->td_flags.task_serial"
, "openmp/runtime/src/kmp_tasking.cpp", 1047); }
1046      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==if (!((taskdata->td_flags.tasking_ser || taskdata->td_flags
.task_serial) == taskdata->td_flags.task_serial)) { __kmp_debug_assert
("(taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == taskdata->td_flags.task_serial"
, "openmp/runtime/src/kmp_tasking.cpp", 1047); }
1047      taskdata->td_flags.task_serial)if (!((taskdata->td_flags.tasking_ser || taskdata->td_flags
.task_serial) == taskdata->td_flags.task_serial)) { __kmp_debug_assert
("(taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == taskdata->td_flags.task_serial"
, "openmp/runtime/src/kmp_tasking.cpp", 1047); };
1048  if (taskdata->td_flags.task_serial) {
1049    if (resumed_task == NULL__null) {
1050      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
1051      // task is the parent
1052    }
1053  } else {
1054    KMP_DEBUG_ASSERT(resumed_task !=if (!(resumed_task != __null)) { __kmp_debug_assert("resumed_task != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 1055); }
1055                     NULL)if (!(resumed_task != __null)) { __kmp_debug_assert("resumed_task != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 1055); }; // verify that resumed task is passed as argument
1056  }
1057 
1058  /* If the tasks' destructor thunk flag has been set, we need to invoke the
1059     destructor thunk that has been generated by the compiler. The code is
1060     placed here, since at this point other tasks might have been released
1061     hence overlapping the destructor invocations with some other work in the
1062     released tasks.  The OpenMP spec is not specific on when the destructors
1063     are invoked, so we should be free to choose. */
1064  if (UNLIKELY(taskdata->td_flags.destructors_thunk)__builtin_expect(!!(taskdata->td_flags.destructors_thunk),
 0)) {
1065    kmp_routine_entry_t destr_thunk = task->data1.destructors;
1066    KMP_ASSERT(destr_thunk)if (!(destr_thunk)) { __kmp_debug_assert("destr_thunk", "openmp/runtime/src/kmp_tasking.cpp"
, 1066); };
1067    destr_thunk(gtid, task);
1068  }
1069 
1070  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0)if (!(taskdata->td_flags.complete == 0)) { __kmp_debug_assert
("taskdata->td_flags.complete == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 1070); };
1071  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1)if (!(taskdata->td_flags.started == 1)) { __kmp_debug_assert
("taskdata->td_flags.started == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 1071); };
1072  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0)if (!(taskdata->td_flags.freed == 0)) { __kmp_debug_assert
("taskdata->td_flags.freed == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 1072); };
1073 
1074  bool completed = true;
1075  if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)__builtin_expect(!!(taskdata->td_flags.detachable == 1), 0
)) {
1076    if (taskdata->td_allow_completion_event.type ==
1077        KMP_EVENT_ALLOW_COMPLETION) {
1078      // event hasn't been fulfilled yet. Try to detach task.
1079      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1080      if (taskdata->td_allow_completion_event.type ==
1081          KMP_EVENT_ALLOW_COMPLETION) {
1082        // task finished execution
1083        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1)if (!(taskdata->td_flags.executing == 1)) { __kmp_debug_assert
("taskdata->td_flags.executing == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 1083); };
1084        taskdata->td_flags.executing = 0; // suspend the finishing task
1085 
1086#if OMPT_SUPPORT1
1087        // For a detached task, which is not completed, we switch back
1088        // the omp_fulfill_event signals completion
1089        // locking is necessary to avoid a race with ompt_task_late_fulfill
1090        if (ompt)
1091          __ompt_task_finish(task, resumed_task, ompt_task_detach);
1092#endif
1093 
1094        // no access to taskdata after this point!
1095        // __kmp_fulfill_event might free taskdata at any time from now
1096 
1097        taskdata->td_flags.proxy = TASK_PROXY1; // proxify!
1098        completed = false;
1099      }
1100      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1101    }
1102  }
1103 
1104  // Tasks with valid target async handles must be re-enqueued.
1105  if (taskdata->td_target_data.async_handle != NULL__null) {
1106    // Note: no need to translate gtid to its shadow. If the current thread is a
1107    // hidden helper one, then the gtid is already correct. Otherwise, hidden
1108    // helper threads are disabled, and gtid refers to a OpenMP thread.
1109    __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
1110    if (KMP_HIDDEN_HELPER_THREAD(gtid)((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num
))
1111      __kmp_hidden_helper_worker_thread_signal();
1112    completed = false;
1113  }
1114 
1115  if (completed) {
1116    taskdata->td_flags.complete = 1; // mark the task as completed
1117 
1118#if OMPT_SUPPORT1
1119    // This is not a detached task, we are done here
1120    if (ompt)
1121      __ompt_task_finish(task, resumed_task, ompt_task_complete);
1122#endif
1123    // TODO: What would be the balance between the conditions in the function
1124    // and an atomic operation?
1125    if (__kmp_track_children_task(taskdata)) {
1126      __kmp_release_deps(gtid, taskdata);
1127      // Predecrement simulated by "- 1" calculation
1128#if KMP_DEBUG1
1129      children = -1 +
1130#endif
1131          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks)(&taskdata->td_parent->td_incomplete_child_tasks)->
fetch_sub(1, std::memory_order_acq_rel);
1132      KMP_DEBUG_ASSERT(children >= 0)if (!(children >= 0)) { __kmp_debug_assert("children >= 0"
, "openmp/runtime/src/kmp_tasking.cpp", 1132); };
1133      if (taskdata->td_taskgroup)
1134        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count)(&taskdata->td_taskgroup->count)->fetch_sub(1, std
::memory_order_acq_rel);
1135    } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
1136                             task_team->tt.tt_hidden_helper_task_encountered)) {
1137      // if we found proxy or hidden helper tasks there could exist a dependency
1138      // chain with the proxy task as origin
1139      __kmp_release_deps(gtid, taskdata);
1140    }
1141    // td_flags.executing must be marked as 0 after __kmp_release_deps has been
1142    // called. Othertwise, if a task is executed immediately from the
1143    // release_deps code, the flag will be reset to 1 again by this same
1144    // function
1145    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1)if (!(taskdata->td_flags.executing == 1)) { __kmp_debug_assert
("taskdata->td_flags.executing == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 1145); };
1146    taskdata->td_flags.executing = 0; // suspend the finishing task
1147 
1148    // Decrement the counter of hidden helper tasks to be executed.
1149    if (taskdata->td_flags.hidden_helper) {
1150      // Hidden helper tasks can only be executed by hidden helper threads.
1151      KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid))if (!(((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num
))) { __kmp_debug_assert("KMP_HIDDEN_HELPER_THREAD(gtid)", "openmp/runtime/src/kmp_tasking.cpp"
, 1151); };
1152      KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks)(&__kmp_unexecuted_hidden_helper_tasks)->fetch_sub(1, std
::memory_order_acq_rel);
1153    }
1154  }
1155 
1156  KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n"
, gtid, taskdata, children); }
1157      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n"
, gtid, taskdata, children); }
1158           gtid, taskdata, children))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n"
, gtid, taskdata, children); };
1159 
1160  // Free this task and then ancestor tasks if they have no children.
1161  // Restore th_current_task first as suggested by John:
1162  // johnmc: if an asynchronous inquiry peers into the runtime system
1163  // it doesn't see the freed task as the current task.
1164  thread->th.th_current_task = resumed_task;
1165  if (completed)
1166    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
1167 
1168  // TODO: GEH - make sure root team implicit task is initialized properly.
1169  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
1170  resumed_task->td_flags.executing = 1; // resume previous task
1171 
1172  KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n"
, gtid, taskdata, resumed_task); }
1173      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n"
, gtid, taskdata, resumed_task); }
1174           gtid, taskdata, resumed_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n"
, gtid, taskdata, resumed_task); };
1175 
1176  return;
1177}
1178 
1179template <bool ompt>
1180static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
1181                                                  kmp_int32 gtid,
1182                                                  kmp_task_t *task) {
1183  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
1184                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); };
1185  KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_tasking.cpp"
, 1185); };
1186  // this routine will provide task to resume
1187  __kmp_task_finish<ompt>(gtid, task, NULL__null);
1188 
1189  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
1190                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); };
1191 
1192#if OMPT_SUPPORT1
1193  if (ompt) {
1194    ompt_frame_t *ompt_frame;
1195    __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1196    ompt_frame->enter_frame = ompt_data_none{0};
1197    ompt_frame->enter_frame_flags =
1198        ompt_frame_runtime | ompt_frame_framepointer;
1199  }
1200#endif
1201 
1202  return;
1203}
1204 
#if OMPT_SUPPORT
// __kmpc_omp_task_complete_if0_ompt: out-of-line (OMPT_NOINLINE) entry used
// when OMPT is active. It forwards its arguments unchanged to the <true>
// instantiation of __kmpc_omp_task_complete_if0_template.
//
// loc_ref: source location information.
// gtid: global thread number.
// task: task thunk for the completed task.
OMPT_NOINLINE
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT
1212 
1213// __kmpc_omp_task_complete_if0: report that a task has completed execution
1214//
1215// loc_ref: source location information; points to end of task block.
1216// gtid: global thread number.
1217// task: task thunk for the completed task.
1218void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
1219                                  kmp_task_t *task) {
1220#if OMPT_SUPPORT1
1221  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1222    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
1223    return;
1224  }
1225#endif
1226  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
1227}
1228 
#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
//
// loc_ref: source location information (only used for tracing here).
// gtid: global thread number.
// task: task thunk for the completed task.
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  // Not sure how to find task to resume, so NULL is passed and
  // __kmp_task_finish picks it.
  __kmp_task_finish<false>(gtid, task, NULL);

  // Only the pointer value is traced; the task is not dereferenced here.
  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED
1245 
1246// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
1247// task for a given thread
1248//
1249// loc_ref:  reference to source location of parallel region
1250// this_thr:  thread data structure corresponding to implicit task
1251// team: team for this_thr
1252// tid: thread id of given thread within team
1253// set_curr_task: TRUE if need to push current task to thread
1254// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to
1255// have already been done elsewhere.
1256// TODO: Get better loc_ref.  Value passed in may be NULL
1257void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
1258                              kmp_team_t *team, int tid, int set_curr_task) {
1259  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
1260 
1261  KF_TRACE(if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n"
, tid, team, task, set_curr_task ? "TRUE" : "FALSE"); }
1262      10,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n"
, tid, team, task, set_curr_task ? "TRUE" : "FALSE"); }
1263      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n"
, tid, team, task, set_curr_task ? "TRUE" : "FALSE"); }
1264       tid, team, task, set_curr_task ? "TRUE" : "FALSE"))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n"
, tid, team, task, set_curr_task ? "TRUE" : "FALSE"); };
1265 
1266  task->td_task_id = KMP_GEN_TASK_ID()(~0);
1267  task->td_team = team;
1268  //    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info
1269  //    in debugger)
1270  task->td_ident = loc_ref;
1271  task->td_taskwait_ident = NULL__null;
1272  task->td_taskwait_counter = 0;
1273  task->td_taskwait_thread = 0;
1274 
1275  task->td_flags.tiedness = TASK_TIED1;
1276  task->td_flags.tasktype = TASK_IMPLICIT0;
1277  task->td_flags.proxy = TASK_FULL0;
1278 
1279  // All implicit tasks are executed immediately, not deferred
1280  task->td_flags.task_serial = 1;
1281  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1282  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1283 
1284  task->td_flags.started = 1;
1285  task->td_flags.executing = 1;
1286  task->td_flags.complete = 0;
1287  task->td_flags.freed = 0;
1288 
1289  task->td_depnode = NULL__null;
1290  task->td_last_tied = task;
1291  task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1292 
1293  if (set_curr_task) { // only do this init first time thread is created
1294    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0)(&task->td_incomplete_child_tasks)->store(0, std::memory_order_release
);
1295    // Not used: don't need to deallocate implicit task
1296    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0)(&task->td_allocated_child_tasks)->store(0, std::memory_order_release
);
1297    task->td_taskgroup = NULL__null; // An implicit task does not have taskgroup
1298    task->td_dephash = NULL__null;
1299    __kmp_push_current_task_to_thread(this_thr, team, tid);
1300  } else {
1301    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0)if (!(task->td_incomplete_child_tasks == 0)) { __kmp_debug_assert
("task->td_incomplete_child_tasks == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 1301); };
1302    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0)if (!(task->td_allocated_child_tasks == 0)) { __kmp_debug_assert
("task->td_allocated_child_tasks == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 1302); };
1303  }
1304 
1305#if OMPT_SUPPORT1
1306  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
1307    __ompt_task_init(task, tid);
1308#endif
1309 
1310  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n"
, tid, team, task); }
1311                team, task))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n"
, tid, team, task); };
1312}
1313 
1314// __kmp_finish_implicit_task: Release resources associated to implicit tasks
1315// at the end of parallel regions. Some resources are kept for reuse in the next
1316// parallel region.
1317//
1318// thread:  thread data structure corresponding to implicit task
1319void __kmp_finish_implicit_task(kmp_info_t *thread) {
1320  kmp_taskdata_t *task = thread->th.th_current_task;
1321  if (task->td_dephash) {
1322    int children;
1323    task->td_flags.complete = 1;
1324    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks)(&task->td_incomplete_child_tasks)->load(std::memory_order_acquire
);
1325    kmp_tasking_flags_t flags_old = task->td_flags;
1326    if (children == 0 && flags_old.complete == 1) {
1327      kmp_tasking_flags_t flags_new = flags_old;
1328      flags_new.complete = 0;
1329      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),__sync_bool_compare_and_swap((volatile kmp_uint32 *)(reinterpret_cast
<kmp_int32 *>(&task->td_flags)), (kmp_uint32)(*reinterpret_cast
<kmp_int32 *>(&flags_old)), (kmp_uint32)(*reinterpret_cast
<kmp_int32 *>(&flags_new)))
1330                                      *RCAST(kmp_int32 *, &flags_old),__sync_bool_compare_and_swap((volatile kmp_uint32 *)(reinterpret_cast
<kmp_int32 *>(&task->td_flags)), (kmp_uint32)(*reinterpret_cast
<kmp_int32 *>(&flags_old)), (kmp_uint32)(*reinterpret_cast
<kmp_int32 *>(&flags_new)))
1331                                      *RCAST(kmp_int32 *, &flags_new))__sync_bool_compare_and_swap((volatile kmp_uint32 *)(reinterpret_cast
<kmp_int32 *>(&task->td_flags)), (kmp_uint32)(*reinterpret_cast
<kmp_int32 *>(&flags_old)), (kmp_uint32)(*reinterpret_cast
<kmp_int32 *>(&flags_new)))) {
1332        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_finish_implicit_task: T#%d cleans "
 "dephash of implicit task %p\n", thread->th.th_info.ds.ds_gtid
, task); }
1333                       "dephash of implicit task %p\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_finish_implicit_task: T#%d cleans "
 "dephash of implicit task %p\n", thread->th.th_info.ds.ds_gtid
, task); }
1334                       thread->th.th_info.ds.ds_gtid, task))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_finish_implicit_task: T#%d cleans "
 "dephash of implicit task %p\n", thread->th.th_info.ds.ds_gtid
, task); };
1335        __kmp_dephash_free_entries(thread, task->td_dephash);
1336      }
1337    }
1338  }
1339}
1340 
1341// __kmp_free_implicit_task: Release resources associated to implicit tasks
1342// when these are destroyed regions
1343//
1344// thread:  thread data structure corresponding to implicit task
1345void __kmp_free_implicit_task(kmp_info_t *thread) {
1346  kmp_taskdata_t *task = thread->th.th_current_task;
1347  if (task && task->td_dephash) {
1348    __kmp_dephash_free(thread, task->td_dephash);
1349    task->td_dephash = NULL__null;
1350  }
1351}
1352 
// Round up a size to a multiple of val, where val is a power of two: Used to
// insert padding between structures co-allocated using a single malloc() call
//
// size: value to round.
// val: alignment; the bit masking below relies on it being a power of two.
// returns: size unchanged if it is already a multiple of val; otherwise the
// next multiple of val. If adding val would overflow size_t, the value rounded
// DOWN to a multiple of val is returned instead.
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  const size_t low_bits = val - 1;
  if ((size & low_bits) == 0)
    return size; // already aligned

  size_t rounded = size & ~low_bits; // drop the remainder
  const size_t size_t_max = ~(size_t)0;
  if (rounded <= size_t_max - val)
    rounded += val; // Round up if there is no overflow.
  return rounded;
} // __kmp_round_up_to_val
1364 
1365// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
1366//
1367// loc_ref: source location information
1368// gtid: global thread number.
1369// flags: include tiedness & task type (explicit vs. implicit) of the ''new''
1370// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
1371// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including
1372// private vars accessed in task.
1373// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed
1374// in task.
1375// task_entry: Pointer to task code entry point generated by compiler.
1376// returns: a pointer to the allocated kmp_task_t structure (task).
1377kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1378                             kmp_tasking_flags_t *flags,
1379                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1380                             kmp_routine_entry_t task_entry) {
1381  kmp_task_t *task;
1382  kmp_taskdata_t *taskdata;
1383  kmp_info_t *thread = __kmp_threads[gtid];
1384  kmp_team_t *team = thread->th.th_team;
1385  kmp_taskdata_t *parent_task = thread->th.th_current_task;
1386  size_t shareds_offset;
1387 
1388  if (UNLIKELY(!TCR_4(__kmp_init_middle))__builtin_expect(!!(!(__kmp_init_middle)), 0))
1389    __kmp_middle_initialize();
1390 
1391  if (flags->hidden_helper) {
1392    if (__kmp_enable_hidden_helper) {
1393      if (!TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper))
1394        __kmp_hidden_helper_initialize();
1395    } else {
1396      // If the hidden helper task is not enabled, reset the flag to FALSE.
1397      flags->hidden_helper = FALSE0;
1398    }
1399  }
1400 
1401  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); }
1402                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); }
1403                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); }
1404                sizeof_shareds, task_entry))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); };
1405 
1406  KMP_DEBUG_ASSERT(parent_task)if (!(parent_task)) { __kmp_debug_assert("parent_task", "openmp/runtime/src/kmp_tasking.cpp"
, 1406); };
1407  if (parent_task->td_flags.final) {
1408    if (flags->merged_if0) {
1409    }
1410    flags->final = 1;
1411  }
1412 
1413  if (flags->tiedness == TASK_UNTIED0 && !team->t.t_serialized) {
1414    // Untied task encountered causes the TSC algorithm to check entire deque of
1415    // the victim thread. If no untied task encountered, then checking the head
1416    // of the deque should be enough.
1417    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1)if ((thread->th.th_task_team->tt.tt_untied_task_encountered
) != (1)) (thread->th.th_task_team->tt.tt_untied_task_encountered
) = (1);
1418  }
1419 
1420  // Detachable tasks are not proxy tasks yet but could be in the future. Doing
1421  // the tasking setup
1422  // when that happens is too late.
1423  if (UNLIKELY(flags->proxy == TASK_PROXY ||__builtin_expect(!!(flags->proxy == 1 || flags->detachable
 == 1 || flags->hidden_helper), 0)
1424               flags->detachable == TASK_DETACHABLE || flags->hidden_helper)__builtin_expect(!!(flags->proxy == 1 || flags->detachable
 == 1 || flags->hidden_helper), 0)) {
1425    if (flags->proxy == TASK_PROXY1) {
1426      flags->tiedness = TASK_UNTIED0;
1427      flags->merged_if0 = 1;
1428    }
1429    /* are we running in a sequential parallel or tskm_immediate_exec... we need
1430       tasking support enabled */
1431    if ((thread->th.th_task_team) == NULL__null) {
1432      /* This should only happen if the team is serialized
1433          setup a task team and propagate it to the thread */
1434      KMP_DEBUG_ASSERT(team->t.t_serialized)if (!(team->t.t_serialized)) { __kmp_debug_assert("team->t.t_serialized"
, "openmp/runtime/src/kmp_tasking.cpp", 1434); };
1435      KA_TRACE(30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("T#%d creating task team in __kmp_task_alloc for proxy task\n"
, gtid); }
1436               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("T#%d creating task team in __kmp_task_alloc for proxy task\n"
, gtid); }
1437                gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("T#%d creating task team in __kmp_task_alloc for proxy task\n"
, gtid); };
1438      // 1 indicates setup the current team regardless of nthreads
1439      __kmp_task_team_setup(thread, team, 1);
1440      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
1441    }
1442    kmp_task_team_t *task_team = thread->th.th_task_team;
1443 
1444    /* tasking must be enabled now as the task might not be pushed */
1445    if (!KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) {
1446      KA_TRACE(if (kmp_a_debug >= 30) { __kmp_debug_printf ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n"
, gtid); }
1447          30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n"
, gtid); }
1448          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n"
, gtid); };
1449      __kmp_enable_tasking(task_team, thread);
1450      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1451      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1452      // No lock needed since only owner can allocate
1453      if (thread_data->td.td_deque == NULL__null) {
1454        __kmp_alloc_task_deque(thread, thread_data);
1455      }
1456    }
1457 
1458    if ((flags->proxy == TASK_PROXY1 || flags->detachable == TASK_DETACHABLE1) &&
1459        task_team->tt.tt_found_proxy_tasks == FALSE0)
1460      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE)(task_team->tt.tt_found_proxy_tasks) = ((!0));
1461    if (flags->hidden_helper &&
1462        task_team->tt.tt_hidden_helper_task_encountered == FALSE0)
1463      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE)(task_team->tt.tt_hidden_helper_task_encountered) = ((!0));
1464  }
1465 
1466  // Calculate shared structure offset including padding after kmp_task_t struct
1467  // to align pointers in shared struct
1468  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
1469  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
1470 
1471  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
1472  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d First malloc size: %ld\n"
, gtid, shareds_offset); }
1473                shareds_offset))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d First malloc size: %ld\n"
, gtid, shareds_offset); };
1474  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d Second malloc size: %ld\n"
, gtid, sizeof_shareds); }
1475                sizeof_shareds))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d Second malloc size: %ld\n"
, gtid, sizeof_shareds); };
1476 
1477  // Avoid double allocation here by combining shareds with taskdata
1478#if USE_FAST_MEMORY3
1479  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +___kmp_fast_allocate((thread), (shareds_offset + sizeof_shareds
), "openmp/runtime/src/kmp_tasking.cpp", 1480)
1480                                                               sizeof_shareds)___kmp_fast_allocate((thread), (shareds_offset + sizeof_shareds
), "openmp/runtime/src/kmp_tasking.cpp", 1480);
1481#else /* ! USE_FAST_MEMORY */
1482  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +___kmp_thread_malloc((thread), (shareds_offset + sizeof_shareds
), "openmp/runtime/src/kmp_tasking.cpp", 1483)
1483                                                               sizeof_shareds)___kmp_thread_malloc((thread), (shareds_offset + sizeof_shareds
), "openmp/runtime/src/kmp_tasking.cpp", 1483);
1484#endif /* USE_FAST_MEMORY */
1485 
1486  task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
1487 
1488// Make sure task & taskdata are aligned appropriately
1489#if KMP_ARCH_X860 || KMP_ARCH_PPC64(0 || 0) || !KMP_HAVE_QUAD0
1490  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0)if (!((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) ==
 0)) { __kmp_debug_assert("(((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 1490); };
1491  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0)if (!((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0
)) { __kmp_debug_assert("(((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 1491); };
1492#else
1493  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0)if (!((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) ==
 0)) { __kmp_debug_assert("(((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 1493); };
1494  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0)if (!((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0)
) { __kmp_debug_assert("(((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 1494); };
1495#endif
1496  if (sizeof_shareds > 0) {
1497    // Avoid double allocation here by combining shareds with taskdata
1498    task->shareds = &((char *)taskdata)[shareds_offset];
1499    // Make sure shareds struct is aligned to pointer size
1500    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==if (!((((kmp_uintptr_t)task->shareds) & (sizeof(void *
) - 1)) == 0)) { __kmp_debug_assert("(((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 1501); }
1501                     0)if (!((((kmp_uintptr_t)task->shareds) & (sizeof(void *
) - 1)) == 0)) { __kmp_debug_assert("(((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 1501); };
1502  } else {
1503    task->shareds = NULL__null;
1504  }
1505  task->routine = task_entry;
1506  task->part_id = 0; // AC: Always start with 0 part id
1507 
1508  taskdata->td_task_id = KMP_GEN_TASK_ID()(~0);
1509  taskdata->td_team = thread->th.th_team;
1510  taskdata->td_alloc_thread = thread;
1511  taskdata->td_parent = parent_task;
1512  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
1513  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0)(&taskdata->td_untied_count)->store(0, std::memory_order_relaxed
);
1514  taskdata->td_ident = loc_ref;
1515  taskdata->td_taskwait_ident = NULL__null;
1516  taskdata->td_taskwait_counter = 0;
1517  taskdata->td_taskwait_thread = 0;
1518  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL)if (!(taskdata->td_parent != __null)) { __kmp_debug_assert
("taskdata->td_parent != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 1518); };
1519  // avoid copying icvs for proxy tasks
1520  if (flags->proxy == TASK_FULL0)
1521    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1522 
1523  taskdata->td_flags = *flags;
1524  taskdata->td_task_team = thread->th.th_task_team;
1525  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1526  taskdata->td_flags.tasktype = TASK_EXPLICIT1;
1527  // If it is hidden helper task, we need to set the team and task team
1528  // correspondingly.
1529  if (flags->hidden_helper) {
1530    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)];
1531    taskdata->td_team = shadow_thread->th.th_team;
1532    taskdata->td_task_team = shadow_thread->th.th_task_team;
1533  }
1534 
1535  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
1536  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1537 
1538  // GEH - TODO: fix this to copy parent task's value of team_serial flag
1539  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1540 
1541  // GEH - Note we serialize the task if the team is serialized to make sure
1542  // implicit parallel region tasks are not left until program termination to
1543  // execute. Also, it helps locality to execute immediately.
1544 
1545  taskdata->td_flags.task_serial =
1546      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1547       taskdata->td_flags.tasking_ser || flags->merged_if0);
1548 
1549  taskdata->td_flags.started = 0;
1550  taskdata->td_flags.executing = 0;
1551  taskdata->td_flags.complete = 0;
1552  taskdata->td_flags.freed = 0;
1553 
1554  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0)(&taskdata->td_incomplete_child_tasks)->store(0, std
::memory_order_relaxed);
1555  // start at one because counts current task and children
1556  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1)(&taskdata->td_allocated_child_tasks)->store(1, std
::memory_order_relaxed);
1557  taskdata->td_taskgroup =
1558      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
1559  taskdata->td_dephash = NULL__null;
1560  taskdata->td_depnode = NULL__null;
1561  taskdata->td_target_data.async_handle = NULL__null;
1562  if (flags->tiedness == TASK_UNTIED0)
1563    taskdata->td_last_tied = NULL__null; // will be set when the task is scheduled
1564  else
1565    taskdata->td_last_tied = taskdata;
1566  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1567#if OMPT_SUPPORT1
1568  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
1569    __ompt_task_init(taskdata, gtid);
1570#endif
1571  // TODO: What would be the balance between the conditions in the function and
1572  // an atomic operation?
1573  if (__kmp_track_children_task(taskdata)) {
1574    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks)(&parent_task->td_incomplete_child_tasks)->fetch_add
(1, std::memory_order_acq_rel);
1575    if (parent_task->td_taskgroup)
1576      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count)(&parent_task->td_taskgroup->count)->fetch_add(1
, std::memory_order_acq_rel);
1577    // Only need to keep track of allocated child tasks for explicit tasks since
1578    // implicit not deallocated
1579    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT1) {
1580      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks)(&taskdata->td_parent->td_allocated_child_tasks)->
fetch_add(1, std::memory_order_acq_rel);
1581    }
1582    if (flags->hidden_helper) {
1583      taskdata->td_flags.task_serial = FALSE0;
1584      // Increment the number of hidden helper tasks to be executed
1585      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks)(&__kmp_unexecuted_hidden_helper_tasks)->fetch_add(1, std
::memory_order_acq_rel);
1586    }
1587  }
1588 
1589  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n"
, gtid, taskdata, taskdata->td_parent); }
1590                gtid, taskdata, taskdata->td_parent))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n"
, gtid, taskdata, taskdata->td_parent); };
1591 
1592  return task;
1593}
1594 
1595kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1596                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
1597                                  size_t sizeof_shareds,
1598                                  kmp_routine_entry_t task_entry) {
1599  kmp_task_t *retval;
1600  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1601  __kmp_assert_valid_gtid(gtid);
1602  input_flags->native = FALSE0;
1603  // __kmp_task_alloc() sets up all other runtime flags
1604  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, input_flags->tiedness ? "tied  " : "untied", input_flags
->proxy ? "proxy" : "", input_flags->detachable ? "detachable"
 : "", sizeof_kmp_task_t, sizeof_shareds, task_entry); }
1605                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, input_flags->tiedness ? "tied  " : "untied", input_flags
->proxy ? "proxy" : "", input_flags->detachable ? "detachable"
 : "", sizeof_kmp_task_t, sizeof_shareds, task_entry); }
1606                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, input_flags->tiedness ? "tied  " : "untied", input_flags
->proxy ? "proxy" : "", input_flags->detachable ? "detachable"
 : "", sizeof_kmp_task_t, sizeof_shareds, task_entry); }
1607                input_flags->proxy ? "proxy" : "",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, input_flags->tiedness ? "tied  " : "untied", input_flags
->proxy ? "proxy" : "", input_flags->detachable ? "detachable"
 : "", sizeof_kmp_task_t, sizeof_shareds, task_entry); }
1608                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, input_flags->tiedness ? "tied  " : "untied", input_flags
->proxy ? "proxy" : "", input_flags->detachable ? "detachable"
 : "", sizeof_kmp_task_t, sizeof_shareds, task_entry); }
1609                sizeof_shareds, task_entry))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, input_flags->tiedness ? "tied  " : "untied", input_flags
->proxy ? "proxy" : "", input_flags->detachable ? "detachable"
 : "", sizeof_kmp_task_t, sizeof_shareds, task_entry); };
1610 
1611  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1612                            sizeof_shareds, task_entry);
1613 
1614  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n"
, gtid, retval); };
1615 
1616  return retval;
1617}
1618 
1619kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1620                                         kmp_int32 flags,
1621                                         size_t sizeof_kmp_task_t,
1622                                         size_t sizeof_shareds,
1623                                         kmp_routine_entry_t task_entry,
1624                                         kmp_int64 device_id) {
1625  auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
1626  // target task is untied defined in the specification
1627  input_flags.tiedness = TASK_UNTIED0;
1628 
1629  if (__kmp_enable_hidden_helper)
1630    input_flags.hidden_helper = TRUE(!0);
1631 
1632  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
1633                               sizeof_shareds, task_entry);
1634}
1635 
1636/*!
1637@ingroup TASKING
1638@param loc_ref location of the original task directive
1639@param gtid Global Thread ID of encountering thread
1640@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new
1641task''
1642@param naffins Number of affinity items
1643@param affin_list List of affinity items
1644@return Returns non-zero if registering affinity information was not successful.
1645 Returns 0 if registration was successful
1646This entry registers the affinity information attached to a task with the task
1647thunk structure kmp_taskdata_t.
1648*/
1649kmp_int32
1650__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
1651                                  kmp_task_t *new_task, kmp_int32 naffins,
1652                                  kmp_task_affinity_info_t *affin_list) {
1653  return 0;
1654}
1655 
1656//  __kmp_invoke_task: invoke the specified task
1657//
1658// gtid: global thread ID of caller
1659// task: the task to invoke
1660// current_task: the task to resume after task invocation
1661static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
1662                              kmp_taskdata_t *current_task) {
1663  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
1664  kmp_info_t *thread;
1665  int discard = 0 /* false */;
1666  KA_TRACE(if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n"
, gtid, taskdata, current_task); }
1667      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n"
, gtid, taskdata, current_task); }
1668           gtid, taskdata, current_task))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n"
, gtid, taskdata, current_task); };
1669  KMP_DEBUG_ASSERT(task)if (!(task)) { __kmp_debug_assert("task", "openmp/runtime/src/kmp_tasking.cpp"
, 1669); };
1670  if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&__builtin_expect(!!(taskdata->td_flags.proxy == 1 &&
 taskdata->td_flags.complete == 1), 0)
1671               taskdata->td_flags.complete == 1)__builtin_expect(!!(taskdata->td_flags.proxy == 1 &&
 taskdata->td_flags.complete == 1), 0)) {
1672    // This is a proxy task that was already completed but it needs to run
1673    // its bottom-half finish
1674    KA_TRACE(if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n"
, gtid, taskdata); }
1675        30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n"
, gtid, taskdata); }
1676        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n"
, gtid, taskdata); }
1677         gtid, taskdata))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n"
, gtid, taskdata); };
1678 
1679    __kmp_bottom_half_finish_proxy(gtid, task);
1680 
1681    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
 "proxy task %p, resuming task %p\n", gtid, taskdata, current_task
); }
1682                  "proxy task %p, resuming task %p\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
 "proxy task %p, resuming task %p\n", gtid, taskdata, current_task
); }
1683                  gtid, taskdata, current_task))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
 "proxy task %p, resuming task %p\n", gtid, taskdata, current_task
); };
1684 
1685    return;
1686  }
1687 
1688#if OMPT_SUPPORT1
1689  // For untied tasks, the first task executed only calls __kmpc_omp_task and
1690  // does not execute code.
1691  ompt_thread_info_t oldInfo;
1692  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1693    // Store the threads states and restore them after the task
1694    thread = __kmp_threads[gtid];
1695    oldInfo = thread->th.ompt_thread_info;
1696    thread->th.ompt_thread_info.wait_id = 0;
1697    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1698                                            ? ompt_state_work_serial
1699                                            : ompt_state_work_parallel;
1700    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1701  }
1702#endif
1703 
1704  // Proxy tasks are not handled by the runtime
1705  if (taskdata->td_flags.proxy != TASK_PROXY1) {
1706    __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded
1707  }
1708 
1709  // TODO: cancel tasks if the parallel region has also been cancelled
1710  // TODO: check if this sequence can be hoisted above __kmp_task_start
1711  // if cancellation has been enabled for this run ...
1712  if (UNLIKELY(__kmp_omp_cancellation)__builtin_expect(!!(__kmp_omp_cancellation), 0)) {
1713    thread = __kmp_threads[gtid];
1714    kmp_team_t *this_team = thread->th.th_team;
1715    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1716    if ((taskgroup && taskgroup->cancel_request) ||
1717        (this_team->t.t_cancel_request == cancel_parallel)) {
1718#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1719      ompt_data_t *task_data;
1720      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)__builtin_expect(!!(ompt_enabled.ompt_callback_cancel), 0)) {
1721        __ompt_get_task_info_internal(0, NULL__null, &task_data, NULL__null, NULL__null, NULL__null);
1722        ompt_callbacks.ompt_callback(ompt_callback_cancel)ompt_callback_cancel_callback(
1723            task_data,
1724            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1725                                                      : ompt_cancel_parallel) |
1726                ompt_cancel_discarded_task,
1727            NULL__null);
1728      }
1729#endif
1730      KMP_COUNT_BLOCK(TASK_cancelled)((void)0);
1731      // this task belongs to a task group and we need to cancel it
1732      discard = 1 /* true */;
1733    }
1734  }
1735 
1736  // Invoke the task routine and pass in relevant data.
1737  // Thunks generated by gcc take a different argument list.
1738  if (!discard) {
1739    if (taskdata->td_flags.tiedness == TASK_UNTIED0) {
1740      taskdata->td_last_tied = current_task->td_last_tied;
1741      KMP_DEBUG_ASSERT(taskdata->td_last_tied)if (!(taskdata->td_last_tied)) { __kmp_debug_assert("taskdata->td_last_tied"
, "openmp/runtime/src/kmp_tasking.cpp", 1741); };
1742    }
1743#if KMP_STATS_ENABLED0
1744    KMP_COUNT_BLOCK(TASK_executed)((void)0);
1745    switch (KMP_GET_THREAD_STATE()((void)0)) {
1746    case FORK_JOIN_BARRIER:
1747      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar)((void)0);
1748      break;
1749    case PLAIN_BARRIER:
1750      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar)((void)0);
1751      break;
1752    case TASKYIELD:
1753      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield)((void)0);
1754      break;
1755    case TASKWAIT:
1756      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait)((void)0);
1757      break;
1758    case TASKGROUP:
1759      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup)((void)0);
1760      break;
1761    default:
1762      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate)((void)0);
1763      break;
1764    }
1765#endif // KMP_STATS_ENABLED
1766 
1767// OMPT task begin
1768#if OMPT_SUPPORT1
1769    if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
1770      __ompt_task_start(task, current_task, gtid);
1771#endif
1772#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1773    if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&__builtin_expect(!!(ompt_enabled.ompt_callback_dispatch &&
 taskdata->ompt_task_info.dispatch_chunk.iterations > 0
), 0)
1774                 taskdata->ompt_task_info.dispatch_chunk.iterations > 0)__builtin_expect(!!(ompt_enabled.ompt_callback_dispatch &&
 taskdata->ompt_task_info.dispatch_chunk.iterations > 0
), 0)) {
1775      ompt_data_t instance = ompt_data_none{0};
1776      instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
1777      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
1778      ompt_callbacks.ompt_callback(ompt_callback_dispatch)ompt_callback_dispatch_callback(
1779          &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
1780          ompt_dispatch_taskloop_chunk, instance);
1781      taskdata->ompt_task_info.dispatch_chunk = {0, 0};
1782    }
1783#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1784 
1785#if OMPD_SUPPORT1
1786    if (ompd_state & OMPD_ENABLE_BP0x1)
1787      ompd_bp_task_begin();
1788#endif
1789 
1790#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1791    kmp_uint64 cur_time;
1792    kmp_int32 kmp_itt_count_task =
1793        __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1794        current_task->td_flags.tasktype == TASK_IMPLICIT0;
1795    if (kmp_itt_count_task) {
1796      thread = __kmp_threads[gtid];
1797      // Time outer level explicit task on barrier for adjusting imbalance time
1798      if (thread->th.th_bar_arrive_time)
1799        cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1800      else
1801        kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
1802    }
1803    KMP_FSYNC_ACQUIRED(taskdata)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(taskdata)); // acquired self (new task)
1804#endif
1805 
1806#if ENABLE_LIBOMPTARGET1
1807    if (taskdata->td_target_data.async_handle != NULL__null) {
1808      // If we have a valid target async handle, that means that we have already
1809      // executed the task routine once. We must query for the handle completion
1810      // instead of re-executing the routine.
1811      KMP_ASSERT(tgt_target_nowait_query)if (!(tgt_target_nowait_query)) { __kmp_debug_assert("tgt_target_nowait_query"
, "openmp/runtime/src/kmp_tasking.cpp", 1811); };
1812      tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
1813    } else
1814#endif
1815    if (task->routine != NULL__null) {
1816#ifdef KMP_GOMP_COMPAT
1817      if (taskdata->td_flags.native) {
1818        ((void (*)(void *))(*(task->routine)))(task->shareds);
1819      } else
1820#endif /* KMP_GOMP_COMPAT */
1821      {
1822        (*(task->routine))(gtid, task);
1823      }
1824    }
1825    KMP_POP_PARTITIONED_TIMER()((void)0);
1826 
1827#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1828    if (kmp_itt_count_task) {
1829      // Barrier imbalance - adjust arrive time with the task duration
1830      thread->th.th_bar_arrive_time += (__itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0() - cur_time);
1831    }
1832    KMP_FSYNC_CANCEL(taskdata)(!__kmp_itt_fsync_cancel_ptr__3_0) ? (void)0 : __kmp_itt_fsync_cancel_ptr__3_0
((void *)(taskdata)); // destroy self (just executed)
1833    KMP_FSYNC_RELEASING(taskdata->td_parent)(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(taskdata->td_parent)); // releasing parent
1834#endif
1835  }
1836 
1837#if OMPD_SUPPORT1
1838  if (ompd_state & OMPD_ENABLE_BP0x1)
1839    ompd_bp_task_end();
1840#endif
1841 
1842  // Proxy tasks are not handled by the runtime
1843  if (taskdata->td_flags.proxy != TASK_PROXY1) {
1844#if OMPT_SUPPORT1
1845    if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1846      thread->th.ompt_thread_info = oldInfo;
1847      if (taskdata->td_flags.tiedness == TASK_TIED1) {
1848        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none{0};
1849      }
1850      __kmp_task_finish<true>(gtid, task, current_task);
1851    } else
1852#endif
1853      __kmp_task_finish<false>(gtid, task, current_task);
1854  }
1855 
1856  KA_TRACE(if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n"
, gtid, taskdata, current_task); }
1857      30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n"
, gtid, taskdata, current_task); }
1858      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n"
, gtid, taskdata, current_task); }
1859       gtid, taskdata, current_task))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n"
, gtid, taskdata, current_task); };
1860  return;
1861}
1862 
1863// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1864//
1865// loc_ref: location of original task pragma (ignored)
1866// gtid: Global Thread ID of encountering thread
1867// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1868// Returns:
1869//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1870//    be resumed later.
1871//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1872//    resumed later.
1873kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
1874                                kmp_task_t *new_task) {
1875  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
1876 
1877  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, new_taskdata); }
1878                loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, new_taskdata); };
1879 
1880#if OMPT_SUPPORT1
1881  kmp_taskdata_t *parent;
1882  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1883    parent = new_taskdata->td_parent;
1884    if (ompt_enabled.ompt_callback_task_create) {
1885      ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback(
1886          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
1887          &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
1888          OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1889    }
1890  }
1891#endif
1892 
1893  /* Should we execute the new task or queue it? For now, let's just always try
1894     to queue it.  If the queue fills up, then we'll execute it.  */
1895 
1896  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED1) // if cannot defer
1897  { // Execute this task immediately
1898    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1899    new_taskdata->td_flags.task_serial = 1;
1900    __kmp_invoke_task(gtid, new_task, current_task);
1901  }
1902 
1903  KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref
, new_taskdata); }
1904      10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref
, new_taskdata); }
1905      ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref
, new_taskdata); }
1906       "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref
, new_taskdata); }
1907       gtid, loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref
, new_taskdata); };
1908 
1909#if OMPT_SUPPORT1
1910  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1911    parent->ompt_task_info.frame.enter_frame = ompt_data_none{0};
1912  }
1913#endif
1914  return TASK_CURRENT_NOT_QUEUED0;
1915}
1916 
1917// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1918//
1919// gtid: Global Thread ID of encountering thread
1920// new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1921// serialize_immediate: if TRUE then if the task is executed immediately its
1922// execution will be serialized
1923// Returns:
1924//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1925//    be resumed later.
1926//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1927//    resumed later.
1928kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
1929                         bool serialize_immediate) {
1930  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
1931 
1932  /* Should we execute the new task or queue it? For now, let's just always try
1933     to queue it.  If the queue fills up, then we'll execute it.  */
1934  if (new_taskdata->td_flags.proxy == TASK_PROXY1 ||
1935      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED1) // if cannot defer
1936  { // Execute this task immediately
1937    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1938    if (serialize_immediate)
1939      new_taskdata->td_flags.task_serial = 1;
1940    __kmp_invoke_task(gtid, new_task, current_task);
1941  } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) &&
1942             __kmp_wpolicy_passive) {
1943    kmp_info_t *this_thr = __kmp_threads[gtid];
1944    kmp_team_t *team = this_thr->th.th_team;
1945    kmp_int32 nthreads = this_thr->th.th_team_nproc;
1946    for (int i = 0; i < nthreads; ++i) {
1947      kmp_info_t *thread = team->t.t_threads[i];
1948      if (thread == this_thr)
1949        continue;
1950      if (thread->th.th_sleep_loc != NULL__null) {
1951        __kmp_null_resume_wrapper(thread);
1952        break; // awake one thread at a time
1953      }
1954    }
1955  }
1956  return TASK_CURRENT_NOT_QUEUED0;
1957}
1958 
1959// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
1960// non-thread-switchable task from the parent thread only!
1961//
1962// loc_ref: location of original task pragma (ignored)
1963// gtid: Global Thread ID of encountering thread
1964// new_task: non-thread-switchable task thunk allocated by
1965// __kmp_omp_task_alloc()
1966// Returns:
1967//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1968//    be resumed later.
1969//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1970//    resumed later.
1971kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
1972                          kmp_task_t *new_task) {
1973  kmp_int32 res;
1974  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK)((void)0);
1975 
1976#if KMP_DEBUG1 || OMPT_SUPPORT1
1977  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
1978#endif
1979  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, new_taskdata); }
1980                new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, new_taskdata); };
1981  __kmp_assert_valid_gtid(gtid);
1982 
1983#if OMPT_SUPPORT1
1984  kmp_taskdata_t *parent = NULL__null;
1985  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1986    if (!new_taskdata->td_flags.started) {
1987      OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};;
1988      parent = new_taskdata->td_parent;
1989      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
1990        parent->ompt_task_info.frame.enter_frame.ptr =
1991            OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1992      }
1993      if (ompt_enabled.ompt_callback_task_create) {
1994        ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback(
1995            &(parent->ompt_task_info.task_data),
1996            &(parent->ompt_task_info.frame),
1997            &(new_taskdata->ompt_task_info.task_data),
1998            ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata)((new_taskdata->td_flags.task_serial || new_taskdata->td_flags
.tasking_ser) ? ompt_task_undeferred : 0x0) | ((!(new_taskdata
->td_flags.tiedness)) ? ompt_task_untied : 0x0) | (new_taskdata
->td_flags.final ? ompt_task_final : 0x0) | (new_taskdata->
td_flags.merged_if0 ? ompt_task_mergeable : 0x0), 0,
1999            OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid));
2000      }
2001    } else {
2002      // We are scheduling the continuation of an UNTIED task.
2003      // Scheduling back to the parent task.
2004      __ompt_task_finish(new_task,
2005                         new_taskdata->ompt_task_info.scheduling_parent,
2006                         ompt_task_switch);
2007      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none{0};
2008    }
2009  }
2010#endif
2011 
2012  res = __kmp_omp_task(gtid, new_task, true);
2013 
2014  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(exit): T#%d returning "
 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata
); }
2015                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(exit): T#%d returning "
 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata
); }
2016                gtid, loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(exit): T#%d returning "
 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata
); };
2017#if OMPT_SUPPORT1
2018  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)__builtin_expect(!!(ompt_enabled.enabled && parent !=
 __null), 0)) {
2019    parent->ompt_task_info.frame.enter_frame = ompt_data_none{0};
2020  }
2021#endif
2022  return res;
2023}
2024 
2025// __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
2026// a taskloop task with the correct OMPT return address
2027//
2028// loc_ref: location of original task pragma (ignored)
2029// gtid: Global Thread ID of encountering thread
2030// new_task: non-thread-switchable task thunk allocated by
2031// __kmp_omp_task_alloc()
2032// codeptr_ra: return address for OMPT callback
2033// Returns:
2034//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
2035//    be resumed later.
2036//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
2037//    resumed later.
2038kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
2039                                  kmp_task_t *new_task, void *codeptr_ra) {
2040  kmp_int32 res;
2041  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK)((void)0);
2042 
2043#if KMP_DEBUG1 || OMPT_SUPPORT1
2044  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
2045#endif
2046  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, new_taskdata); }
2047                new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, new_taskdata); };
2048 
2049#if OMPT_SUPPORT1
2050  kmp_taskdata_t *parent = NULL__null;
2051  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)__builtin_expect(!!(ompt_enabled.enabled && !new_taskdata
->td_flags.started), 0)) {
2052    parent = new_taskdata->td_parent;
2053    if (!parent->ompt_task_info.frame.enter_frame.ptr)
2054      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
2055    if (ompt_enabled.ompt_callback_task_create) {
2056      ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback(
2057          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
2058          &(new_taskdata->ompt_task_info.task_data),
2059          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata)((new_taskdata->td_flags.task_serial || new_taskdata->td_flags
.tasking_ser) ? ompt_task_undeferred : 0x0) | ((!(new_taskdata
->td_flags.tiedness)) ? ompt_task_untied : 0x0) | (new_taskdata
->td_flags.final ? ompt_task_final : 0x0) | (new_taskdata->
td_flags.merged_if0 ? ompt_task_mergeable : 0x0), 0,
2060          codeptr_ra);
2061    }
2062  }
2063#endif
2064 
2065  res = __kmp_omp_task(gtid, new_task, true);
2066 
2067  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(exit): T#%d returning "
 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata
); }
2068                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(exit): T#%d returning "
 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata
); }
2069                gtid, loc_ref, new_taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task(exit): T#%d returning "
 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", gtid, loc_ref, new_taskdata
); };
2070#if OMPT_SUPPORT1
2071  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)__builtin_expect(!!(ompt_enabled.enabled && parent !=
 __null), 0)) {
2072    parent->ompt_task_info.frame.enter_frame = ompt_data_none{0};
2073  }
2074#endif
2075  return res;
2076}
2077 
2078template <bool ompt>
2079static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
2080                                              void *frame_address,
2081                                              void *return_address) {
2082  kmp_taskdata_t *taskdata = nullptr;
2083  kmp_info_t *thread;
2084  int thread_finished = FALSE0;
2085  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT)((void)0);
2086 
2087  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n"
, gtid, loc_ref); };
2088  KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_tasking.cpp"
, 2088); };
2089 
2090  if (__kmp_tasking_mode != tskm_immediate_exec) {
2091    thread = __kmp_threads[gtid];
2092    taskdata = thread->th.th_current_task;
2093 
2094#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2095    ompt_data_t *my_task_data;
2096    ompt_data_t *my_parallel_data;
2097 
2098    if (ompt) {
2099      my_task_data = &(taskdata->ompt_task_info.task_data);
2100      my_parallel_data = OMPT_CUR_TEAM_DATA(thread)(&(thread->th.th_team->t.ompt_team_info.parallel_data
));
2101 
2102      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
2103 
2104      if (ompt_enabled.ompt_callback_sync_region) {
2105        ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2106            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
2107            my_task_data, return_address);
2108      }
2109 
2110      if (ompt_enabled.ompt_callback_sync_region_wait) {
2111        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2112            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
2113            my_task_data, return_address);
2114      }
2115    }
2116#endif // OMPT_SUPPORT && OMPT_OPTIONAL
2117 
2118// Debugger: The taskwait is active. Store location and thread encountered the
2119// taskwait.
2120#if USE_ITT_BUILD1
2121// Note: These values are used by ITT events as well.
2122#endif /* USE_ITT_BUILD */
2123    taskdata->td_taskwait_counter += 1;
2124    taskdata->td_taskwait_ident = loc_ref;
2125    taskdata->td_taskwait_thread = gtid + 1;
2126 
2127#if USE_ITT_BUILD1
2128    void *itt_sync_obj = NULL__null;
2129#if USE_ITT_NOTIFY1
2130    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj)if (__builtin_expect(!!(__kmp_itt_sync_create_ptr__3_0), 0)) {
 itt_sync_obj = __kmp_itt_taskwait_object(gtid); if (itt_sync_obj
 != __null) { __kmp_itt_taskwait_starting(gtid, itt_sync_obj)
; } };
2131#endif /* USE_ITT_NOTIFY */
2132#endif /* USE_ITT_BUILD */
2133 
2134    bool must_wait =
2135        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
2136 
2137    must_wait = must_wait || (thread->th.th_task_team != NULL__null &&
2138                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
2139    // If hidden helper thread is encountered, we must enable wait here.
2140    must_wait =
2141        must_wait ||
2142        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL__null &&
2143         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);
2144 
2145    if (must_wait) {
2146      kmp_flag_32<false, false> flag(
2147          RCAST(std::atomic<kmp_uint32> *,reinterpret_cast<std::atomic<kmp_uint32> *>(&
(taskdata->td_incomplete_child_tasks))
2148                &(taskdata->td_incomplete_child_tasks))reinterpret_cast<std::atomic<kmp_uint32> *>(&
(taskdata->td_incomplete_child_tasks)),
2149          0U);
2150      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks)(&taskdata->td_incomplete_child_tasks)->load(std::memory_order_acquire
) != 0) {
2151        flag.execute_tasks(thread, gtid, FALSE0,
2152                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj,
2153                           __kmp_task_stealing_constraint);
2154      }
2155    }
2156#if USE_ITT_BUILD1
2157    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj)if (__builtin_expect(!!(itt_sync_obj != __null), 0)) __kmp_itt_taskwait_finished
(gtid, itt_sync_obj);;
2158    KMP_FSYNC_ACQUIRED(taskdata)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(taskdata)); // acquire self - sync with children
2159#endif /* USE_ITT_BUILD */
2160 
2161    // Debugger:  The taskwait is completed. Location remains, but thread is
2162    // negated.
2163    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2164 
2165#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2166    if (ompt) {
2167      if (ompt_enabled.ompt_callback_sync_region_wait) {
2168        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2169            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2170            my_task_data, return_address);
2171      }
2172      if (ompt_enabled.ompt_callback_sync_region) {
2173        ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2174            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2175            my_task_data, return_address);
2176      }
2177      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none{0};
2178    }
2179#endif // OMPT_SUPPORT && OMPT_OPTIONAL
2180 
2181  }
2182 
2183  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata); }
2184                "returning TASK_CURRENT_NOT_QUEUED\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata); }
2185                gtid, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata); };
2186 
2187  return TASK_CURRENT_NOT_QUEUED0;
2188}
2189 
#if OMPT_SUPPORT && OMPT_OPTIONAL
// Non-inlined OMPT flavour of taskwait: forwards all arguments to the
// <true> instantiation of the shared template and returns its result.
OMPT_NOINLINE
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  const kmp_int32 status = __kmpc_omp_taskwait_template<true>(
      loc_ref, gtid, frame_address, return_address);
  return status;
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
2199 
2200// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
2201// complete
2202kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
2203#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2204  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
2205    OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};;
2206    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0),
2207                                    OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid));
2208  }
2209#endif
2210  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL__null, NULL__null);
2211}
2212 
2213// __kmpc_omp_taskyield: switch to a different task
2214kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
2215  kmp_taskdata_t *taskdata = NULL__null;
2216  kmp_info_t *thread;
2217  int thread_finished = FALSE0;
2218 
2219  KMP_COUNT_BLOCK(OMP_TASKYIELD)((void)0);
2220  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD)((void)0);
2221 
2222  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n"
, gtid, loc_ref, end_part); }
2223                gtid, loc_ref, end_part))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n"
, gtid, loc_ref, end_part); };
2224  __kmp_assert_valid_gtid(gtid);
2225 
2226  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
2227    thread = __kmp_threads[gtid];
2228    taskdata = thread->th.th_current_task;
2229// Should we model this as a task wait or not?
2230// Debugger: The taskwait is active. Store location and thread encountered the
2231// taskwait.
2232#if USE_ITT_BUILD1
2233// Note: These values are used by ITT events as well.
2234#endif /* USE_ITT_BUILD */
2235    taskdata->td_taskwait_counter += 1;
2236    taskdata->td_taskwait_ident = loc_ref;
2237    taskdata->td_taskwait_thread = gtid + 1;
2238 
2239#if USE_ITT_BUILD1
2240    void *itt_sync_obj = NULL__null;
2241#if USE_ITT_NOTIFY1
2242    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj)if (__builtin_expect(!!(__kmp_itt_sync_create_ptr__3_0), 0)) {
 itt_sync_obj = __kmp_itt_taskwait_object(gtid); if (itt_sync_obj
 != __null) { __kmp_itt_taskwait_starting(gtid, itt_sync_obj)
; } };
2243#endif /* USE_ITT_NOTIFY */
2244#endif /* USE_ITT_BUILD */
2245    if (!taskdata->td_flags.team_serial) {
2246      kmp_task_team_t *task_team = thread->th.th_task_team;
2247      if (task_team != NULL__null) {
2248        if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) {
2249#if OMPT_SUPPORT1
2250          if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
2251            thread->th.ompt_thread_info.ompt_task_yielded = 1;
2252#endif
2253          __kmp_execute_tasks_32(
2254              thread, gtid, (kmp_flag_32<> *)NULL__null, FALSE0,
2255              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj,
2256              __kmp_task_stealing_constraint);
2257#if OMPT_SUPPORT1
2258          if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
2259            thread->th.ompt_thread_info.ompt_task_yielded = 0;
2260#endif
2261        }
2262      }
2263    }
2264#if USE_ITT_BUILD1
2265    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj)if (__builtin_expect(!!(itt_sync_obj != __null), 0)) __kmp_itt_taskwait_finished
(gtid, itt_sync_obj);;
2266#endif /* USE_ITT_BUILD */
2267 
2268    // Debugger:  The taskwait is completed. Location remains, but thread is
2269    // negated.
2270    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2271  }
2272 
2273  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata); }
2274                "returning TASK_CURRENT_NOT_QUEUED\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata); }
2275                gtid, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata); };
2276 
2277  return TASK_CURRENT_NOT_QUEUED0;
2278}
2279 
2280// Task Reduction implementation
2281//
2282// Note: initial implementation didn't take into account the possibility
2283// to specify omp_orig for initializer of the UDR (user defined reduction).
2284// Corrected implementation takes into account the omp_orig object.
2285// Compiler is free to use old implementation if omp_orig is not specified.
2286 
/*!
@ingroup BASIC_TYPES
@{
*/

/*!
Per-item flags carrying special information about a task reduction item.
*/
typedef struct kmp_taskred_flags {
  /*! 1 - use lazy alloc/init (e.g. big objects, num tasks < num threads) */
  unsigned lazy_priv : 1;
  unsigned reserved31 : 31;
} kmp_taskred_flags_t;

/*!
Reduction item descriptor filled in by the compiler (old interface: the
initializer routine takes a single parameter).
*/
typedef struct kmp_task_red_input {
  void *reduce_shar; /**< item shared between tasks that is reduced into */
  size_t reduce_size; /**< size of the data item in bytes */
  // three compiler-generated routines (init and fini are optional):
  void *reduce_init; /**< data initialization routine (single parameter) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_comb; /**< data combiner routine */
  kmp_taskred_flags_t flags; /**< additional info flags from the compiler */
} kmp_task_red_input_t;

/*!
Reduction item descriptor as stored by the library.
*/
typedef struct kmp_taskred_data {
  void *reduce_shar; /**< item shared between tasks that is reduced into */
  size_t reduce_size; /**< size of the data item */
  kmp_taskred_flags_t flags; /**< additional info flags from the compiler */
  void *reduce_priv; /**< array of thread specific items */
  void *reduce_pend; /**< end of private data for faster comparison op */
  // three compiler-generated routines (init and fini are optional):
  void *reduce_comb; /**< data combiner routine */
  void *reduce_init; /**< data initialization routine (two parameters) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_orig; /**< original item (can be used in UDR initializer) */
} kmp_taskred_data_t;

/*!
Reduction item descriptor filled in by the compiler.

New interface: the reduce_orig field provides omp_orig to the UDR initializer.
*/
typedef struct kmp_taskred_input {
  void *reduce_shar; /**< item shared between tasks that is reduced into */
  void *reduce_orig; /**< original reduction item used for initialization */
  size_t reduce_size; /**< size of the data item */
  // three compiler-generated routines (init and fini are optional):
  void *reduce_init; /**< data initialization routine (two parameters) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_comb; /**< data combiner routine */
  kmp_taskred_flags_t flags; /**< additional info flags from the compiler */
} kmp_taskred_input_t;
/*!
@}
*/
2348 
2349template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src);
2350template <>
2351void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item,
2352                                             kmp_task_red_input_t &src) {
2353  item.reduce_orig = NULL__null;
2354}
2355template <>
2356void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item,
2357                                            kmp_taskred_input_t &src) {
2358  if (src.reduce_orig != NULL__null) {
2359    item.reduce_orig = src.reduce_orig;
2360  } else {
2361    item.reduce_orig = src.reduce_shar;
2362  } // non-NULL reduce_orig means new interface used
2363}
2364 
2365template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, size_t j);
2366template <>
2367void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item,
2368                                           size_t offset) {
2369  ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset);
2370}
2371template <>
2372void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item,
2373                                          size_t offset) {
2374  ((void (*)(void *, void *))item.reduce_init)(
2375      (char *)(item.reduce_priv) + offset, item.reduce_orig);
2376}
2377 
2378template <typename T>
2379void *__kmp_task_reduction_init(int gtid, int num, T *data) {
2380  __kmp_assert_valid_gtid(gtid);
2381  kmp_info_t *thread = __kmp_threads[gtid];
2382  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2383  kmp_uint32 nth = thread->th.th_team_nproc;
2384  kmp_taskred_data_t *arr;
2385 
2386  // check input data just in case
2387  KMP_ASSERT(tg != NULL)if (!(tg != __null)) { __kmp_debug_assert("tg != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2387); };
2388  KMP_ASSERT(data != NULL)if (!(data != __null)) { __kmp_debug_assert("data != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2388); };
2389  KMP_ASSERT(num > 0)if (!(num > 0)) { __kmp_debug_assert("num > 0", "openmp/runtime/src/kmp_tasking.cpp"
, 2389); };
2390  if (nth == 1) {
2391    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n"
, gtid, tg); }
2392                  gtid, tg))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n"
, gtid, tg); };
2393    return (void *)tg;
2394  }
2395  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n"
, gtid, tg, num); }
2396                gtid, tg, num))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n"
, gtid, tg, num); };
2397  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(___kmp_thread_malloc((thread), (num * sizeof(kmp_taskred_data_t
)), "openmp/runtime/src/kmp_tasking.cpp", 2398)
2398      thread, num * sizeof(kmp_taskred_data_t))___kmp_thread_malloc((thread), (num * sizeof(kmp_taskred_data_t
)), "openmp/runtime/src/kmp_tasking.cpp", 2398);
2399  for (int i = 0; i < num; ++i) {
2400    size_t size = data[i].reduce_size - 1;
2401    // round the size up to cache line per thread-specific item
2402    size += CACHE_LINE64 - size % CACHE_LINE64;
2403    KMP_ASSERT(data[i].reduce_comb != NULL)if (!(data[i].reduce_comb != __null)) { __kmp_debug_assert("data[i].reduce_comb != NULL"
, "openmp/runtime/src/kmp_tasking.cpp", 2403); }; // combiner is mandatory
2404    arr[i].reduce_shar = data[i].reduce_shar;
2405    arr[i].reduce_size = size;
2406    arr[i].flags = data[i].flags;
2407    arr[i].reduce_comb = data[i].reduce_comb;
2408    arr[i].reduce_init = data[i].reduce_init;
2409    arr[i].reduce_fini = data[i].reduce_fini;
2410    __kmp_assign_orig<T>(arr[i], data[i]);
2411    if (!arr[i].flags.lazy_priv) {
2412      // allocate cache-line aligned block and fill it with zeros
2413      arr[i].reduce_priv = __kmp_allocate(nth * size)___kmp_allocate((nth * size), "openmp/runtime/src/kmp_tasking.cpp"
, 2413);
2414      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
2415      if (arr[i].reduce_init != NULL__null) {
2416        // initialize all thread-specific items
2417        for (size_t j = 0; j < nth; ++j) {
2418          __kmp_call_init<T>(arr[i], j * size);
2419        }
2420      }
2421    } else {
2422      // only allocate space for pointers now,
2423      // objects will be lazily allocated/initialized if/when requested
2424      // note that __kmp_allocate zeroes the allocated memory
2425      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *))___kmp_allocate((nth * sizeof(void *)), "openmp/runtime/src/kmp_tasking.cpp"
, 2425);
2426    }
2427  }
2428  tg->reduce_data = (void *)arr;
2429  tg->reduce_num_data = num;
2430  return (void *)tg;
2431}
2432 
2433/*!
2434@ingroup TASKING
2435@param gtid      Global thread ID
2436@param num       Number of data items to reduce
2437@param data      Array of data for reduction
2438@return The taskgroup identifier
2439 
2440Initialize task reduction for the taskgroup.
2441 
2442Note: this entry supposes the optional compiler-generated initializer routine
2443has single parameter - pointer to object to be initialized. That means
2444the reduction either does not use omp_orig object, or the omp_orig is accessible
2445without help of the runtime library.
2446*/
2447void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
2448  return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
2449}
2450 
2451/*!
2452@ingroup TASKING
2453@param gtid      Global thread ID
2454@param num       Number of data items to reduce
2455@param data      Array of data for reduction
2456@return The taskgroup identifier
2457 
2458Initialize task reduction for the taskgroup.
2459 
2460Note: this entry supposes the optional compiler-generated initializer routine
2461has two parameters, pointer to object to be initialized and pointer to omp_orig
2462*/
2463void *__kmpc_taskred_init(int gtid, int num, void *data) {
2464  return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
2465}
2466 
2467// Copy task reduction data (except for shared pointers).
2468template <typename T>
2469void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
2470                                    kmp_taskgroup_t *tg, void *reduce_data) {
2471  kmp_taskred_data_t *arr;
2472  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
 " from data %p\n", thr, tg, reduce_data); }
2473                " from data %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
 " from data %p\n", thr, tg, reduce_data); }
2474                thr, tg, reduce_data))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
 " from data %p\n", thr, tg, reduce_data); };
2475  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(___kmp_thread_malloc((thr), (num * sizeof(kmp_taskred_data_t)
), "openmp/runtime/src/kmp_tasking.cpp", 2476)
2476      thr, num * sizeof(kmp_taskred_data_t))___kmp_thread_malloc((thr), (num * sizeof(kmp_taskred_data_t)
), "openmp/runtime/src/kmp_tasking.cpp", 2476);
2477  // threads will share private copies, thunk routines, sizes, flags, etc.:
2478  KMP_MEMCPYmemcpy(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
2479  for (int i = 0; i < num; ++i) {
2480    arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers
2481  }
2482  tg->reduce_data = (void *)arr;
2483  tg->reduce_num_data = num;
2484}
2485 
2486/*!
2487@ingroup TASKING
2488@param gtid    Global thread ID
2489@param tskgrp  The taskgroup ID (optional)
2490@param data    Shared location of the item
2491@return The pointer to per-thread data
2492 
2493Get thread-specific location of data item
2494*/
2495void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
2496  __kmp_assert_valid_gtid(gtid);
2497  kmp_info_t *thread = __kmp_threads[gtid];
2498  kmp_int32 nth = thread->th.th_team_nproc;
2499  if (nth == 1)
2500    return data; // nothing to do
2501 
2502  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
2503  if (tg == NULL__null)
2504    tg = thread->th.th_current_task->td_taskgroup;
2505  KMP_ASSERT(tg != NULL)if (!(tg != __null)) { __kmp_debug_assert("tg != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2505); };
2506  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data);
2507  kmp_int32 num = tg->reduce_num_data;
2508  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2509 
2510  KMP_ASSERT(data != NULL)if (!(data != __null)) { __kmp_debug_assert("data != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2510); };
2511  while (tg != NULL__null) {
2512    for (int i = 0; i < num; ++i) {
2513      if (!arr[i].flags.lazy_priv) {
2514        if (data == arr[i].reduce_shar ||
2515            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
2516          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
2517      } else {
2518        // check shared location first
2519        void **p_priv = (void **)(arr[i].reduce_priv);
2520        if (data == arr[i].reduce_shar)
2521          goto found;
2522        // check if we get some thread specific location as parameter
2523        for (int j = 0; j < nth; ++j)
2524          if (data == p_priv[j])
2525            goto found;
2526        continue; // not found, continue search
2527      found:
2528        if (p_priv[tid] == NULL__null) {
2529          // allocate thread specific object lazily
2530          p_priv[tid] = __kmp_allocate(arr[i].reduce_size)___kmp_allocate((arr[i].reduce_size), "openmp/runtime/src/kmp_tasking.cpp"
, 2530);
2531          if (arr[i].reduce_init != NULL__null) {
2532            if (arr[i].reduce_orig != NULL__null) { // new interface
2533              ((void (*)(void *, void *))arr[i].reduce_init)(
2534                  p_priv[tid], arr[i].reduce_orig);
2535            } else { // old interface (single parameter)
2536              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
2537            }
2538          }
2539        }
2540        return p_priv[tid];
2541      }
2542    }
2543    tg = tg->parent;
2544    arr = (kmp_taskred_data_t *)(tg->reduce_data);
2545    num = tg->reduce_num_data;
2546  }
2547  KMP_ASSERT2(0, "Unknown task reduction item")if (!(0)) { __kmp_debug_assert(("Unknown task reduction item"
), "openmp/runtime/src/kmp_tasking.cpp", 2547); };
2548  return NULL__null; // ERROR, this line never executed
2549}
2550 
2551// Finalize task reduction.
2552// Called from __kmpc_end_taskgroup()
2553static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
2554  kmp_int32 nth = th->th.th_team_nproc;
2555  KMP_DEBUG_ASSERT(nth > 1)if (!(nth > 1)) { __kmp_debug_assert("nth > 1", "openmp/runtime/src/kmp_tasking.cpp"
, 2555); }; // should not be called if nth == 1
2556  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
2557  kmp_int32 num = tg->reduce_num_data;
2558  for (int i = 0; i < num; ++i) {
2559    void *sh_data = arr[i].reduce_shar;
2560    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
2561    void (*f_comb)(void *, void *) =
2562        (void (*)(void *, void *))(arr[i].reduce_comb);
2563    if (!arr[i].flags.lazy_priv) {
2564      void *pr_data = arr[i].reduce_priv;
2565      size_t size = arr[i].reduce_size;
2566      for (int j = 0; j < nth; ++j) {
2567        void *priv_data = (char *)pr_data + j * size;
2568        f_comb(sh_data, priv_data); // combine results
2569        if (f_fini)
2570          f_fini(priv_data); // finalize if needed
2571      }
2572    } else {
2573      void **pr_data = (void **)(arr[i].reduce_priv);
2574      for (int j = 0; j < nth; ++j) {
2575        if (pr_data[j] != NULL__null) {
2576          f_comb(sh_data, pr_data[j]); // combine results
2577          if (f_fini)
2578            f_fini(pr_data[j]); // finalize if needed
2579          __kmp_free(pr_data[j])___kmp_free((pr_data[j]), "openmp/runtime/src/kmp_tasking.cpp"
, 2579);
2580        }
2581      }
2582    }
2583    __kmp_free(arr[i].reduce_priv)___kmp_free((arr[i].reduce_priv), "openmp/runtime/src/kmp_tasking.cpp"
, 2583);
2584  }
2585  __kmp_thread_free(th, arr)___kmp_thread_free((th), (arr), "openmp/runtime/src/kmp_tasking.cpp"
, 2585);
2586  tg->reduce_data = NULL__null;
2587  tg->reduce_num_data = 0;
2588}
2589 
2590// Cleanup task reduction data for parallel or worksharing,
2591// do not touch task private data other threads still working with.
2592// Called from __kmpc_end_taskgroup()
2593static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
2594  __kmp_thread_free(th, tg->reduce_data)___kmp_thread_free((th), (tg->reduce_data), "openmp/runtime/src/kmp_tasking.cpp"
, 2594);
2595  tg->reduce_data = NULL__null;
2596  tg->reduce_num_data = 0;
2597}
2598 
2599template <typename T>
2600void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
2601                                         int num, T *data) {
2602  __kmp_assert_valid_gtid(gtid);
2603  kmp_info_t *thr = __kmp_threads[gtid];
2604  kmp_int32 nth = thr->th.th_team_nproc;
2605  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
2606  if (nth == 1) {
2607    KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n"
, gtid, thr->th.th_current_task->td_taskgroup); }
2608             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n"
, gtid, thr->th.th_current_task->td_taskgroup); }
2609              gtid, thr->th.th_current_task->td_taskgroup))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n"
, gtid, thr->th.th_current_task->td_taskgroup); };
2610    return (void *)thr->th.th_current_task->td_taskgroup;
2611  }
2612  kmp_team_t *team = thr->th.th_team;
2613  void *reduce_data;
2614  kmp_taskgroup_t *tg;
2615  reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws])(&team->t.t_tg_reduce_data[is_ws])->load(std::memory_order_relaxed
);
2616  if (reduce_data == NULL__null &&
2617      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2618                                 (void *)1)) {
2619    // single thread enters this block to initialize common reduction data
2620    KMP_DEBUG_ASSERT(reduce_data == NULL)if (!(reduce_data == __null)) { __kmp_debug_assert("reduce_data == __null"
, "openmp/runtime/src/kmp_tasking.cpp", 2620); };
2621    // first initialize own data, then make a copy other threads can use
2622    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
2623    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t))___kmp_thread_malloc((thr), (num * sizeof(kmp_taskred_data_t)
), "openmp/runtime/src/kmp_tasking.cpp", 2623);
2624    KMP_MEMCPYmemcpy(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
2625    // fini counters should be 0 at this point
2626    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0)if (!((&team->t.t_tg_fini_counter[0])->load(std::memory_order_relaxed
) == 0)) { __kmp_debug_assert("(&team->t.t_tg_fini_counter[0])->load(std::memory_order_relaxed) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 2626); };
2627    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0)if (!((&team->t.t_tg_fini_counter[1])->load(std::memory_order_relaxed
) == 0)) { __kmp_debug_assert("(&team->t.t_tg_fini_counter[1])->load(std::memory_order_relaxed) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 2627); };
2628    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data)(&team->t.t_tg_reduce_data[is_ws])->store(reduce_data
, std::memory_order_release);
2629  } else {
2630    while (
2631        (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])(&team->t.t_tg_reduce_data[is_ws])->load(std::memory_order_acquire
)) ==
2632        (void *)1) { // wait for task reduction initialization
2633      KMP_CPU_PAUSE()__kmp_x86_pause();
2634    }
2635    KMP_DEBUG_ASSERT(reduce_data > (void *)1)if (!(reduce_data > (void *)1)) { __kmp_debug_assert("reduce_data > (void *)1"
, "openmp/runtime/src/kmp_tasking.cpp", 2635); }; // should be valid pointer here
2636    tg = thr->th.th_current_task->td_taskgroup;
2637    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
2638  }
2639  return tg;
2640}
2641 
2642/*!
2643@ingroup TASKING
2644@param loc       Source location info
2645@param gtid      Global thread ID
2646@param is_ws     Is 1 if the reduction is for worksharing, 0 otherwise
2647@param num       Number of data items to reduce
2648@param data      Array of data for reduction
2649@return The taskgroup identifier
2650 
2651Initialize task reduction for a parallel or worksharing.
2652 
2653Note: this entry supposes the optional compiler-generated initializer routine
2654has single parameter - pointer to object to be initialized. That means
2655the reduction either does not use omp_orig object, or the omp_orig is accessible
2656without help of the runtime library.
2657*/
2658void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
2659                                          int num, void *data) {
2660  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2661                                            (kmp_task_red_input_t *)data);
2662}
2663 
2664/*!
2665@ingroup TASKING
2666@param loc       Source location info
2667@param gtid      Global thread ID
2668@param is_ws     Is 1 if the reduction is for worksharing, 0 otherwise
2669@param num       Number of data items to reduce
2670@param data      Array of data for reduction
2671@return The taskgroup identifier
2672 
2673Initialize task reduction for a parallel or worksharing.
2674 
2675Note: this entry supposes the optional compiler-generated initializer routine
2676has two parameters, pointer to object to be initialized and pointer to omp_orig
2677*/
2678void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
2679                                   void *data) {
2680  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2681                                            (kmp_taskred_input_t *)data);
2682}
2683 
2684/*!
2685@ingroup TASKING
2686@param loc       Source location info
2687@param gtid      Global thread ID
2688@param is_ws     Is 1 if the reduction is for worksharing, 0 otherwise
2689 
2690Finalize task reduction for a parallel or worksharing.
2691*/
2692void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
2693  __kmpc_end_taskgroup(loc, gtid);
2694}
2695 
2696// __kmpc_taskgroup: Start a new taskgroup
2697void __kmpc_taskgroup(ident_t *loc, int gtid) {
2698  __kmp_assert_valid_gtid(gtid);
2699  kmp_info_t *thread = __kmp_threads[gtid];
2700  kmp_taskdata_t *taskdata = thread->th.th_current_task;
2701  kmp_taskgroup_t *tg_new =
2702      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t))___kmp_thread_malloc((thread), (sizeof(kmp_taskgroup_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 2702);
2703  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_taskgroup: T#%d loc=%p group=%p\n"
, gtid, loc, tg_new); };
2704  KMP_ATOMIC_ST_RLX(&tg_new->count, 0)(&tg_new->count)->store(0, std::memory_order_relaxed
);
2705  KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq)(&tg_new->cancel_request)->store(cancel_noreq, std::
memory_order_relaxed);
2706  tg_new->parent = taskdata->td_taskgroup;
2707  tg_new->reduce_data = NULL__null;
2708  tg_new->reduce_num_data = 0;
2709  tg_new->gomp_data = NULL__null;
2710  taskdata->td_taskgroup = tg_new;
2711 
2712#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2713  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region), 0
)) {
2714    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2715    if (!codeptr)
2716      codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2717    kmp_team_t *team = thread->th.th_team;
2718    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
2719    // FIXME: I think this is wrong for lwt!
2720    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
2721 
2722    ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2723        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2724        &(my_task_data), codeptr);
2725  }
2726#endif
2727}
2728 
2729// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
2730//                       and its descendants are complete
2731void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
2732  __kmp_assert_valid_gtid(gtid);
2733  kmp_info_t *thread = __kmp_threads[gtid];
2734  kmp_taskdata_t *taskdata = thread->th.th_current_task;
2735  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2736  int thread_finished = FALSE0;
2737 
2738#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2739  kmp_team_t *team;
2740  ompt_data_t my_task_data;
2741  ompt_data_t my_parallel_data;
2742  void *codeptr = nullptr;
2743  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
2744    team = thread->th.th_team;
2745    my_task_data = taskdata->ompt_task_info.task_data;
2746    // FIXME: I think this is wrong for lwt!
2747    my_parallel_data = team->t.ompt_team_info.parallel_data;
2748    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2749    if (!codeptr)
2750      codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2751  }
2752#endif
2753 
2754  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n"
, gtid, loc); };
2755  KMP_DEBUG_ASSERT(taskgroup != NULL)if (!(taskgroup != __null)) { __kmp_debug_assert("taskgroup != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 2755); };
2756  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP)((void)0);
2757 
2758  if (__kmp_tasking_mode != tskm_immediate_exec) {
2759    // mark task as waiting not on a barrier
2760    taskdata->td_taskwait_counter += 1;
2761    taskdata->td_taskwait_ident = loc;
2762    taskdata->td_taskwait_thread = gtid + 1;
2763#if USE_ITT_BUILD1
2764    // For ITT the taskgroup wait is similar to taskwait until we need to
2765    // distinguish them
2766    void *itt_sync_obj = NULL__null;
2767#if USE_ITT_NOTIFY1
2768    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj)if (__builtin_expect(!!(__kmp_itt_sync_create_ptr__3_0), 0)) {
 itt_sync_obj = __kmp_itt_taskwait_object(gtid); if (itt_sync_obj
 != __null) { __kmp_itt_taskwait_starting(gtid, itt_sync_obj)
; } };
2769#endif /* USE_ITT_NOTIFY */
2770#endif /* USE_ITT_BUILD */
2771 
2772#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2773    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region_wait
), 0)) {
2774      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2775          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2776          &(my_task_data), codeptr);
2777    }
2778#endif
2779 
2780    if (!taskdata->td_flags.team_serial ||
2781        (thread->th.th_task_team != NULL__null &&
2782         (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
2783          thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
2784      kmp_flag_32<false, false> flag(
2785          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count))reinterpret_cast<std::atomic<kmp_uint32> *>(&
(taskgroup->count)), 0U);
2786      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count)(&taskgroup->count)->load(std::memory_order_acquire
) != 0) {
2787        flag.execute_tasks(thread, gtid, FALSE0,
2788                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj,
2789                           __kmp_task_stealing_constraint);
2790      }
2791    }
2792    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
2793 
2794#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2795    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region_wait
), 0)) {
2796      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2797          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2798          &(my_task_data), codeptr);
2799    }
2800#endif
2801 
2802#if USE_ITT_BUILD1
2803    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj)if (__builtin_expect(!!(itt_sync_obj != __null), 0)) __kmp_itt_taskwait_finished
(gtid, itt_sync_obj);;
2804    KMP_FSYNC_ACQUIRED(taskdata)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(taskdata)); // acquire self - sync with descendants
2805#endif /* USE_ITT_BUILD */
2806  }
2807  KMP_DEBUG_ASSERT(taskgroup->count == 0)if (!(taskgroup->count == 0)) { __kmp_debug_assert("taskgroup->count == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 2807); };
2808 
2809  if (taskgroup->reduce_data != NULL__null &&
2810      !taskgroup->gomp_data) { // need to reduce?
2811    int cnt;
2812    void *reduce_data;
2813    kmp_team_t *t = thread->th.th_team;
2814    kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
2815    // check if <priv> data of the first reduction variable shared for the team
2816    void *priv0 = arr[0].reduce_priv;
2817    if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])(&t->t.t_tg_reduce_data[0])->load(std::memory_order_acquire
)) != NULL__null &&
2818        ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
2819      // finishing task reduction on parallel
2820      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0])(&t->t.t_tg_fini_counter[0])->fetch_add(1, std::memory_order_acq_rel
);
2821      if (cnt == thread->th.th_team_nproc - 1) {
2822        // we are the last thread passing __kmpc_reduction_modifier_fini()
2823        // finalize task reduction:
2824        __kmp_task_reduction_fini(thread, taskgroup);
2825        // cleanup fields in the team structure:
2826        // TODO: is relaxed store enough here (whole barrier should follow)?
2827        __kmp_thread_free(thread, reduce_data)___kmp_thread_free((thread), (reduce_data), "openmp/runtime/src/kmp_tasking.cpp"
, 2827);
2828        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL)(&t->t.t_tg_reduce_data[0])->store(__null, std::memory_order_release
);
2829        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0)(&t->t.t_tg_fini_counter[0])->store(0, std::memory_order_release
);
2830      } else {
2831        // we are not the last thread passing __kmpc_reduction_modifier_fini(),
2832        // so do not finalize reduction, just clean own copy of the data
2833        __kmp_task_reduction_clean(thread, taskgroup);
2834      }
2835    } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])(&t->t.t_tg_reduce_data[1])->load(std::memory_order_acquire
)) !=
2836                   NULL__null &&
2837               ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
2838      // finishing task reduction on worksharing
2839      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1])(&t->t.t_tg_fini_counter[1])->fetch_add(1, std::memory_order_acq_rel
);
2840      if (cnt == thread->th.th_team_nproc - 1) {
2841        // we are the last thread passing __kmpc_reduction_modifier_fini()
2842        __kmp_task_reduction_fini(thread, taskgroup);
2843        // cleanup fields in team structure:
2844        // TODO: is relaxed store enough here (whole barrier should follow)?
2845        __kmp_thread_free(thread, reduce_data)___kmp_thread_free((thread), (reduce_data), "openmp/runtime/src/kmp_tasking.cpp"
, 2845);
2846        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL)(&t->t.t_tg_reduce_data[1])->store(__null, std::memory_order_release
);
2847        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0)(&t->t.t_tg_fini_counter[1])->store(0, std::memory_order_release
);
2848      } else {
2849        // we are not the last thread passing __kmpc_reduction_modifier_fini(),
2850        // so do not finalize reduction, just clean own copy of the data
2851        __kmp_task_reduction_clean(thread, taskgroup);
2852      }
2853    } else {
2854      // finishing task reduction on taskgroup
2855      __kmp_task_reduction_fini(thread, taskgroup);
2856    }
2857  }
2858  // Restore parent taskgroup for the current task
2859  taskdata->td_taskgroup = taskgroup->parent;
2860  __kmp_thread_free(thread, taskgroup)___kmp_thread_free((thread), (taskgroup), "openmp/runtime/src/kmp_tasking.cpp"
, 2860);
2861 
2862  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n"
, gtid, taskdata); }
2863                gtid, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n"
, gtid, taskdata); };
2864 
2865#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2866  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region), 0
)) {
2867    ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2868        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2869        &(my_task_data), codeptr);
2870  }
2871#endif
2872}
2873 
2874static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
2875                                           kmp_task_team_t *task_team,
2876                                           kmp_int32 is_constrained) {
2877  kmp_task_t *task = NULL__null;
2878  kmp_taskdata_t *taskdata;
2879  kmp_taskdata_t *current;
2880  kmp_thread_data_t *thread_data;
2881  int ntasks = task_team->tt.tt_num_task_pri;
2882  if (ntasks == 0) {
2883    KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n"
, gtid); }
2884        20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n"
, gtid); };
2885    return NULL__null;
2886  }
2887  do {
2888    // decrement num_tasks to "reserve" one task to get for execution
2889    if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
2890                                   ntasks - 1))
2891      break;
2892  } while (ntasks > 0);
2893  if (ntasks == 0) {
2894    KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n"
, __kmp_get_global_thread_id()); }
2895                  __kmp_get_gtid()))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n"
, __kmp_get_global_thread_id()); };
2896    return NULL__null;
2897  }
2898  // We got a "ticket" to get a "reserved" priority task
2899  int deque_ntasks;
2900  kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
2901  do {
2902    KMP_ASSERT(list != NULL)if (!(list != __null)) { __kmp_debug_assert("list != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2902); };
2903    thread_data = &list->td;
2904    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2905    deque_ntasks = thread_data->td.td_deque_ntasks;
2906    if (deque_ntasks == 0) {
2907      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2908      KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task: T#%d No tasks to get from %p\n"
, __kmp_get_global_thread_id(), thread_data); }
2909                    __kmp_get_gtid(), thread_data))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task: T#%d No tasks to get from %p\n"
, __kmp_get_global_thread_id(), thread_data); };
2910      list = list->next;
2911    }
2912  } while (deque_ntasks == 0);
2913  KMP_DEBUG_ASSERT(deque_ntasks)if (!(deque_ntasks)) { __kmp_debug_assert("deque_ntasks", "openmp/runtime/src/kmp_tasking.cpp"
, 2913); };
2914  int target = thread_data->td.td_deque_head;
2915  current = __kmp_threads[gtid]->th.th_current_task;
2916  taskdata = thread_data->td.td_deque[target];
2917  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2918    // Bump head pointer and Wrap.
2919    thread_data->td.td_deque_head =
2920        (target + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
2921  } else {
2922    if (!task_team->tt.tt_untied_task_encountered) {
2923      // The TSC does not allow to steal victim task
2924      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2925      KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
 "from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); }
2926                    "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
 "from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); }
2927                    gtid, thread_data, task_team, deque_ntasks, target,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
 "from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); }
2928                    thread_data->td.td_deque_tail))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
 "from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); };
2929      task_team->tt.tt_num_task_pri++; // atomic inc, restore value
2930      return NULL__null;
2931    }
2932    int i;
2933    // walk through the deque trying to steal any task
2934    taskdata = NULL__null;
2935    for (i = 1; i < deque_ntasks; ++i) {
2936      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
2937      taskdata = thread_data->td.td_deque[target];
2938      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2939        break; // found task to execute
2940      } else {
2941        taskdata = NULL__null;
2942      }
2943    }
2944    if (taskdata == NULL__null) {
2945      // No appropriate candidate found to execute
2946      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2947      KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
 "%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2948          10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
 "%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2949               "%p: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
 "%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2950               gtid, thread_data, task_team, deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
 "%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2951               thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
 "%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); };
2952      task_team->tt.tt_num_task_pri++; // atomic inc, restore value
2953      return NULL__null;
2954    }
2955    int prev = target;
2956    for (i = i + 1; i < deque_ntasks; ++i) {
2957      // shift remaining tasks in the deque left by 1
2958      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
2959      thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
2960      prev = target;
2961    }
2962    KMP_DEBUG_ASSERT(if (!(thread_data->td.td_deque_tail == (kmp_uint32)((target
 + 1) & ((thread_data->td).td_deque_size - 1)))) { __kmp_debug_assert
("thread_data->td.td_deque_tail == (kmp_uint32)((target + 1) & ((thread_data->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 2964); }
2963        thread_data->td.td_deque_tail ==if (!(thread_data->td.td_deque_tail == (kmp_uint32)((target
 + 1) & ((thread_data->td).td_deque_size - 1)))) { __kmp_debug_assert
("thread_data->td.td_deque_tail == (kmp_uint32)((target + 1) & ((thread_data->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 2964); }
2964        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)))if (!(thread_data->td.td_deque_tail == (kmp_uint32)((target
 + 1) & ((thread_data->td).td_deque_size - 1)))) { __kmp_debug_assert
("thread_data->td.td_deque_tail == (kmp_uint32)((target + 1) & ((thread_data->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 2964); };
2965    thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped))
2966  }
2967  thread_data->td.td_deque_ntasks = deque_ntasks - 1;
2968  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2969  task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
2970  return task;
2971}
2972 
2973// __kmp_remove_my_task: remove a task from my own deque
2974static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
2975                                        kmp_task_team_t *task_team,
2976                                        kmp_int32 is_constrained) {
2977  kmp_task_t *task;
2978  kmp_taskdata_t *taskdata;
2979  kmp_thread_data_t *thread_data;
2980  kmp_uint32 tail;
2981 
2982  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 2982); };
2983  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=if (!(task_team->tt.tt_threads_data != __null)) { __kmp_debug_assert
("task_team->tt.tt_threads_data != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 2984); }
2984                   NULL)if (!(task_team->tt.tt_threads_data != __null)) { __kmp_debug_assert
("task_team->tt.tt_threads_data != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 2984); }; // Caller should check this condition
2985 
2986  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
2987 
2988  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n"
, gtid, thread_data->td.td_deque_ntasks, thread_data->td
.td_deque_head, thread_data->td.td_deque_tail); }
2989                gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n"
, gtid, thread_data->td.td_deque_ntasks, thread_data->td
.td_deque_head, thread_data->td.td_deque_tail); }
2990                thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n"
, gtid, thread_data->td.td_deque_ntasks, thread_data->td
.td_deque_head, thread_data->td.td_deque_tail); };
2991 
2992  if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) == 0) {
2993    KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2994             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2995              "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2996              gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2997              thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); };
2998    return NULL__null;
2999  }
3000 
3001  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3002 
3003  if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) == 0) {
3004    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3005    KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3006             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3007              "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3008              gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3009              thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); };
3010    return NULL__null;
3011  }
3012 
3013  tail = (thread_data->td.td_deque_tail - 1) &
3014         TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1); // Wrap index.
3015  taskdata = thread_data->td.td_deque[tail];
3016 
3017  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
3018                             thread->th.th_current_task)) {
3019    // The TSC does not allow to steal victim task
3020    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3021    KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3022             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3023              "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3024              gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3025              thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
 "ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); };
3026    return NULL__null;
3027  }
3028 
3029  thread_data->td.td_deque_tail = tail;
3030  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1)(thread_data->td.td_deque_ntasks) = (thread_data->td.td_deque_ntasks
 - 1);
3031 
3032  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3033 
3034  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
 "ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
3035                "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
 "ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
3036                gtid, taskdata, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
 "ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
3037                thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
 "ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); };
3038 
3039  task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
3040  return task;
3041}
3042 
3043// __kmp_steal_task: remove a task from another thread's deque
3044// Assume that calling thread has already checked existence of
3045// task_team thread_data before calling this routine.
3046static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
3047                                    kmp_task_team_t *task_team,
3048                                    std::atomic<kmp_int32> *unfinished_threads,
3049                                    int *thread_finished,
3050                                    kmp_int32 is_constrained) {
3051  kmp_task_t *task;
3052  kmp_taskdata_t *taskdata;
3053  kmp_taskdata_t *current;
3054  kmp_thread_data_t *victim_td, *threads_data;
3055  kmp_int32 target;
3056  kmp_int32 victim_tid;
3057 
3058  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 3058); };
3059 
3060  threads_data = task_team->tt.tt_threads_data;
3061  KMP_DEBUG_ASSERT(threads_data != NULL)if (!(threads_data != __null)) { __kmp_debug_assert("threads_data != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 3061); }; // Caller should check this condition
3062 
3063  victim_tid = victim_thr->th.th_info.ds.ds_tid;
3064  victim_td = &threads_data[victim_tid];
3065 
3066  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3067                "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3068                gtid, __kmp_gtid_from_thread(victim_thr), task_team,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3069                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3070                victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); };
3071 
3072  if (TCR_4(victim_td->td.td_deque_ntasks)(victim_td->td.td_deque_ntasks) == 0) {
3073    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3074                  "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3075                  gtid, __kmp_gtid_from_thread(victim_thr), task_team,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3076                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3077                  victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); };
3078    return NULL__null;
3079  }
3080 
3081  __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
3082 
3083  int ntasks = TCR_4(victim_td->td.td_deque_ntasks)(victim_td->td.td_deque_ntasks);
3084  // Check again after we acquire the lock
3085  if (ntasks == 0) {
3086    __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3087    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3088                  "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3089                  gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3090                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); };
3091    return NULL__null;
3092  }
3093 
3094  KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL)if (!(victim_td->td.td_deque != __null)) { __kmp_debug_assert
("victim_td->td.td_deque != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 3094); };
3095  current = __kmp_threads[gtid]->th.th_current_task;
3096  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
3097  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3098    // Bump head pointer and Wrap.
3099    victim_td->td.td_deque_head =
3100        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td)((victim_td->td).td_deque_size - 1);
3101  } else {
3102    if (!task_team->tt.tt_untied_task_encountered) {
3103      // The TSC does not allow to steal victim task
3104      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3105      KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3106                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3107                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3108                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); };
3109      return NULL__null;
3110    }
3111    int i;
3112    // walk through victim's deque trying to steal any task
3113    target = victim_td->td.td_deque_head;
3114    taskdata = NULL__null;
3115    for (i = 1; i < ntasks; ++i) {
3116      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td)((victim_td->td).td_deque_size - 1);
3117      taskdata = victim_td->td.td_deque[target];
3118      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3119        break; // found victim task
3120      } else {
3121        taskdata = NULL__null;
3122      }
3123    }
3124    if (taskdata == NULL__null) {
3125      // No appropriate candidate to steal found
3126      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3127      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3128                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3129                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3130                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); };
3131      return NULL__null;
3132    }
3133    int prev = target;
3134    for (i = i + 1; i < ntasks; ++i) {
3135      // shift remaining tasks in the deque left by 1
3136      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td)((victim_td->td).td_deque_size - 1);
3137      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
3138      prev = target;
3139    }
3140    KMP_DEBUG_ASSERT(if (!(victim_td->td.td_deque_tail == (kmp_uint32)((target +
 1) & ((victim_td->td).td_deque_size - 1)))) { __kmp_debug_assert
("victim_td->td.td_deque_tail == (kmp_uint32)((target + 1) & ((victim_td->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 3142); }
3141        victim_td->td.td_deque_tail ==if (!(victim_td->td.td_deque_tail == (kmp_uint32)((target +
 1) & ((victim_td->td).td_deque_size - 1)))) { __kmp_debug_assert
("victim_td->td.td_deque_tail == (kmp_uint32)((target + 1) & ((victim_td->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 3142); }
3142        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)))if (!(victim_td->td.td_deque_tail == (kmp_uint32)((target +
 1) & ((victim_td->td).td_deque_size - 1)))) { __kmp_debug_assert
("victim_td->td.td_deque_tail == (kmp_uint32)((target + 1) & ((victim_td->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 3142); };
3143    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped))
3144  }
3145  if (*thread_finished) {
3146    // We need to un-mark this victim as a finished victim.  This must be done
3147    // before releasing the lock, or else other threads (starting with the
3148    // primary thread victim) might be prematurely released from the barrier!!!
3149#if KMP_DEBUG1
3150    kmp_int32 count =
3151#endif
3152        KMP_ATOMIC_INC(unfinished_threads)(unfinished_threads)->fetch_add(1, std::memory_order_acq_rel
);
3153    KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); }
3154        20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); }
3155        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); }
3156         gtid, count + 1, task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); };
3157    *thread_finished = FALSE0;
3158  }
3159  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1)(victim_td->td.td_deque_ntasks) = (ntasks - 1);
3160 
3161  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3162 
3163  KMP_COUNT_BLOCK(TASK_stolen)((void)0);
3164  KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3165           ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3166            "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3167            gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3168            ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
 "task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); };
3169 
3170  task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
3171  return task;
3172}
3173 
3174// __kmp_execute_tasks_template: Choose and execute tasks until either the
3175// condition is satisfied (return true) or there are none left (return false).
3176//
3177// final_spin is TRUE if this is the spin at the release barrier.
3178// thread_finished indicates whether the thread is finished executing all
3179// the tasks it has on its deque, and is at the release barrier.
3180// spinner is the location on which to spin.
3181// spinner == NULL means only execute a single task and return.
3182// checker is the value to check to terminate the spin.
// NOTE(review): "spinner" and "checker" are not parameters of this function.
// The code below polls `flag` via flag->done_check(), and returns TRUE after
// running a single task when flag == NULL; the two descriptions above
// presumably predate the flag-based interface -- confirm.
// Returns FALSE right away when the thread has no task team or no current
// task. Candidate tasks are looked for in this order: priority tasks (only
// if tt_num_task_pri is nonzero), the thread's own queue (while use_own_tasks
// is set), then stealing from another thread (only if nthreads > 1).
3183template <class C>
3184static inline int __kmp_execute_tasks_template(
3185    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
3186    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj,
3187    kmp_int32 is_constrained) {
3188  kmp_task_team_t *task_team = thread->th.th_task_team;
3189  kmp_thread_data_t *threads_data;
3190  kmp_task_t *task;
3191  kmp_info_t *other_thread;
3192  kmp_taskdata_t *current_task = thread->th.th_current_task;
3193  std::atomic<kmp_int32> *unfinished_threads;
3194  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
3195                      tid = thread->th.th_info.ds.ds_tid;
3196 
3197  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 3197); };
3198  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid])if (!(thread == __kmp_threads[gtid])) { __kmp_debug_assert("thread == __kmp_threads[gtid]"
, "openmp/runtime/src/kmp_tasking.cpp", 3198); };
3199 
  // Nothing to execute without a task team or a current task.
3200  if (task_team == NULL__null || current_task == NULL__null)
3201    return FALSE0;
3202 
3203  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
 "*thread_finished=%d\n", gtid, final_spin, *thread_finished)
; }
3204                "*thread_finished=%d\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
 "*thread_finished=%d\n", gtid, final_spin, *thread_finished)
; }
3205                gtid, final_spin, *thread_finished))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
 "*thread_finished=%d\n", gtid, final_spin, *thread_finished)
; };
3206 
3207  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0;
3208  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data)((void *)(task_team->tt.tt_threads_data));
3209 
3210  KMP_DEBUG_ASSERT(threads_data != NULL)if (!(threads_data != __null)) { __kmp_debug_assert("threads_data != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 3210); };
3211 
3212  nthreads = task_team->tt.tt_nproc;
3213  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
3214  KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||if (!(nthreads > 1 || task_team->tt.tt_found_proxy_tasks
 || task_team->tt.tt_hidden_helper_task_encountered)) { __kmp_debug_assert
("nthreads > 1 || task_team->tt.tt_found_proxy_tasks || task_team->tt.tt_hidden_helper_task_encountered"
, "openmp/runtime/src/kmp_tasking.cpp", 3215); }
3215                   task_team->tt.tt_hidden_helper_task_encountered)if (!(nthreads > 1 || task_team->tt.tt_found_proxy_tasks
 || task_team->tt.tt_hidden_helper_task_encountered)) { __kmp_debug_assert
("nthreads > 1 || task_team->tt.tt_found_proxy_tasks || task_team->tt.tt_hidden_helper_task_encountered"
, "openmp/runtime/src/kmp_tasking.cpp", 3215); };
3216  KMP_DEBUG_ASSERT(*unfinished_threads >= 0)if (!(*unfinished_threads >= 0)) { __kmp_debug_assert("*unfinished_threads >= 0"
, "openmp/runtime/src/kmp_tasking.cpp", 3216); };
3217 
3218  while (1) { // Outer loop keeps trying to find tasks in case of single thread
3219    // getting tasks from target constructs
3220    while (1) { // Inner loop to find a task and execute it
3221      task = NULL__null;
3222      if (task_team->tt.tt_num_task_pri) { // get priority task first
3223        task = __kmp_get_priority_task(gtid, task_team, is_constrained);
3224      }
3225      if (task == NULL__null && use_own_tasks) { // check own queue next
3226        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
3227      }
3228      if ((task == NULL__null) && (nthreads > 1)) { // Steal a task finally
3229        int asleep = 1;
3230        use_own_tasks = 0;
3231        // Try to steal from the last place I stole from successfully.
3232        if (victim_tid == -2) { // haven't stolen anything yet
3233          victim_tid = threads_data[tid].td.td_deque_last_stolen;
3234          if (victim_tid !=
3235              -1) // if we have a last stolen from victim, get the thread
3236            other_thread = threads_data[victim_tid].td.td_thr;
3237        }
3238        if (victim_tid != -1) { // found last victim
3239          asleep = 0;
3240        } else if (!new_victim) { // no recent steals and we haven't already
3241          // used a new victim; select a random thread
3242          do { // Find a different thread to steal work from.
3243            // Pick a random thread. Initial plan was to cycle through all the
3244            // threads, and only return if we tried to steal from every thread,
3245            // and failed.  Arch says that's not such a great idea.
3246            victim_tid = __kmp_get_random(thread) % (nthreads - 1);
3247            if (victim_tid >= tid) {
3248              ++victim_tid; // Adjusts random distribution to exclude self
3249            }
3250            // Found a potential victim
3251            other_thread = threads_data[victim_tid].td.td_thr;
3252            // There is a slight chance that __kmp_enable_tasking() did not wake
3253            // up all threads waiting at the barrier.  If victim is sleeping,
3254            // then wake it up. Since we were going to pay the cache miss
3255            // penalty for referencing another thread's kmp_info_t struct
3256            // anyway,
3257            // the check shouldn't cost too much performance at this point. In
3258            // extra barrier mode, tasks do not sleep at the separate tasking
3259            // barrier, so this isn't a problem.
3260            asleep = 0;
3261            if ((__kmp_tasking_mode == tskm_task_teams) &&
3262                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) &&
3263                (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc))((void *)(const_cast<void *>(other_thread->th.th_sleep_loc
))) !=
3264                 NULL__null)) {
3265              asleep = 1;
3266              __kmp_null_resume_wrapper(other_thread);
3267              // A sleeping thread should not have any tasks on its queue.
3268              // There is a slight possibility that it resumes, steals a task
3269              // from another thread, which spawns more tasks, all in the time
3270              // that it takes this thread to check => don't write an assertion
3271              // that the victim's queue is empty.  Try stealing from a
3272              // different thread.
3273            }
3274          } while (asleep);
3275        }
3276 
        // asleep is still 1 here when there is no last victim and new_victim
        // is already set; no steal is attempted in that case.
3277        if (!asleep) {
3278          // We have a victim to try to steal from
3279          task = __kmp_steal_task(other_thread, gtid, task_team,
3280                                  unfinished_threads, thread_finished,
3281                                  is_constrained);
3282        }
3283        if (task != NULL__null) { // set last stolen to victim
3284          if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
3285            threads_data[tid].td.td_deque_last_stolen = victim_tid;
3286            // The pre-refactored code did not try more than 1 successful new
3287            // victim, unless the last one generated more local tasks;
3288            // new_victim keeps track of this
3289            new_victim = 1;
3290          }
3291        } else { // No tasks found; unset last_stolen
3292          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1)if ((threads_data[tid].td.td_deque_last_stolen) != (-1)) (threads_data
[tid].td.td_deque_last_stolen) = (-1);
3293          victim_tid = -2; // no successful victim found
3294        }
3295      }
3296 
3297      if (task == NULL__null)
3298        break; // break out of tasking loop
3299 
3300// Found a task; execute it
3301#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
3302      if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
3303        if (itt_sync_obj == NULL__null) { // we are at fork barrier where we could not
3304          // get the object reliably
3305          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
3306        }
3307        __kmp_itt_task_starting(itt_sync_obj);
3308      }
3309#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
3310      __kmp_invoke_task(gtid, task, current_task);
3311#if USE_ITT_BUILD1
3312      if (itt_sync_obj != NULL__null)
3313        __kmp_itt_task_finished(itt_sync_obj);
3314#endif /* USE_ITT_BUILD */
3315      // If this thread is only partway through the barrier and the condition is
3316      // met, then return now, so that the barrier gather/release pattern can
3317      // proceed. If this thread is in the last spin loop in the barrier,
3318      // waiting to be released, we know that the termination condition will not
3319      // be satisfied, so don't waste any cycles checking it.
      // A NULL flag also returns here, i.e. after exactly one executed task.
3320      if (flag == NULL__null || (!final_spin && flag->done_check())) {
3321        KA_TRACE(if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3322            15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3323            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3324             gtid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); };
3325        return TRUE(!0);
3326      }
3327      if (thread->th.th_task_team == NULL__null) {
3328        break;
3329      }
3330      KMP_YIELD(__kmp_library == library_throughput){ __kmp_x86_pause(); if ((__kmp_library == library_throughput
) && (((__kmp_use_yield == 1) || (__kmp_use_yield == 2
 && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
 : __kmp_xproc))))))) __kmp_yield(); }; // Yield before next task
3331      // If execution of a stolen task results in more tasks being placed on our
3332      // run queue, reset use_own_tasks
3333      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks)(threads_data[tid].td.td_deque_ntasks) != 0) {
3334        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d stolen task spawned "
 "other tasks, restart\n", gtid); }
3335                      "other tasks, restart\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d stolen task spawned "
 "other tasks, restart\n", gtid); }
3336                      gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d stolen task spawned "
 "other tasks, restart\n", gtid); };
3337        use_own_tasks = 1;
3338        new_victim = 0;
3339      }
3340    }
3341 
3342    // The task source has been exhausted. If in final spin loop of barrier,
3343    // check if termination condition is satisfied. The work queue may be empty
3344    // but there might be proxy tasks still executing.
3345    if (final_spin &&
3346        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks)(&current_task->td_incomplete_child_tasks)->load(std
::memory_order_acquire) == 0) {
3347      // First, decrement the #unfinished threads, if that has not already been
3348      // done.  This decrement might be to the spin location, and result in the
3349      // termination condition being satisfied.
3350      if (!*thread_finished) {
3351#if KMP_DEBUG1
3352        kmp_int32 count = -1 +
3353#endif
3354            KMP_ATOMIC_DEC(unfinished_threads)(unfinished_threads)->fetch_sub(1, std::memory_order_acq_rel
);
3355        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d dec "
 "unfinished_threads to %d task_team=%p\n", gtid, count, task_team
); }
3356                      "unfinished_threads to %d task_team=%p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d dec "
 "unfinished_threads to %d task_team=%p\n", gtid, count, task_team
); }
3357                      gtid, count, task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d dec "
 "unfinished_threads to %d task_team=%p\n", gtid, count, task_team
); };
3358        *thread_finished = TRUE(!0);
3359      }
3360 
3361      // It is now unsafe to reference thread->th.th_team !!!
3362      // Decrementing task_team->tt.tt_unfinished_threads can allow the primary
3363      // thread to pass through the barrier, where it might reset each thread's
3364      // th.th_team field for the next parallel region. If we can steal more
3365      // work, we know that this has not happened yet.
3366      if (flag != NULL__null && flag->done_check()) {
3367        KA_TRACE(if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3368            15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3369            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3370             gtid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); };
3371        return TRUE(!0);
3372      }
3373    }
3374 
3375    // If this thread's task team is NULL, primary thread has recognized that
3376    // there are no more tasks; bail out
3377    if (thread->th.th_task_team == NULL__null) {
3378      KA_TRACE(15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d no more tasks\n"
, gtid); }
3379               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d no more tasks\n"
, gtid); };
3380      return FALSE0;
3381    }
3382 
3383    // Check the flag again to see if it is already done, to avoid being trapped
3384    // in an infinite loop when an if0 task depends on a hidden helper task
3385    // outside any parallel region. Detached tasks are not impacted in this case
3386    // because the only thread executing this function has to execute the proxy
3387    // task so it is in another code path that has the same check.
3388    if (flag == NULL__null || (!final_spin && flag->done_check())) {
3389      KA_TRACE(15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3390               ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3391                gtid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); };
3392      return TRUE(!0);
3393    }
3394 
3395    // We could be getting tasks from target constructs; if this is the only
3396    // thread, keep trying to execute tasks from own queue
3397    if (nthreads == 1 &&
3398        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks)(&current_task->td_incomplete_child_tasks)->load(std
::memory_order_acquire))
3399      use_own_tasks = 1;
3400    else {
3401      KA_TRACE(15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d can't find work\n"
, gtid); }
3402               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d can't find work\n"
, gtid); };
3403      return FALSE0;
3404    }
3405  }
3406}
3407 
// Entry point for kmp_flag_32<C, S> flags: forwards every argument unchanged
// to __kmp_execute_tasks_template and returns its result.
3408template <bool C, bool S>
3409int __kmp_execute_tasks_32(
3410    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
3411    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj,
3412    kmp_int32 is_constrained) {
3413  return __kmp_execute_tasks_template(
3414      thread, gtid, flag, final_spin,
3415      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained);
3416}
3417 
// Entry point for kmp_flag_64<C, S> flags: forwards every argument unchanged
// to __kmp_execute_tasks_template and returns its result.
3418template <bool C, bool S>
3419int __kmp_execute_tasks_64(
3420    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
3421    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj,
3422    kmp_int32 is_constrained) {
3423  return __kmp_execute_tasks_template(
3424      thread, gtid, flag, final_spin,
3425      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained);
3426}
3427 
// Entry point for kmp_atomic_flag_64<C, S> flags: forwards every argument
// unchanged to __kmp_execute_tasks_template and returns its result.
3428template <bool C, bool S>
3429int __kmp_atomic_execute_tasks_64(
3430    kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
3431    int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj,
3432    kmp_int32 is_constrained) {
3433  return __kmp_execute_tasks_template(
3434      thread, gtid, flag, final_spin,
3435      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained);
3436}
3437 
// Entry point for kmp_flag_oncore flags (not a template, unlike the 32/64-bit
// variants): forwards every argument unchanged to
// __kmp_execute_tasks_template and returns its result.
3438int __kmp_execute_tasks_oncore(
3439    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
3440    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj,
3441    kmp_int32 is_constrained) {
3442  return __kmp_execute_tasks_template(
3443      thread, gtid, flag, final_spin,
3444      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, is_constrained);
3445}
3446 
// Explicit instantiations of the flag-specific entry points for the
// <C, S> template-argument combinations listed below:
//   __kmp_execute_tasks_32        <false, false>
//   __kmp_execute_tasks_64        <false, true>, <true, false>
//   __kmp_atomic_execute_tasks_64 <false, true>, <true, false>
3447template int
3448__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
3449                                     kmp_flag_32<false, false> *, int,
3450                                     int *USE_ITT_BUILD_ARG(void *), void *, kmp_int32);
3451 
3452template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
3453                                                 kmp_flag_64<false, true> *,
3454                                                 int,
3455                                                 int *USE_ITT_BUILD_ARG(void *), void *,
3456                                                 kmp_int32);
3457 
3458template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
3459                                                 kmp_flag_64<true, false> *,
3460                                                 int,
3461                                                 int *USE_ITT_BUILD_ARG(void *), void *,
3462                                                 kmp_int32);
3463 
3464template int __kmp_atomic_execute_tasks_64<false, true>(
3465    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
3466    int *USE_ITT_BUILD_ARG(void *), void *, kmp_int32);
3467 
3468template int __kmp_atomic_execute_tasks_64<true, false>(
3469    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
3470    int *USE_ITT_BUILD_ARG(void *), void *, kmp_int32);
3471 
3472// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
3473// next barrier so they can assist in executing enqueued tasks.
3474// First thread in allocates the task team atomically.
// In the code below, "allocating" is the call to
// __kmp_realloc_task_threads_data(); only the thread for which that call
// returns nonzero goes on to wake the other threads, every other caller
// returns early. The wake-up loop itself runs only when
// __kmp_tasking_mode == tskm_task_teams and __kmp_dflt_blocktime is not
// KMP_MAX_BLOCKTIME.
3475static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3476                                 kmp_info_t *this_thr) {
3477  kmp_thread_data_t *threads_data;
3478  int nthreads, i, is_init_thread;
3479 
3480  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_enable_tasking(enter): T#%d\n"
, __kmp_gtid_from_thread(this_thr)); }
3481                __kmp_gtid_from_thread(this_thr)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_enable_tasking(enter): T#%d\n"
, __kmp_gtid_from_thread(this_thr)); };
3482 
3483  KMP_DEBUG_ASSERT(task_team != NULL)if (!(task_team != __null)) { __kmp_debug_assert("task_team != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 3483); };
3484  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL)if (!(this_thr->th.th_team != __null)) { __kmp_debug_assert
("this_thr->th.th_team != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 3484); };
3485 
3486  nthreads = task_team->tt.tt_nproc;
3487  KMP_DEBUG_ASSERT(nthreads > 0)if (!(nthreads > 0)) { __kmp_debug_assert("nthreads > 0"
, "openmp/runtime/src/kmp_tasking.cpp", 3487); };
3488  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc)if (!(nthreads == this_thr->th.th_team->t.t_nproc)) { __kmp_debug_assert
("nthreads == this_thr->th.th_team->t.t_nproc", "openmp/runtime/src/kmp_tasking.cpp"
, 3488); };
3489 
3490  // Allocate or increase the size of threads_data if necessary
3491  is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3492 
3493  if (!is_init_thread) {
3494    // Some other thread already set up the array.
3495    KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n"
, __kmp_gtid_from_thread(this_thr)); }
3496        20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n"
, __kmp_gtid_from_thread(this_thr)); }
3497        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n"
, __kmp_gtid_from_thread(this_thr)); }
3498         __kmp_gtid_from_thread(this_thr)))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n"
, __kmp_gtid_from_thread(this_thr)); };
3499    return;
3500  }
3501  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data)((void *)(task_team->tt.tt_threads_data));
3502  KMP_DEBUG_ASSERT(threads_data != NULL)if (!(threads_data != __null)) { __kmp_debug_assert("threads_data != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 3502); };
3503 
3504  if (__kmp_tasking_mode == tskm_task_teams &&
3505      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647))) {
3506    // Release any threads sleeping at the barrier, so that they can steal
3507    // tasks and execute them.  In extra barrier mode, tasks do not sleep
3508    // at the separate tasking barrier, so this isn't a problem.
3509    for (i = 0; i < nthreads; i++) {
3510      void *sleep_loc;
3511      kmp_info_t *thread = threads_data[i].td.td_thr;
3512 
      // Never try to wake the calling thread itself.
3513      if (i == this_thr->th.th_info.ds.ds_tid) {
3514        continue;
3515      }
3516      // Since we haven't locked the thread's suspend mutex lock at this
3517      // point, there is a small window where a thread might be putting
3518      // itself to sleep, but hasn't set the th_sleep_loc field yet.
3519      // To work around this, __kmp_execute_tasks_template() periodically checks
3520      // to see if other threads are sleeping (using the same random mechanism
3521      // that is used for task stealing) and awakens them if they are.
3522      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))((void *)(const_cast<void *>(thread->th.th_sleep_loc
)))) !=
3523          NULL__null) {
3524        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_enable_tasking: T#%d waking up thread T#%d\n"
, __kmp_gtid_from_thread(this_thr), __kmp_gtid_from_thread(thread
)); }
3525                      __kmp_gtid_from_thread(this_thr),if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_enable_tasking: T#%d waking up thread T#%d\n"
, __kmp_gtid_from_thread(this_thr), __kmp_gtid_from_thread(thread
)); }
3526                      __kmp_gtid_from_thread(thread)))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_enable_tasking: T#%d waking up thread T#%d\n"
, __kmp_gtid_from_thread(this_thr), __kmp_gtid_from_thread(thread
)); };
3527        __kmp_null_resume_wrapper(thread);
3528      } else {
3529        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n"
, __kmp_gtid_from_thread(this_thr), __kmp_gtid_from_thread(thread
)); }
3530                      __kmp_gtid_from_thread(this_thr),if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n"
, __kmp_gtid_from_thread(this_thr), __kmp_gtid_from_thread(thread
)); }
3531                      __kmp_gtid_from_thread(thread)))if (kmp_f_debug >= 50) { __kmp_debug_printf ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n"
, __kmp_gtid_from_thread(this_thr), __kmp_gtid_from_thread(thread
)); };
3532      }
3533    }
3534  }
3535 
3536  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_enable_tasking(exit): T#%d\n"
, __kmp_gtid_from_thread(this_thr)); }
3537                __kmp_gtid_from_thread(this_thr)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_enable_tasking(exit): T#%d\n"
, __kmp_gtid_from_thread(this_thr)); };
3538}
3539 
3540/* // TODO: Check the comment consistency
3541 * Utility routines for "task teams".  A task team (kmp_task_t) is kind of
3542 * like a shadow of the kmp_team_t data struct, with a different lifetime.
3543 * After a child thread checks into a barrier and calls __kmp_release() from
3544 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
3545 * longer assume that the kmp_team_t structure is intact (at any moment, the
3546 * primary thread may exit the barrier code and free the team data structure,
3547 * and return the threads to the thread pool).
3548 *
3549 * This does not work with the tasking code, as the thread is still
3550 * expected to participate in the execution of any tasks that may have been
3551 * spawned by a member of the team, and the thread still needs access
3552 * to each thread in the team, so that it can steal work from it.
3553 *
3554 * Enter the existence of the kmp_task_team_t struct.  It employs a reference
3555 * counting mechanism, and is allocated by the primary thread before calling
3556 * __kmp_<barrier_kind>_release, and then is released by the last thread to
3557 * exit __kmp_<barrier_kind>_release at the next barrier.  I.e. the lifetimes
3558 * of the kmp_task_team_t structs for consecutive barriers can overlap
3559 * (and will, unless the primary thread is the last thread to exit the barrier
3560 * release phase, which is not typical). The existence of such a struct is
3561 * useful outside the context of tasking.
3562 *
3563 * We currently use the existence of the threads array as an indicator that
3564 * tasks were spawned since the last barrier.  If the structure is to be
3565 * useful outside the context of tasking, then this will have to change, but
3566 * not setting the field minimizes the performance impact of tasking on
3567 * barriers, when no explicit tasks were spawned (pushed, actually).
3568 */
3569 
3570static kmp_task_team_t *__kmp_free_task_teams =
3571    NULL__null; // Free list for task_team data structures
3572// Lock for task team data structures
3573kmp_bootstrap_lock_t __kmp_task_team_lock =
3574    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock){ { true, &((__kmp_task_team_lock)), __null, 0U, 0U, 0, -
1 } };
3575 
3576// __kmp_alloc_task_deque:
3577// Allocates a task deque for a particular thread, and initializes the necessary
3578// data structures relating to the deque.  This only happens once per thread
3579// per task team since task teams are recycled. No lock is needed during
3580// allocation since each thread allocates its own deque.
3581static void __kmp_alloc_task_deque(kmp_info_t *thread,
3582                                   kmp_thread_data_t *thread_data) {
3583  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
3584  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL)if (!(thread_data->td.td_deque == __null)) { __kmp_debug_assert
("thread_data->td.td_deque == __null", "openmp/runtime/src/kmp_tasking.cpp"
, 3584); };
3585 
3586  // Initialize last stolen task field to "none"
3587  thread_data->td.td_deque_last_stolen = -1;
3588 
3589  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0)if (!((thread_data->td.td_deque_ntasks) == 0)) { __kmp_debug_assert
("(thread_data->td.td_deque_ntasks) == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 3589); };
3590  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0)if (!(thread_data->td.td_deque_head == 0)) { __kmp_debug_assert
("thread_data->td.td_deque_head == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 3590); };
3591  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0)if (!(thread_data->td.td_deque_tail == 0)) { __kmp_debug_assert
("thread_data->td.td_deque_tail == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 3591); };
3592 
3593  KE_TRACE(if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n"
, __kmp_gtid_from_thread(thread), (1 << 8), thread_data
); }
3594      10,if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n"
, __kmp_gtid_from_thread(thread), (1 << 8), thread_data
); }
3595      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n"
, __kmp_gtid_from_thread(thread), (1 << 8), thread_data
); }
3596       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n"
, __kmp_gtid_from_thread(thread), (1 << 8), thread_data
); };
3597  // Allocate space for task deque, and zero the deque
3598  // Cannot use __kmp_thread_calloc() because threads not around for
3599  // kmp_reap_task_team( ).
3600  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(___kmp_allocate(((1 << 8) * sizeof(kmp_taskdata_t *)), "openmp/runtime/src/kmp_tasking.cpp"
, 3601)
3601      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *))___kmp_allocate(((1 << 8) * sizeof(kmp_taskdata_t *)), "openmp/runtime/src/kmp_tasking.cpp"
, 3601);
3602  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE(1 << 8);
3603}
3604 
3605// __kmp_free_task_deque:
3606// Deallocates a task deque for a particular thread. Happens at library
3607// deallocation so don't need to reset all thread data fields.
3608static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
3609  if (thread_data->td.td_deque != NULL__null) {
3610    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3611    TCW_4(thread_data->td.td_deque_ntasks, 0)(thread_data->td.td_deque_ntasks) = (0);
3612    __kmp_free(thread_data->td.td_deque)___kmp_free((thread_data->td.td_deque), "openmp/runtime/src/kmp_tasking.cpp"
, 3612);
3613    thread_data->td.td_deque = NULL__null;
3614    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3615  }
3616 
3617#ifdef BUILD_TIED_TASK_STACK
3618  // GEH: Figure out what to do here for td_susp_tied_tasks
3619  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
3620    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
3621  }
3622#endif // BUILD_TIED_TASK_STACK
3623}
3624 
3625// __kmp_realloc_task_threads_data:
3626// Allocates a threads_data array for a task team, either by allocating an
3627// initial array or enlarging an existing array.  Only the first thread to get
3628// the lock allocs or enlarges the array and re-initializes the array elements.
3629// That thread returns "TRUE", the rest return "FALSE".
3630// Assumes that the new array size is given by task_team -> tt.tt_nproc.
3631// The current size is given by task_team -> tt.tt_max_threads.
3632static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3633                                           kmp_task_team_t *task_team) {
3634  kmp_thread_data_t **threads_data_p;
3635  kmp_int32 nthreads, maxthreads;
3636  int is_init_thread = FALSE0;
3637 
3638  if (TCR_4(task_team->tt.tt_found_tasks)(task_team->tt.tt_found_tasks)) {
1
Assuming field 'tt_found_tasks' is 0→
2
←
Taking false branch→
3639    // Already reallocated and initialized.
3640    return FALSE0;
3641  }
3642 
3643  threads_data_p = &task_team->tt.tt_threads_data;
3644  nthreads = task_team->tt.tt_nproc;
3645  maxthreads = task_team->tt.tt_max_threads;
3646 
3647  // All threads must lock when they encounter the first task of the implicit
3648  // task region to make sure threads_data fields are (re)initialized before
3649  // used.
3650  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3
←
Calling '__kmp_acquire_bootstrap_lock'→
5
←
Returning from '__kmp_acquire_bootstrap_lock'→
3651 
3652  if (!TCR_4(task_team->tt.tt_found_tasks)(task_team->tt.tt_found_tasks)) {
6
←
Assuming field 'tt_found_tasks' is 0→
7
←
Taking true branch→
3653    // first thread to enable tasking
3654    kmp_team_t *team = thread->th.th_team;
3655    int i;
3656 
3657    is_init_thread = TRUE(!0);
3658    if (maxthreads < nthreads) {
8
←
Assuming 'maxthreads' is >= 'nthreads'→
9
←
Taking false branch→
3659 
3660      if (*threads_data_p != NULL__null) {
3661        kmp_thread_data_t *old_data = *threads_data_p;
3662        kmp_thread_data_t *new_data = NULL__null;
3663 
3664        KE_TRACE(if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d reallocating "
 "threads data for task_team %p, new_size = %d, old_size = %d\n"
, __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads
); }
3665            10,if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d reallocating "
 "threads data for task_team %p, new_size = %d, old_size = %d\n"
, __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads
); }
3666            ("__kmp_realloc_task_threads_data: T#%d reallocating "if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d reallocating "
 "threads data for task_team %p, new_size = %d, old_size = %d\n"
, __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads
); }
3667             "threads data for task_team %p, new_size = %d, old_size = %d\n",if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d reallocating "
 "threads data for task_team %p, new_size = %d, old_size = %d\n"
, __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads
); }
3668             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d reallocating "
 "threads data for task_team %p, new_size = %d, old_size = %d\n"
, __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads
); };
3669        // Reallocate threads_data to have more elements than current array
3670        // Cannot use __kmp_thread_realloc() because threads not around for
3671        // kmp_reap_task_team( ).  Note all new array entries are initialized
3672        // to zero by __kmp_allocate().
3673        new_data = (kmp_thread_data_t *)__kmp_allocate(___kmp_allocate((nthreads * sizeof(kmp_thread_data_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 3674)
3674            nthreads * sizeof(kmp_thread_data_t))___kmp_allocate((nthreads * sizeof(kmp_thread_data_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 3674);
3675        // copy old data to new data
3676        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),memcpy((void *)new_data, (void *)old_data, maxthreads * sizeof
(kmp_thread_data_t))
3677                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t))memcpy((void *)new_data, (void *)old_data, maxthreads * sizeof
(kmp_thread_data_t));
3678 
3679#ifdef BUILD_TIED_TASK_STACK
3680        // GEH: Figure out if this is the right thing to do
3681        for (i = maxthreads; i < nthreads; i++) {
3682          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3683          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3684        }
3685#endif // BUILD_TIED_TASK_STACK
3686       // Install the new data and free the old data
3687        (*threads_data_p) = new_data;
3688        __kmp_free(old_data)___kmp_free((old_data), "openmp/runtime/src/kmp_tasking.cpp",
 3688);
3689      } else {
3690        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d allocating "
 "threads data for task_team %p, size = %d\n", __kmp_gtid_from_thread
(thread), task_team, nthreads); }
3691                      "threads data for task_team %p, size = %d\n",if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d allocating "
 "threads data for task_team %p, size = %d\n", __kmp_gtid_from_thread
(thread), task_team, nthreads); }
3692                      __kmp_gtid_from_thread(thread), task_team, nthreads))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_realloc_task_threads_data: T#%d allocating "
 "threads data for task_team %p, size = %d\n", __kmp_gtid_from_thread
(thread), task_team, nthreads); };
3693        // Make the initial allocation for threads_data array, and zero entries
3694        // Cannot use __kmp_thread_calloc() because threads not around for
3695        // kmp_reap_task_team( ).
3696        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(___kmp_allocate((nthreads * sizeof(kmp_thread_data_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 3697)
3697            nthreads * sizeof(kmp_thread_data_t))___kmp_allocate((nthreads * sizeof(kmp_thread_data_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 3697);
3698#ifdef BUILD_TIED_TASK_STACK
3699        // GEH: Figure out if this is the right thing to do
3700        for (i = 0; i < nthreads; i++) {
3701          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3702          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3703        }
3704#endif // BUILD_TIED_TASK_STACK
3705      }
3706      task_team->tt.tt_max_threads = nthreads;
3707    } else {
3708      // If array has (more than) enough elements, go ahead and use it
3709      KMP_DEBUG_ASSERT(*threads_data_p != NULL)if (!(*threads_data_p != __null)) { __kmp_debug_assert("*threads_data_p != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 3709); };
10
←
Assuming the condition is false→
11
←
Taking true branch→
3710    }
3711 
3712    // initialize threads_data pointers back to thread_info structures
3713    for (i = 0; i < nthreads; i++) {
12
←
Assuming 'i' is < 'nthreads'→
13
←
Loop condition is true.  Entering loop body→
3714      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
14
←
'thread_data' initialized to a null pointer value→
3715      thread_data->td.td_thr = team->t.t_threads[i];
15
←
Dereference of null pointer
3716 
3717      if (thread_data->td.td_deque_last_stolen >= nthreads) {
3718        // The last stolen field survives across teams / barrier, and the number
3719        // of threads may have changed.  It's possible (likely?) that a new
3720        // parallel region will exhibit the same behavior as previous region.
3721        thread_data->td.td_deque_last_stolen = -1;
3722      }
3723    }
3724 
3725    KMP_MB();
3726    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE)(task_team->tt.tt_found_tasks) = ((!0));
3727  }
3728 
3729  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3730  return is_init_thread;
3731}
3732 
3733// __kmp_free_task_threads_data:
3734// Deallocates a threads_data array for a task team, including any attached
3735// tasking deques.  Only occurs at library shutdown.
3736static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3737  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3738  if (task_team->tt.tt_threads_data != NULL__null) {
3739    int i;
3740    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3741      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3742    }
3743    __kmp_free(task_team->tt.tt_threads_data)___kmp_free((task_team->tt.tt_threads_data), "openmp/runtime/src/kmp_tasking.cpp"
, 3743);
3744    task_team->tt.tt_threads_data = NULL__null;
3745  }
3746  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3747}
3748 
3749// __kmp_free_task_pri_list:
3750// Deallocates tasking deques used for priority tasks.
3751// Only occurs at library shutdown.
3752static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
3753  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3754  if (task_team->tt.tt_task_pri_list != NULL__null) {
3755    kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
3756    while (list != NULL__null) {
3757      kmp_task_pri_t *next = list->next;
3758      __kmp_free_task_deque(&list->td);
3759      __kmp_free(list)___kmp_free((list), "openmp/runtime/src/kmp_tasking.cpp", 3759
);
3760      list = next;
3761    }
3762    task_team->tt.tt_task_pri_list = NULL__null;
3763  }
3764  __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3765}
3766 
3767// __kmp_allocate_task_team:
3768// Allocates a task team associated with a specific team, taking it from
3769// the global task team free list if possible.  Also initializes data
3770// structures.
3771static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3772                                                 kmp_team_t *team) {
3773  kmp_task_team_t *task_team = NULL__null;
3774  int nthreads;
3775 
3776  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d entering; team = %p\n"
, (thread ? __kmp_gtid_from_thread(thread) : -1), team); }
3777                (thread ? __kmp_gtid_from_thread(thread) : -1), team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d entering; team = %p\n"
, (thread ? __kmp_gtid_from_thread(thread) : -1), team); };
3778 
3779  if (TCR_PTR(__kmp_free_task_teams)((void *)(__kmp_free_task_teams)) != NULL__null) {
3780    // Take a task team from the task team pool
3781    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3782    if (__kmp_free_task_teams != NULL__null) {
3783      task_team = __kmp_free_task_teams;
3784      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next)((__kmp_free_task_teams)) = ((task_team->tt.tt_next));
3785      task_team->tt.tt_next = NULL__null;
3786    }
3787    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3788  }
3789 
3790  if (task_team == NULL__null) {
3791    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d allocating "
 "task team for team %p\n", __kmp_gtid_from_thread(thread), team
); }
3792                  "task team for team %p\n",if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d allocating "
 "task team for team %p\n", __kmp_gtid_from_thread(thread), team
); }
3793                  __kmp_gtid_from_thread(thread), team))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d allocating "
 "task team for team %p\n", __kmp_gtid_from_thread(thread), team
); };
3794    // Allocate a new task team if one is not available. Cannot use
3795    // __kmp_thread_malloc because threads not around for kmp_reap_task_team.
3796    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t))___kmp_allocate((sizeof(kmp_task_team_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 3796);
3797    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3798    __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3799#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1 && KMP_DEBUG1
3800    // suppress race conditions detection on synchronization flags in debug mode
3801    // this helps to analyze library internals eliminating false positives
3802    __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(
3803        __itt_suppress_range, __itt_suppress_threading_errors0x000000ff,
3804        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
3805    __itt_suppress_mark_range(!__kmp_itt_suppress_mark_range_ptr__3_0) ? (void)0 : __kmp_itt_suppress_mark_range_ptr__3_0(__itt_suppress_range,
3806                              __itt_suppress_threading_errors0x000000ff,
3807                              CCAST(kmp_uint32 *, &task_team->tt.tt_active)const_cast<kmp_uint32 *>(&task_team->tt.tt_active
),
3808                              sizeof(task_team->tt.tt_active));
3809#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
3810    // Note: __kmp_allocate zeroes returned memory, otherwise we would need:
3811    // task_team->tt.tt_threads_data = NULL;
3812    // task_team->tt.tt_max_threads = 0;
3813    // task_team->tt.tt_next = NULL;
3814  }
3815 
3816  TCW_4(task_team->tt.tt_found_tasks, FALSE)(task_team->tt.tt_found_tasks) = (0);
3817  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE)(task_team->tt.tt_found_proxy_tasks) = (0);
3818  TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE)(task_team->tt.tt_hidden_helper_task_encountered) = (0);
3819  task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3820 
3821  KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads)(&task_team->tt.tt_unfinished_threads)->store(nthreads
, std::memory_order_release);
3822  TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE)(task_team->tt.tt_hidden_helper_task_encountered) = (0);
3823  TCW_4(task_team->tt.tt_active, TRUE)(task_team->tt.tt_active) = ((!0));
3824 
3825  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
 "unfinished_threads init'd to %d\n", (thread ? __kmp_gtid_from_thread
(thread) : -1), task_team, (&task_team->tt.tt_unfinished_threads
)->load(std::memory_order_relaxed)); }
3826                "unfinished_threads init'd to %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
 "unfinished_threads init'd to %d\n", (thread ? __kmp_gtid_from_thread
(thread) : -1), task_team, (&task_team->tt.tt_unfinished_threads
)->load(std::memory_order_relaxed)); }
3827                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
 "unfinished_threads init'd to %d\n", (thread ? __kmp_gtid_from_thread
(thread) : -1), task_team, (&task_team->tt.tt_unfinished_threads
)->load(std::memory_order_relaxed)); }
3828                KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
 "unfinished_threads init'd to %d\n", (thread ? __kmp_gtid_from_thread
(thread) : -1), task_team, (&task_team->tt.tt_unfinished_threads
)->load(std::memory_order_relaxed)); };
3829  return task_team;
3830}
3831 
3832// __kmp_free_task_team:
3833// Frees the task team associated with a specific thread, and adds it
3834// to the global task team free list.
3835void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
3836  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_team: T#%d task_team = %p\n"
, thread ? __kmp_gtid_from_thread(thread) : -1, task_team); }
3837                thread ? __kmp_gtid_from_thread(thread) : -1, task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_team: T#%d task_team = %p\n"
, thread ? __kmp_gtid_from_thread(thread) : -1, task_team); };
3838 
3839  // Put task team back on free list
3840  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3841 
3842  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL)if (!(task_team->tt.tt_next == __null)) { __kmp_debug_assert
("task_team->tt.tt_next == __null", "openmp/runtime/src/kmp_tasking.cpp"
, 3842); };
3843  task_team->tt.tt_next = __kmp_free_task_teams;
3844  TCW_PTR(__kmp_free_task_teams, task_team)((__kmp_free_task_teams)) = ((task_team));
3845 
3846  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3847}
3848 
3849// __kmp_reap_task_teams:
3850// Free all the task teams on the task team free list.
3851// Should only be done during library shutdown.
3852// Cannot do anything that needs a thread structure or gtid since they are
3853// already gone.
3854void __kmp_reap_task_teams(void) {
3855  kmp_task_team_t *task_team;
3856 
3857  if (TCR_PTR(__kmp_free_task_teams)((void *)(__kmp_free_task_teams)) != NULL__null) {
3858    // Free all task_teams on the free list
3859    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3860    while ((task_team = __kmp_free_task_teams) != NULL__null) {
3861      __kmp_free_task_teams = task_team->tt.tt_next;
3862      task_team->tt.tt_next = NULL__null;
3863 
3864      // Free threads_data if necessary
3865      if (task_team->tt.tt_threads_data != NULL__null) {
3866        __kmp_free_task_threads_data(task_team);
3867      }
3868      if (task_team->tt.tt_task_pri_list != NULL__null) {
3869        __kmp_free_task_pri_list(task_team);
3870      }
3871      __kmp_free(task_team)___kmp_free((task_team), "openmp/runtime/src/kmp_tasking.cpp"
, 3871);
3872    }
3873    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3874  }
3875}
3876 
3877// __kmp_wait_to_unref_task_teams:
3878// Some threads could still be in the fork barrier release code, possibly
3879// trying to steal tasks.  Wait for each thread to unreference its task team.
3880void __kmp_wait_to_unref_task_teams(void) {
3881  kmp_info_t *thread;
3882  kmp_uint32 spins;
3883  kmp_uint64 time;
3884  int done;
3885 
3886  KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; };
3887  KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; };
3888 
3889  for (;;) {
3890    done = TRUE(!0);
3891 
3892    // TODO: GEH - this may be wrong because some sync would be necessary
3893    // in case threads are added to the pool during the traversal. Need to
3894    // verify that lock for thread pool is held when calling this routine.
3895    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool)const_cast<kmp_info_t *>(__kmp_thread_pool); thread != NULL__null;
3896         thread = thread->th.th_next_pool) {
3897#if KMP_OS_WINDOWS0
3898      DWORD exit_val;
3899#endif
3900      if (TCR_PTR(thread->th.th_task_team)((void *)(thread->th.th_task_team)) == NULL__null) {
3901        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n"
, __kmp_gtid_from_thread(thread)); }
3902                      __kmp_gtid_from_thread(thread)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n"
, __kmp_gtid_from_thread(thread)); };
3903        continue;
3904      }
3905#if KMP_OS_WINDOWS0
3906      // TODO: GEH - add this check for Linux* OS / OS X* as well?
3907      if (!__kmp_is_thread_alive(thread, &exit_val)) {
3908        thread->th.th_task_team = NULL__null;
3909        continue;
3910      }
3911#endif
3912 
3913      done = FALSE0; // Because th_task_team pointer is not NULL for this thread
3914 
3915      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
 "unreference task_team\n", __kmp_gtid_from_thread(thread)); }
3916                    "unreference task_team\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
 "unreference task_team\n", __kmp_gtid_from_thread(thread)); }
3917                    __kmp_gtid_from_thread(thread)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
 "unreference task_team\n", __kmp_gtid_from_thread(thread)); };
3918 
3919      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
3920        void *sleep_loc;
3921        // If the thread is sleeping, awaken it.
3922        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))((void *)(const_cast<void *>(thread->th.th_sleep_loc
)))) !=
3923            NULL__null) {
3924          KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n"
, __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread
)); }
3925              10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n"
, __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread
)); }
3926              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n"
, __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread
)); }
3927               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n"
, __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread
)); };
3928          __kmp_null_resume_wrapper(thread);
3929        }
3930      }
3931    }
3932    if (done) {
3933      break;
3934    }
3935 
3936    // If oversubscribed or have waited a bit, yield.
3937    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
 ? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
 = (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
 __kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
 == 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
 : __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
 == 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
 = __kmp_yield_next; } } } };
3938  }
3939}
3940 
3941void __kmp_shift_task_state_stack(kmp_info_t *this_thr, kmp_uint8 value) {
3942  // Shift values from th_task_state_top+1 to task_state_stack_sz
3943  if (this_thr->th.th_task_state_top + 1 >=
3944      this_thr->th.th_task_state_stack_sz) { // increase size
3945    kmp_uint32 new_size = 2 * this_thr->th.th_task_state_stack_sz;
3946    kmp_uint8 *old_stack, *new_stack;
3947    kmp_uint32 i;
3948    new_stack = (kmp_uint8 *)__kmp_allocate(new_size)___kmp_allocate((new_size), "openmp/runtime/src/kmp_tasking.cpp"
, 3948);
3949    for (i = 0; i <= this_thr->th.th_task_state_top; ++i) {
3950      new_stack[i] = this_thr->th.th_task_state_memo_stack[i];
3951    }
3952    // If we need to reallocate do the shift at the same time.
3953    for (; i < this_thr->th.th_task_state_stack_sz; ++i) {
3954      new_stack[i + 1] = this_thr->th.th_task_state_memo_stack[i];
3955    }
3956    for (i = this_thr->th.th_task_state_stack_sz; i < new_size;
3957         ++i) { // zero-init rest of stack
3958      new_stack[i] = 0;
3959    }
3960    old_stack = this_thr->th.th_task_state_memo_stack;
3961    this_thr->th.th_task_state_memo_stack = new_stack;
3962    this_thr->th.th_task_state_stack_sz = new_size;
3963    __kmp_free(old_stack)___kmp_free((old_stack), "openmp/runtime/src/kmp_tasking.cpp"
, 3963);
3964  } else {
3965    kmp_uint8 *end;
3966    kmp_uint32 i;
3967 
3968    end = &this_thr->th
3969               .th_task_state_memo_stack[this_thr->th.th_task_state_stack_sz];
3970 
3971    for (i = this_thr->th.th_task_state_stack_sz - 1;
3972         i > this_thr->th.th_task_state_top; i--, end--)
3973      end[0] = end[-1];
3974  }
3975  this_thr->th.th_task_state_memo_stack[this_thr->th.th_task_state_top + 1] =
3976      value;
3977}
3978 
3979// __kmp_task_team_setup:  Create a task_team for the current team, but use
3980// an already created, unused one if it already exists.
3981void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
3982  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 3982); };
3983 
3984  // If this task_team hasn't been created yet, allocate it. It will be used in
3985  // the region after the next.
3986  // If it exists, it is the current task team and shouldn't be touched yet as
3987  // it may still be in use.
3988  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL__null &&
3989      (always || team->t.t_nproc > 1)) {
3990    team->t.t_task_team[this_thr->th.th_task_state] =
3991        __kmp_allocate_task_team(this_thr, team);
3992    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[this_thr->th.th_task_state], team
->t.t_id, this_thr->th.th_task_state); }
3993                  " for team %d at parity=%d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[this_thr->th.th_task_state], team
->t.t_id, this_thr->th.th_task_state); }
3994                  __kmp_gtid_from_thread(this_thr),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[this_thr->th.th_task_state], team
->t.t_id, this_thr->th.th_task_state); }
3995                  team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[this_thr->th.th_task_state], team
->t.t_id, this_thr->th.th_task_state); }
3996                  this_thr->th.th_task_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[this_thr->th.th_task_state], team
->t.t_id, this_thr->th.th_task_state); };
3997  }
3998  if (this_thr->th.th_task_state == 1 && always && team->t.t_nproc == 1) {
3999    // fix task state stack to adjust for proxy and helper tasks
4000    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d needs to shift stack"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d needs to shift stack"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_id, this_thr->th.th_task_state); }
4001                  " for team %d at parity=%d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d needs to shift stack"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_id, this_thr->th.th_task_state); }
4002                  __kmp_gtid_from_thread(this_thr), team->t.t_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d needs to shift stack"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_id, this_thr->th.th_task_state); }
4003                  this_thr->th.th_task_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d needs to shift stack"
 " for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_id, this_thr->th.th_task_state); };
4004    __kmp_shift_task_state_stack(this_thr, this_thr->th.th_task_state);
4005  }
4006 
4007  // After threads exit the release, they will call sync, and then point to this
4008  // other task_team; make sure it is allocated and properly initialized. As
4009  // threads spin in the barrier release phase, they will continue to use the
4010  // previous task_team struct(above), until they receive the signal to stop
4011  // checking for tasks (they can't safely reference the kmp_team_t struct,
4012  // which could be reallocated by the primary thread). No task teams are formed
4013  // for serialized teams.
4014  if (team->t.t_nproc > 1) {
4015    int other_team = 1 - this_thr->th.th_task_state;
4016    KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2)if (!(other_team >= 0 && other_team < 2)) { __kmp_debug_assert
("other_team >= 0 && other_team < 2", "openmp/runtime/src/kmp_tasking.cpp"
, 4016); };
4017    if (team->t.t_task_team[other_team] == NULL__null) { // setup other team as well
4018      team->t.t_task_team[other_team] =
4019          __kmp_allocate_task_team(this_thr, team);
4020      KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created second new "
 "task_team %p for team %d at parity=%d\n", __kmp_gtid_from_thread
(this_thr), team->t.t_task_team[other_team], team->t.t_id
, other_team); }
4021                    "task_team %p for team %d at parity=%d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created second new "
 "task_team %p for team %d at parity=%d\n", __kmp_gtid_from_thread
(this_thr), team->t.t_task_team[other_team], team->t.t_id
, other_team); }
4022                    __kmp_gtid_from_thread(this_thr),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created second new "
 "task_team %p for team %d at parity=%d\n", __kmp_gtid_from_thread
(this_thr), team->t.t_task_team[other_team], team->t.t_id
, other_team); }
4023                    team->t.t_task_team[other_team], team->t.t_id, other_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d created second new "
 "task_team %p for team %d at parity=%d\n", __kmp_gtid_from_thread
(this_thr), team->t.t_task_team[other_team], team->t.t_id
, other_team); };
4024    } else { // Leave the old task team struct in place for the upcoming region;
4025      // adjust as needed
4026      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
4027      if (!task_team->tt.tt_active ||
4028          team->t.t_nproc != task_team->tt.tt_nproc) {
4029        TCW_4(task_team->tt.tt_nproc, team->t.t_nproc)(task_team->tt.tt_nproc) = (team->t.t_nproc);
4030        TCW_4(task_team->tt.tt_found_tasks, FALSE)(task_team->tt.tt_found_tasks) = (0);
4031        TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE)(task_team->tt.tt_found_proxy_tasks) = (0);
4032        TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE)(task_team->tt.tt_hidden_helper_task_encountered) = (0);
4033        KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,(&task_team->tt.tt_unfinished_threads)->store(team->
t.t_nproc, std::memory_order_release)
4034                          team->t.t_nproc)(&task_team->tt.tt_unfinished_threads)->store(team->
t.t_nproc, std::memory_order_release);
4035        TCW_4(task_team->tt.tt_active, TRUE)(task_team->tt.tt_active) = ((!0));
4036      }
4037      // if team size has changed, the first thread to enable tasking will
4038      // realloc threads_data if necessary
4039      KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d reset next task_team "
 "%p for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[other_team], team->t.t_id, other_team
); }
4040                    "%p for team %d at parity=%d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d reset next task_team "
 "%p for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[other_team], team->t.t_id, other_team
); }
4041                    __kmp_gtid_from_thread(this_thr),if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d reset next task_team "
 "%p for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[other_team], team->t.t_id, other_team
); }
4042                    team->t.t_task_team[other_team], team->t.t_id, other_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_setup: Primary T#%d reset next task_team "
 "%p for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr
), team->t.t_task_team[other_team], team->t.t_id, other_team
); };
4043    }
4044  }
4045 
4046  // For regular thread, task enabling should be called when the task is going
4047  // to be pushed to a dequeue. However, for the hidden helper thread, we need
4048  // it ahead of time so that some operations can be performed without race
4049  // condition.
4050  if (this_thr == __kmp_hidden_helper_main_thread) {
4051    for (int i = 0; i < 2; ++i) {
4052      kmp_task_team_t *task_team = team->t.t_task_team[i];
4053      if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) {
4054        continue;
4055      }
4056      __kmp_enable_tasking(task_team, this_thr);
4057      for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
4058        kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
4059        if (thread_data->td.td_deque == NULL__null) {
4060          __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
4061        }
4062      }
4063    }
4064  }
4065}
4066 
4067// __kmp_task_team_sync: Propagation of task team data from team to threads
4068// which happens just after the release phase of a team barrier.  This may be
4069// called by any thread, but only for teams with # threads > 1.
4070void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
4071  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 4071); };
4072 
4073  // Toggle the th_task_state field, to switch which task_team this thread
4074  // refers to
4075  this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
4076 
4077  // It is now safe to propagate the task team pointer from the team struct to
4078  // the current thread.
4079  TCW_PTR(this_thr->th.th_task_team,((this_thr->th.th_task_team)) = ((team->t.t_task_team[this_thr
->th.th_task_state]))
4080          team->t.t_task_team[this_thr->th.th_task_state])((this_thr->th.th_task_team)) = ((team->t.t_task_team[this_thr
->th.th_task_state]));
4081  KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
 "%p from Team #%d (parity=%d)\n", __kmp_gtid_from_thread(this_thr
), this_thr->th.th_task_team, team->t.t_id, this_thr->
th.th_task_state); }
4082           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
 "%p from Team #%d (parity=%d)\n", __kmp_gtid_from_thread(this_thr
), this_thr->th.th_task_team, team->t.t_id, this_thr->
th.th_task_state); }
4083            "%p from Team #%d (parity=%d)\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
 "%p from Team #%d (parity=%d)\n", __kmp_gtid_from_thread(this_thr
), this_thr->th.th_task_team, team->t.t_id, this_thr->
th.th_task_state); }
4084            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
 "%p from Team #%d (parity=%d)\n", __kmp_gtid_from_thread(this_thr
), this_thr->th.th_task_team, team->t.t_id, this_thr->
th.th_task_state); }
4085            team->t.t_id, this_thr->th.th_task_state))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
 "%p from Team #%d (parity=%d)\n", __kmp_gtid_from_thread(this_thr
), this_thr->th.th_task_team, team->t.t_id, this_thr->
th.th_task_state); };
4086}
4087 
4088// __kmp_task_team_wait: Primary thread waits for outstanding tasks after the
4089// barrier gather phase. Only called by primary thread if #threads in team > 1
4090// or if proxy tasks were created.
4091//
4092// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
4093// by passing in 0 optionally as the last argument. When wait is zero, primary
4094// thread does not wait for unfinished_threads to reach 0.
4095void __kmp_task_team_wait(
4096    kmp_info_t *this_thr,
4097    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj, int wait) {
4098  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
4099 
4100  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 4100); };
4101  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team)if (!(task_team == this_thr->th.th_task_team)) { __kmp_debug_assert
("task_team == this_thr->th.th_task_team", "openmp/runtime/src/kmp_tasking.cpp"
, 4101); };
4102 
4103  if ((task_team != NULL__null) && KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) {
4104    if (wait) {
4105      KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
 "(for unfinished_threads to reach 0) on task_team = %p\n", __kmp_gtid_from_thread
(this_thr), task_team); }
4106                    "(for unfinished_threads to reach 0) on task_team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
 "(for unfinished_threads to reach 0) on task_team = %p\n", __kmp_gtid_from_thread
(this_thr), task_team); }
4107                    __kmp_gtid_from_thread(this_thr), task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
 "(for unfinished_threads to reach 0) on task_team = %p\n", __kmp_gtid_from_thread
(this_thr), task_team); };
4108      // Worker threads may have dropped through to release phase, but could
4109      // still be executing tasks. Wait here for tasks to complete. To avoid
4110      // memory contention, only primary thread checks termination condition.
4111      kmp_flag_32<false, false> flag(
4112          RCAST(std::atomic<kmp_uint32> *,reinterpret_cast<std::atomic<kmp_uint32> *>(&
task_team->tt.tt_unfinished_threads)
4113                &task_team->tt.tt_unfinished_threads)reinterpret_cast<std::atomic<kmp_uint32> *>(&
task_team->tt.tt_unfinished_threads),
4114          0U);
4115      flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
4116    }
4117    // Deactivate the old task team, so that the worker threads will stop
4118    // referencing it while spinning.
4119    KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
 "setting active to false, setting local and team's pointer to NULL\n"
, __kmp_gtid_from_thread(this_thr), task_team); }
4120        20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
 "setting active to false, setting local and team's pointer to NULL\n"
, __kmp_gtid_from_thread(this_thr), task_team); }
4121        ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
 "setting active to false, setting local and team's pointer to NULL\n"
, __kmp_gtid_from_thread(this_thr), task_team); }
4122         "setting active to false, setting local and team's pointer to NULL\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
 "setting active to false, setting local and team's pointer to NULL\n"
, __kmp_gtid_from_thread(this_thr), task_team); }
4123         __kmp_gtid_from_thread(this_thr), task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
 "setting active to false, setting local and team's pointer to NULL\n"
, __kmp_gtid_from_thread(this_thr), task_team); };
4124    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||if (!(task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks
 == (!0) || task_team->tt.tt_hidden_helper_task_encountered
 == (!0))) { __kmp_debug_assert("task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == (!0) || task_team->tt.tt_hidden_helper_task_encountered == (!0)"
, "openmp/runtime/src/kmp_tasking.cpp", 4126); }
4125                     task_team->tt.tt_found_proxy_tasks == TRUE ||if (!(task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks
 == (!0) || task_team->tt.tt_hidden_helper_task_encountered
 == (!0))) { __kmp_debug_assert("task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == (!0) || task_team->tt.tt_hidden_helper_task_encountered == (!0)"
, "openmp/runtime/src/kmp_tasking.cpp", 4126); }
4126                     task_team->tt.tt_hidden_helper_task_encountered == TRUE)if (!(task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks
 == (!0) || task_team->tt.tt_hidden_helper_task_encountered
 == (!0))) { __kmp_debug_assert("task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == (!0) || task_team->tt.tt_hidden_helper_task_encountered == (!0)"
, "openmp/runtime/src/kmp_tasking.cpp", 4126); };
4127    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE)(task_team->tt.tt_found_proxy_tasks) = (0);
4128    TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE)(task_team->tt.tt_hidden_helper_task_encountered) = (0);
4129    KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0)if ((task_team->tt.tt_untied_task_encountered) != (0)) (task_team
->tt.tt_untied_task_encountered) = (0);
4130    TCW_SYNC_4(task_team->tt.tt_active, FALSE)(task_team->tt.tt_active) = (0);
4131    KMP_MB();
4132 
4133    TCW_PTR(this_thr->th.th_task_team, NULL)((this_thr->th.th_task_team)) = ((__null));
4134  }
4135}
4136 
4137// __kmp_tasking_barrier:
4138// This routine is called only when __kmp_tasking_mode == tskm_extra_barrier.
4139// Internal function to execute all tasks prior to a regular barrier or a join
4140// barrier. It is a full barrier itself, which unfortunately turns regular
4141// barriers into double barriers and join barriers into 1 1/2 barriers.
4142void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
4143  std::atomic<kmp_uint32> *spin = RCAST(reinterpret_cast<std::atomic<kmp_uint32> *>(&
team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads
)
4144      std::atomic<kmp_uint32> *,reinterpret_cast<std::atomic<kmp_uint32> *>(&
team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads
)
4145      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads)reinterpret_cast<std::atomic<kmp_uint32> *>(&
team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads
);
4146  int flag = FALSE0;
4147  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier)if (!(__kmp_tasking_mode == tskm_extra_barrier)) { __kmp_debug_assert
("__kmp_tasking_mode == tskm_extra_barrier", "openmp/runtime/src/kmp_tasking.cpp"
, 4147); };
4148 
4149#if USE_ITT_BUILD1
4150  KMP_FSYNC_SPIN_INIT(spin, NULL)int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if
 (spin == __null) { spin = __null; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
 ::"i"(0x4376) : "%ebx");
4151#endif /* USE_ITT_BUILD */
4152  kmp_flag_32<false, false> spin_flag(spin, 0U);
4153  while (!spin_flag.execute_tasks(thread, gtid, TRUE(!0),
4154                                  &flag USE_ITT_BUILD_ARG(NULL), __null, 0)) {
4155#if USE_ITT_BUILD1
4156    // TODO: What about itt_sync_obj??
4157    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin))do { if (__kmp_itt_fsync_prepare_ptr__3_0 && sync_iters
 < __kmp_itt_prepare_delay) { ++sync_iters; if (sync_iters
 >= __kmp_itt_prepare_delay) { (!__kmp_itt_fsync_prepare_ptr__3_0
) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0((void *)((void
 *)reinterpret_cast<void *>(spin))); } } } while (0);
4158#endif /* USE_ITT_BUILD */
4159 
4160    if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
4161      if (__kmp_global.g.g_abort)
4162        __kmp_abort_thread();
4163      break;
4164    }
4165    KMP_YIELD(TRUE){ __kmp_x86_pause(); if (((!0)) && (((__kmp_use_yield
 == 1) || (__kmp_use_yield == 2 && (((__kmp_nth) >
 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))))))) __kmp_yield
(); };
4166  }
4167#if USE_ITT_BUILD1
4168  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin))do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
 ::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay
) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)((void *)reinterpret_cast<void *>(spin))); } }
 while (0);
4169#endif /* USE_ITT_BUILD */
4170}
4171 
4172// __kmp_give_task puts a task into a given thread queue if:
4173//  - the queue for that thread was created
4174//  - there's space in that queue
4175// Because of this, __kmp_push_task needs to check if there's space after
4176// getting the lock
4177static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
4178                            kmp_int32 pass) {
4179  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
4180  kmp_task_team_t *task_team = taskdata->td_task_team;
4181 
4182  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_give_task: trying to give task %p to thread %d.\n"
, taskdata, tid); }
4183                taskdata, tid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_give_task: trying to give task %p to thread %d.\n"
, taskdata, tid); };
4184 
4185  // If task_team is NULL something went really bad...
4186  KMP_DEBUG_ASSERT(task_team != NULL)if (!(task_team != __null)) { __kmp_debug_assert("task_team != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 4186); };
4187 
4188  bool result = false;
4189  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
4190 
4191  if (thread_data->td.td_deque == NULL__null) {
4192    // There's no queue in this thread, go find another one
4193    // We're guaranteed that at least one thread has a queue
4194    KA_TRACE(30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: thread %d has no queue while giving task %p.\n"
, tid, taskdata); }
4195             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: thread %d has no queue while giving task %p.\n"
, tid, taskdata); }
4196              tid, taskdata))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: thread %d has no queue while giving task %p.\n"
, tid, taskdata); };
4197    return result;
4198  }
4199 
4200  if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) >=
4201      TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size)) {
4202    KA_TRACE(if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: queue is full while giving task %p to thread %d.\n"
, taskdata, tid); }
4203        30,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: queue is full while giving task %p to thread %d.\n"
, taskdata, tid); }
4204        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: queue is full while giving task %p to thread %d.\n"
, taskdata, tid); }
4205         taskdata, tid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: queue is full while giving task %p to thread %d.\n"
, taskdata, tid); };
4206 
4207    // if this deque is bigger than the pass ratio give a chance to another
4208    // thread
4209    if (TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size) / INITIAL_TASK_DEQUE_SIZE(1 << 8) >= pass)
4210      return result;
4211 
4212    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4213    if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) >=
4214        TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size)) {
4215      // expand deque to push the task which is not allowed to execute
4216      __kmp_realloc_task_deque(thread, thread_data);
4217    }
4218 
4219  } else {
4220 
4221    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4222 
4223    if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) >=
4224        TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size)) {
4225      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: queue is full while giving task %p to "
 "thread %d.\n", taskdata, tid); }
4226                    "thread %d.\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: queue is full while giving task %p to "
 "thread %d.\n", taskdata, tid); }
4227                    taskdata, tid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: queue is full while giving task %p to "
 "thread %d.\n", taskdata, tid); };
4228 
4229      // if this deque is bigger than the pass ratio give a chance to another
4230      // thread
4231      if (TASK_DEQUE_SIZE(thread_data->td)((thread_data->td).td_deque_size) / INITIAL_TASK_DEQUE_SIZE(1 << 8) >= pass)
4232        goto release_and_exit;
4233 
4234      __kmp_realloc_task_deque(thread, thread_data);
4235    }
4236  }
4237 
4238  // lock is held here, and there is space in the deque
4239 
4240  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
4241  // Wrap index.
4242  thread_data->td.td_deque_tail =
4243      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
4244  TCW_4(thread_data->td.td_deque_ntasks,(thread_data->td.td_deque_ntasks) = ((thread_data->td.td_deque_ntasks
) + 1)
4245        TCR_4(thread_data->td.td_deque_ntasks) + 1)(thread_data->td.td_deque_ntasks) = ((thread_data->td.td_deque_ntasks
) + 1);
4246 
4247  result = true;
4248  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: successfully gave task %p to thread %d.\n"
, taskdata, tid); }
4249                taskdata, tid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_give_task: successfully gave task %p to thread %d.\n"
, taskdata, tid); };
4250 
4251release_and_exit:
4252  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
4253 
4254  return result;
4255}
4256 
// Bit OR-ed into a proxy task's td_incomplete_child_tasks counter by the first
// top half and cleared again by the second top half; the bottom half spins
// while it is set.
#define PROXY_TASK_FLAG 0x40000000
4258/* The finish of the proxy tasks is divided in two pieces:
4259    - the top half is the one that can be done from a thread outside the team
4260    - the bottom half must be run from a thread within the team
4261 
4262   In order to run the bottom half the task gets queued back into one of the
4263   threads of the team. Once the td_incomplete_child_task counter of the parent
4264   is decremented the threads can leave the barriers. So, the bottom half needs
4265   to be queued before the counter is decremented. The top half is therefore
4266   divided in two parts:
4267    - things that can be run before queuing the bottom half
4268    - things that must be run after queuing the bottom half
4269 
4270   This creates a second race as the bottom half can free the task before the
4271   second top half is executed. To avoid this we use the
4272   td_incomplete_child_task of the proxy task to synchronize the top and bottom
4273   half. */
4274static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4275  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT)if (!(taskdata->td_flags.tasktype == 1)) { __kmp_debug_assert
("taskdata->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4275); };
4276  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY)if (!(taskdata->td_flags.proxy == 1)) { __kmp_debug_assert
("taskdata->td_flags.proxy == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4276); };
4277  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0)if (!(taskdata->td_flags.complete == 0)) { __kmp_debug_assert
("taskdata->td_flags.complete == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 4277); };
4278  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0)if (!(taskdata->td_flags.freed == 0)) { __kmp_debug_assert
("taskdata->td_flags.freed == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 4278); };
4279 
4280  taskdata->td_flags.complete = 1; // mark the task as completed
4281 
4282  if (taskdata->td_taskgroup)
4283    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count)(&taskdata->td_taskgroup->count)->fetch_sub(1, std
::memory_order_acq_rel);
4284 
4285  // Create an imaginary children for this task so the bottom half cannot
4286  // release the task before we have completed the second top half
4287  KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG)(&taskdata->td_incomplete_child_tasks)->fetch_or(0x40000000
, std::memory_order_acq_rel);
4288}
4289 
4290static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4291#if KMP_DEBUG1
4292  kmp_int32 children = 0;
4293  // Predecrement simulated by "- 1" calculation
4294  children = -1 +
4295#endif
4296      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks)(&taskdata->td_parent->td_incomplete_child_tasks)->
fetch_sub(1, std::memory_order_acq_rel);
4297  KMP_DEBUG_ASSERT(children >= 0)if (!(children >= 0)) { __kmp_debug_assert("children >= 0"
, "openmp/runtime/src/kmp_tasking.cpp", 4297); };
4298 
4299  // Remove the imaginary children
4300  KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG)(&taskdata->td_incomplete_child_tasks)->fetch_and(~
0x40000000, std::memory_order_acq_rel);
4301}
4302 
4303static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
4304  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask)(((kmp_taskdata_t *)ptask) - 1);
4305  kmp_info_t *thread = __kmp_threads[gtid];
4306 
4307  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY)if (!(taskdata->td_flags.proxy == 1)) { __kmp_debug_assert
("taskdata->td_flags.proxy == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4307); };
4308  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==if (!(taskdata->td_flags.complete == 1)) { __kmp_debug_assert
("taskdata->td_flags.complete == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4309); }
4309                   1)if (!(taskdata->td_flags.complete == 1)) { __kmp_debug_assert
("taskdata->td_flags.complete == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4309); }; // top half must run before bottom half
4310 
4311  // We need to wait to make sure the top half is finished
4312  // Spinning here should be ok as this should happen quickly
4313  while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks)(&taskdata->td_incomplete_child_tasks)->load(std::memory_order_acquire
) &
4314          PROXY_TASK_FLAG0x40000000) > 0)
4315    ;
4316 
4317  __kmp_release_deps(gtid, taskdata);
4318  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
4319}
4320 
4321/*!
4322@ingroup TASKING
4323@param gtid Global Thread ID of encountering thread
4324@param ptask Task which execution is completed
4325 
4326Execute the completion of a proxy task from a thread of that is part of the
4327team. Run first and bottom halves directly.
4328*/
4329void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
4330  KMP_DEBUG_ASSERT(ptask != NULL)if (!(ptask != __null)) { __kmp_debug_assert("ptask != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 4330); };
4331  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask)(((kmp_taskdata_t *)ptask) - 1);
4332  KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n"
, gtid, taskdata); }
4333      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n"
, gtid, taskdata); }
4334           gtid, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n"
, gtid, taskdata); };
4335  __kmp_assert_valid_gtid(gtid);
4336  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY)if (!(taskdata->td_flags.proxy == 1)) { __kmp_debug_assert
("taskdata->td_flags.proxy == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4336); };
4337 
4338  __kmp_first_top_half_finish_proxy(taskdata);
4339  __kmp_second_top_half_finish_proxy(taskdata);
4340  __kmp_bottom_half_finish_proxy(gtid, ptask);
4341 
4342  KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n"
, gtid, taskdata); }
4343           ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n"
, gtid, taskdata); }
4344            gtid, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n"
, gtid, taskdata); };
4345}
4346 
4347void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
4348  KMP_DEBUG_ASSERT(ptask != NULL)if (!(ptask != __null)) { __kmp_debug_assert("ptask != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 4348); };
4349  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask)(((kmp_taskdata_t *)ptask) - 1);
4350 
4351  // Enqueue the task so that its bottom half is completed by a thread within the
4352  // corresponding team
4353  kmp_team_t *team = taskdata->td_team;
4354  kmp_int32 nthreads = team->t.t_nproc;
4355  kmp_info_t *thread;
4356 
4357  // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
4358  // but we cannot use __kmp_get_random here
4359  kmp_int32 start_k = start % nthreads;
4360  kmp_int32 pass = 1;
4361  kmp_int32 k = start_k;
4362 
4363  do {
4364    // For now we're just linearly trying to find a thread
4365    thread = team->t.t_threads[k];
4366    k = (k + 1) % nthreads;
4367 
4368    // we did a full pass through all the threads
4369    if (k == start_k)
4370      pass = pass << 1;
4371 
4372  } while (!__kmp_give_task(thread, k, ptask, pass));
4373 
4374  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) && __kmp_wpolicy_passive) {
4375    // awake at least one thread to execute given task
4376    for (int i = 0; i < nthreads; ++i) {
4377      thread = team->t.t_threads[i];
4378      if (thread->th.th_sleep_loc != NULL__null) {
4379        __kmp_null_resume_wrapper(thread);
4380        break;
4381      }
4382    }
4383  }
4384}
4385 
4386/*!
4387@ingroup TASKING
4388@param ptask Task which execution is completed
4389 
4390Execute the completion of a proxy task from a thread that might not belong to
4391the team.
4392*/
4393void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
4394  KMP_DEBUG_ASSERT(ptask != NULL)if (!(ptask != __null)) { __kmp_debug_assert("ptask != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 4394); };
4395  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask)(((kmp_taskdata_t *)ptask) - 1);
4396 
4397  KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n"
, taskdata); }
4398      10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n"
, taskdata); }
4399      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n"
, taskdata); }
4400       taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n"
, taskdata); };
4401 
4402  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY)if (!(taskdata->td_flags.proxy == 1)) { __kmp_debug_assert
("taskdata->td_flags.proxy == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4402); };
4403 
4404  __kmp_first_top_half_finish_proxy(taskdata);
4405 
4406  __kmpc_give_task(ptask);
4407 
4408  __kmp_second_top_half_finish_proxy(taskdata);
4409 
4410  KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n"
, taskdata); }
4411      10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n"
, taskdata); }
4412      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n"
, taskdata); }
4413       taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n"
, taskdata); };
4414}
4415 
4416kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
4417                                                kmp_task_t *task) {
4418  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
4419  if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
4420    td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
4421    td->td_allow_completion_event.ed.task = task;
4422    __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
4423  }
4424  return &td->td_allow_completion_event;
4425}
4426 
4427void __kmp_fulfill_event(kmp_event_t *event) {
4428  if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4429    kmp_task_t *ptask = event->ed.task;
4430    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask)(((kmp_taskdata_t *)ptask) - 1);
4431    bool detached = false;
4432    int gtid = __kmp_get_gtid()__kmp_get_global_thread_id();
4433 
4434    // The associated task might have completed or could be completing at this
4435    // point.
4436    // We need to take the lock to avoid races
4437    __kmp_acquire_tas_lock(&event->lock, gtid);
4438    if (taskdata->td_flags.proxy == TASK_PROXY1) {
4439      detached = true;
4440    } else {
4441#if OMPT_SUPPORT1
4442      // The OMPT event must occur under mutual exclusion,
4443      // otherwise the tool might access ptask after free
4444      if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
4445        __ompt_task_finish(ptask, NULL__null, ompt_task_early_fulfill);
4446#endif
4447    }
4448    event->type = KMP_EVENT_UNINITIALIZED;
4449    __kmp_release_tas_lock(&event->lock, gtid);
4450 
4451    if (detached) {
4452#if OMPT_SUPPORT1
4453      // We free ptask afterwards and know the task is finished,
4454      // so locking is not necessary
4455      if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
4456        __ompt_task_finish(ptask, NULL__null, ompt_task_late_fulfill);
4457#endif
4458      // If the task was detached, complete the proxy task
4459      if (gtid >= 0) {
4460        kmp_team_t *team = taskdata->td_team;
4461        kmp_info_t *thread = __kmp_get_thread()(__kmp_thread_from_gtid(__kmp_get_global_thread_id()));
4462        if (thread->th.th_team == team) {
4463          __kmpc_proxy_task_completed(gtid, ptask);
4464          return;
4465        }
4466      }
4467 
4468      // fallback
4469      __kmpc_proxy_task_completed_ooo(ptask);
4470    }
4471  }
4472}
4473 
4474// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
4475// for taskloop
4476//
4477// thread:   allocating thread
4478// task_src: pointer to source task to be duplicated
4479// returns:  a pointer to the allocated kmp_task_t structure (task).
4480kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
4481  kmp_task_t *task;
4482  kmp_taskdata_t *taskdata;
4483  kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src)(((kmp_taskdata_t *)task_src) - 1);
4484  kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task
4485  size_t shareds_offset;
4486  size_t task_size;
4487 
4488  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n"
, thread, task_src); }
4489                task_src))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n"
, thread, task_src); };
4490  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==if (!(taskdata_src->td_flags.proxy == 0)) { __kmp_debug_assert
("taskdata_src->td_flags.proxy == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 4491); }
4491                   TASK_FULL)if (!(taskdata_src->td_flags.proxy == 0)) { __kmp_debug_assert
("taskdata_src->td_flags.proxy == 0", "openmp/runtime/src/kmp_tasking.cpp"
, 4491); }; // it should not be proxy task
4492  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT)if (!(taskdata_src->td_flags.tasktype == 1)) { __kmp_debug_assert
("taskdata_src->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 4492); };
4493  task_size = taskdata_src->td_size_alloc;
4494 
4495  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
4496  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n"
, thread, task_size); }
4497                task_size))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n"
, thread, task_size); };
4498#if USE_FAST_MEMORY3
4499  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size)___kmp_fast_allocate((thread), (task_size), "openmp/runtime/src/kmp_tasking.cpp"
, 4499);
4500#else
4501  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size)___kmp_thread_malloc((thread), (task_size), "openmp/runtime/src/kmp_tasking.cpp"
, 4501);
4502#endif /* USE_FAST_MEMORY */
4503  KMP_MEMCPYmemcpy(taskdata, taskdata_src, task_size);
4504 
4505  task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
4506 
4507  // Initialize new task (only specific fields not affected by memcpy)
4508  taskdata->td_task_id = KMP_GEN_TASK_ID()(~0);
4509  if (task->shareds != NULL__null) { // need setup shareds pointer
4510    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
4511    task->shareds = &((char *)taskdata)[shareds_offset];
4512    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==if (!((((kmp_uintptr_t)task->shareds) & (sizeof(void *
) - 1)) == 0)) { __kmp_debug_assert("(((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 4513); }
4513                     0)if (!((((kmp_uintptr_t)task->shareds) & (sizeof(void *
) - 1)) == 0)) { __kmp_debug_assert("(((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 4513); };
4514  }
4515  taskdata->td_alloc_thread = thread;
4516  taskdata->td_parent = parent_task;
4517  // task inherits the taskgroup from the parent task
4518  taskdata->td_taskgroup = parent_task->td_taskgroup;
4519  // tied task needs to initialize the td_last_tied at creation,
4520  // untied one does this when it is scheduled for execution
4521  if (taskdata->td_flags.tiedness == TASK_TIED1)
4522    taskdata->td_last_tied = taskdata;
4523 
4524  // Only need to keep track of child task counts if team parallel and tasking
4525  // not serialized
4526  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4527    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks)(&parent_task->td_incomplete_child_tasks)->fetch_add
(1, std::memory_order_acq_rel);
4528    if (parent_task->td_taskgroup)
4529      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count)(&parent_task->td_taskgroup->count)->fetch_add(1
, std::memory_order_acq_rel);
4530    // Only need to keep track of allocated child tasks for explicit tasks since
4531    // implicit tasks are not deallocated
4532    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT1)
4533      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks)(&taskdata->td_parent->td_allocated_child_tasks)->
fetch_add(1, std::memory_order_acq_rel);
4534  }
4535 
4536  KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n"
, thread, taskdata, taskdata->td_parent); }
4537           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n"
, thread, taskdata, taskdata->td_parent); }
4538            thread, taskdata, taskdata->td_parent))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n"
, thread, taskdata, taskdata->td_parent); };
4539#if OMPT_SUPPORT1
4540  if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
4541    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
4542#endif
4543  return task;
4544}
4545 
4546// Routine optionally generated by the compiler for setting the lastprivate flag
4547// and calling needed constructors for private/firstprivate objects
4548// (used to form taskloop tasks from pattern task)
4549// Parameters: dest task, src task, lastprivate flag.
4550typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
4551 
4552KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8)static_assert(sizeof(long) == 4 || sizeof(long) == 8, "Build condition error"
);
4553 
4554// class to encapsulate manipulating loop bounds in a taskloop task.
4555// this abstracts away the Intel vs GOMP taskloop interface for setting/getting
4556// the loop bound variables.
4557class kmp_taskloop_bounds_t {
4558  kmp_task_t *task;
4559  const kmp_taskdata_t *taskdata;
4560  size_t lower_offset;
4561  size_t upper_offset;
4562 
4563public:
4564  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
4565      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1)),
4566        lower_offset((char *)lb - (char *)task),
4567        upper_offset((char *)ub - (char *)task) {
4568    KMP_DEBUG_ASSERT((char *)lb > (char *)_task)if (!((char *)lb > (char *)_task)) { __kmp_debug_assert("(char *)lb > (char *)_task"
, "openmp/runtime/src/kmp_tasking.cpp", 4568); };
4569    KMP_DEBUG_ASSERT((char *)ub > (char *)_task)if (!((char *)ub > (char *)_task)) { __kmp_debug_assert("(char *)ub > (char *)_task"
, "openmp/runtime/src/kmp_tasking.cpp", 4569); };
4570  }
4571  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
4572      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)(((kmp_taskdata_t *)_task) - 1)),
4573        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
4574  size_t get_lower_offset() const { return lower_offset; }
4575  size_t get_upper_offset() const { return upper_offset; }
4576  kmp_uint64 get_lb() const {
4577    kmp_int64 retval;
4578#if defined(KMP_GOMP_COMPAT)
4579    // Intel task just returns the lower bound normally
4580    if (!taskdata->td_flags.native) {
4581      retval = *(kmp_int64 *)((char *)task + lower_offset);
4582    } else {
4583      // GOMP task has to take into account the sizeof(long)
4584      if (taskdata->td_size_loop_bounds == 4) {
4585        kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds)reinterpret_cast<kmp_int32 *>(task->shareds);
4586        retval = (kmp_int64)*lb;
4587      } else {
4588        kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds)reinterpret_cast<kmp_int64 *>(task->shareds);
4589        retval = (kmp_int64)*lb;
4590      }
4591    }
4592#else
4593    (void)taskdata;
4594    retval = *(kmp_int64 *)((char *)task + lower_offset);
4595#endif // defined(KMP_GOMP_COMPAT)
4596    return retval;
4597  }
4598  kmp_uint64 get_ub() const {
4599    kmp_int64 retval;
4600#if defined(KMP_GOMP_COMPAT)
4601    // Intel task just returns the upper bound normally
4602    if (!taskdata->td_flags.native) {
4603      retval = *(kmp_int64 *)((char *)task + upper_offset);
4604    } else {
4605      // GOMP task has to take into account the sizeof(long)
4606      if (taskdata->td_size_loop_bounds == 4) {
4607        kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds)reinterpret_cast<kmp_int32 *>(task->shareds) + 1;
4608        retval = (kmp_int64)*ub;
4609      } else {
4610        kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds)reinterpret_cast<kmp_int64 *>(task->shareds) + 1;
4611        retval = (kmp_int64)*ub;
4612      }
4613    }
4614#else
4615    retval = *(kmp_int64 *)((char *)task + upper_offset);
4616#endif // defined(KMP_GOMP_COMPAT)
4617    return retval;
4618  }
4619  void set_lb(kmp_uint64 lb) {
4620#if defined(KMP_GOMP_COMPAT)
4621    // Intel task just sets the lower bound normally
4622    if (!taskdata->td_flags.native) {
4623      *(kmp_uint64 *)((char *)task + lower_offset) = lb;
4624    } else {
4625      // GOMP task has to take into account the sizeof(long)
4626      if (taskdata->td_size_loop_bounds == 4) {
4627        kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds)reinterpret_cast<kmp_uint32 *>(task->shareds);
4628        *lower = (kmp_uint32)lb;
4629      } else {
4630        kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds)reinterpret_cast<kmp_uint64 *>(task->shareds);
4631        *lower = (kmp_uint64)lb;
4632      }
4633    }
4634#else
4635    *(kmp_uint64 *)((char *)task + lower_offset) = lb;
4636#endif // defined(KMP_GOMP_COMPAT)
4637  }
4638  void set_ub(kmp_uint64 ub) {
4639#if defined(KMP_GOMP_COMPAT)
4640    // Intel task just sets the upper bound normally
4641    if (!taskdata->td_flags.native) {
4642      *(kmp_uint64 *)((char *)task + upper_offset) = ub;
4643    } else {
4644      // GOMP task has to take into account the sizeof(long)
4645      if (taskdata->td_size_loop_bounds == 4) {
4646        kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds)reinterpret_cast<kmp_uint32 *>(task->shareds) + 1;
4647        *upper = (kmp_uint32)ub;
4648      } else {
4649        kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds)reinterpret_cast<kmp_uint64 *>(task->shareds) + 1;
4650        *upper = (kmp_uint64)ub;
4651      }
4652    }
4653#else
4654    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
4655#endif // defined(KMP_GOMP_COMPAT)
4656  }
4657};
4658 
4659// __kmp_taskloop_linear: Start tasks of the taskloop linearly
4660//
4661// loc        Source location information
4662// gtid       Global thread ID
4663// task       Pattern task, exposes the loop iteration range
4664// lb         Pointer to loop lower bound in task structure
4665// ub         Pointer to loop upper bound in task structure
4666// st         Loop stride
4667// ub_glob    Global upper bound (used for lastprivate check)
4668// num_tasks  Number of tasks to execute
4669// grainsize  Number of loop iterations per task
4670// extras     Number of chunks with grainsize+1 iterations
4671// last_chunk Reduction of grainsize for last task
4672// tc         Iterations count
4673// task_dup   Tasks duplication routine
4674// codeptr_ra Return address for OMPT events
4675void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
4676                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4677                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4678                           kmp_uint64 grainsize, kmp_uint64 extras,
4679                           kmp_int64 last_chunk, kmp_uint64 tc,
4680#if OMPT_SUPPORT1
4681                           void *codeptr_ra,
4682#endif
4683                           void *task_dup) {
4684  KMP_COUNT_BLOCK(OMP_TASKLOOP)((void)0);
4685  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling)((void)0);
4686  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4687  // compiler provides global bounds here
4688  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4689  kmp_uint64 lower = task_bounds.get_lb();
4690  kmp_uint64 upper = task_bounds.get_ub();
4691  kmp_uint64 i;
4692  kmp_info_t *thread = __kmp_threads[gtid];
4693  kmp_taskdata_t *current_task = thread->th.th_current_task;
4694  kmp_task_t *next_task;
4695  kmp_int32 lastpriv = 0;
4696 
4697  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
 : extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 4698); }
4698                             (last_chunk < 0 ? last_chunk : extras))if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
 : extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 4698); };
4699  KMP_DEBUG_ASSERT(num_tasks > extras)if (!(num_tasks > extras)) { __kmp_debug_assert("num_tasks > extras"
, "openmp/runtime/src/kmp_tasking.cpp", 4699); };
4700  KMP_DEBUG_ASSERT(num_tasks > 0)if (!(num_tasks > 0)) { __kmp_debug_assert("num_tasks > 0"
, "openmp/runtime/src/kmp_tasking.cpp", 4700); };
4701  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n"
, gtid, num_tasks, grainsize, extras, last_chunk, lower, upper
, ub_glob, st, task_dup); }
4702                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n"
, gtid, num_tasks, grainsize, extras, last_chunk, lower, upper
, ub_glob, st, task_dup); }
4703                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n"
, gtid, num_tasks, grainsize, extras, last_chunk, lower, upper
, ub_glob, st, task_dup); }
4704                ub_glob, st, task_dup))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
 "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n"
, gtid, num_tasks, grainsize, extras, last_chunk, lower, upper
, ub_glob, st, task_dup); };
4705 
4706  // Launch num_tasks tasks, assign grainsize iterations each task
4707  for (i = 0; i < num_tasks; ++i) {
4708    kmp_uint64 chunk_minus_1;
4709    if (extras == 0) {
4710      chunk_minus_1 = grainsize - 1;
4711    } else {
4712      chunk_minus_1 = grainsize;
4713      --extras; // first extras iterations get bigger chunk (grainsize+1)
4714    }
4715    upper = lower + st * chunk_minus_1;
4716    if (upper > *ub) {
4717      upper = *ub;
4718    }
4719    if (i == num_tasks - 1) {
4720      // schedule the last task, set lastprivate flag if needed
4721      if (st == 1) { // most common case
4722        KMP_DEBUG_ASSERT(upper == *ub)if (!(upper == *ub)) { __kmp_debug_assert("upper == *ub", "openmp/runtime/src/kmp_tasking.cpp"
, 4722); };
4723        if (upper == ub_glob)
4724          lastpriv = 1;
4725      } else if (st > 0) { // positive loop stride
4726        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper)if (!((kmp_uint64)st > *ub - upper)) { __kmp_debug_assert(
"(kmp_uint64)st > *ub - upper", "openmp/runtime/src/kmp_tasking.cpp"
, 4726); };
4727        if ((kmp_uint64)st > ub_glob - upper)
4728          lastpriv = 1;
4729      } else { // negative loop stride
4730        KMP_DEBUG_ASSERT(upper + st < *ub)if (!(upper + st < *ub)) { __kmp_debug_assert("upper + st < *ub"
, "openmp/runtime/src/kmp_tasking.cpp", 4730); };
4731        if (upper - ub_glob < (kmp_uint64)(-st))
4732          lastpriv = 1;
4733      }
4734    }
4735    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
4736    kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task)(((kmp_taskdata_t *)next_task) - 1);
4737    kmp_taskloop_bounds_t next_task_bounds =
4738        kmp_taskloop_bounds_t(next_task, task_bounds);
4739 
4740    // adjust task-specific bounds
4741    next_task_bounds.set_lb(lower);
4742    if (next_taskdata->td_flags.native) {
4743      next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4744    } else {
4745      next_task_bounds.set_ub(upper);
4746    }
4747    if (ptask_dup != NULL__null) // set lastprivate flag, construct firstprivates,
4748                           // etc.
4749      ptask_dup(next_task, task, lastpriv);
4750    KA_TRACE(40,if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
 "upper %lld stride %lld, (offsets %p %p)\n", gtid, i, next_task
, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds
.get_upper_offset()); }
4751             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
 "upper %lld stride %lld, (offsets %p %p)\n", gtid, i, next_task
, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds
.get_upper_offset()); }
4752              "upper %lld stride %lld, (offsets %p %p)\n",if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
 "upper %lld stride %lld, (offsets %p %p)\n", gtid, i, next_task
, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds
.get_upper_offset()); }
4753              gtid, i, next_task, lower, upper, st,if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
 "upper %lld stride %lld, (offsets %p %p)\n", gtid, i, next_task
, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds
.get_upper_offset()); }
4754              next_task_bounds.get_lower_offset(),if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
 "upper %lld stride %lld, (offsets %p %p)\n", gtid, i, next_task
, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds
.get_upper_offset()); }
4755              next_task_bounds.get_upper_offset()))if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
 "upper %lld stride %lld, (offsets %p %p)\n", gtid, i, next_task
, lower, upper, st, next_task_bounds.get_lower_offset(), next_task_bounds
.get_upper_offset()); };
4756#if OMPT_SUPPORT1
4757    __kmp_omp_taskloop_task(NULL__null, gtid, next_task,
4758                            codeptr_ra); // schedule new task
4759#if OMPT_OPTIONAL1
4760    if (ompt_enabled.ompt_callback_dispatch) {
4761      OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,do { if (st > 0) { next_taskdata->ompt_task_info.dispatch_chunk
.start = static_cast<uint64_t>(lower); next_taskdata->
ompt_task_info.dispatch_chunk.iterations = static_cast<uint64_t
>(((upper) - (lower)) / (st) + 1); } else { next_taskdata->
ompt_task_info.dispatch_chunk.start = static_cast<uint64_t
>(upper); next_taskdata->ompt_task_info.dispatch_chunk.
iterations = static_cast<uint64_t>(((lower) - (upper)) /
 -(st) + 1); } } while (0)
4762                              lower, upper, st)do { if (st > 0) { next_taskdata->ompt_task_info.dispatch_chunk
.start = static_cast<uint64_t>(lower); next_taskdata->
ompt_task_info.dispatch_chunk.iterations = static_cast<uint64_t
>(((upper) - (lower)) / (st) + 1); } else { next_taskdata->
ompt_task_info.dispatch_chunk.start = static_cast<uint64_t
>(upper); next_taskdata->ompt_task_info.dispatch_chunk.
iterations = static_cast<uint64_t>(((lower) - (upper)) /
 -(st) + 1); } } while (0);
4763    }
4764#endif // OMPT_OPTIONAL
4765#else
4766    __kmp_omp_task(gtid, next_task, true); // schedule new task
4767#endif
4768    lower = upper + st; // adjust lower bound for the next iteration
4769  }
4770  // free the pattern task and exit
4771  __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
4772  // do not execute the pattern task, just do internal bookkeeping
4773  __kmp_task_finish<false>(gtid, task, current_task);
4774}
4775 
4776// Structure to keep taskloop parameters for auxiliary task
4777// kept in the shareds of the task structure.
4778typedef struct __taskloop_params {
4779  kmp_task_t *task;
4780  kmp_uint64 *lb;
4781  kmp_uint64 *ub;
4782  void *task_dup;
4783  kmp_int64 st;
4784  kmp_uint64 ub_glob;
4785  kmp_uint64 num_tasks;
4786  kmp_uint64 grainsize;
4787  kmp_uint64 extras;
4788  kmp_int64 last_chunk;
4789  kmp_uint64 tc;
4790  kmp_uint64 num_t_min;
4791#if OMPT_SUPPORT1
4792  void *codeptr_ra;
4793#endif
4794} __taskloop_params_t;
4795 
4796void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
4797                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
4798                          kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
4799                          kmp_uint64,
4800#if OMPT_SUPPORT1
4801                          void *,
4802#endif
4803                          void *);
4804 
4805// Execute part of the taskloop submitted as a task.
4806int __kmp_taskloop_task(int gtid, void *ptask) {
4807  __taskloop_params_t *p =
4808      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4809  kmp_task_t *task = p->task;
4810  kmp_uint64 *lb = p->lb;
4811  kmp_uint64 *ub = p->ub;
4812  void *task_dup = p->task_dup;
4813  //  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4814  kmp_int64 st = p->st;
4815  kmp_uint64 ub_glob = p->ub_glob;
4816  kmp_uint64 num_tasks = p->num_tasks;
4817  kmp_uint64 grainsize = p->grainsize;
4818  kmp_uint64 extras = p->extras;
4819  kmp_int64 last_chunk = p->last_chunk;
4820  kmp_uint64 tc = p->tc;
4821  kmp_uint64 num_t_min = p->num_t_min;
4822#if OMPT_SUPPORT1
4823  void *codeptr_ra = p->codeptr_ra;
4824#endif
4825#if KMP_DEBUG1
4826  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
4827  KMP_DEBUG_ASSERT(task != NULL)if (!(task != __null)) { __kmp_debug_assert("task != __null",
 "openmp/runtime/src/kmp_tasking.cpp", 4827); };
4828  KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4829           ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4830            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4831            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4832            st, task_dup))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); };
4833#endif
4834  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min)if (!(num_tasks * 2 + 1 > num_t_min)) { __kmp_debug_assert
("num_tasks * 2 + 1 > num_t_min", "openmp/runtime/src/kmp_tasking.cpp"
, 4834); };
4835  if (num_tasks > num_t_min)
4836    __kmp_taskloop_recur(NULL__null, gtid, task, lb, ub, st, ub_glob, num_tasks,
4837                         grainsize, extras, last_chunk, tc, num_t_min,
4838#if OMPT_SUPPORT1
4839                         codeptr_ra,
4840#endif
4841                         task_dup);
4842  else
4843    __kmp_taskloop_linear(NULL__null, gtid, task, lb, ub, st, ub_glob, num_tasks,
4844                          grainsize, extras, last_chunk, tc,
4845#if OMPT_SUPPORT1
4846                          codeptr_ra,
4847#endif
4848                          task_dup);
4849 
4850  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid))if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_task(exit): T#%d\n"
, gtid); };
4851  return 0;
4852}
4853 
4854// Schedule part of the taskloop as a task,
4855// execute the rest of the taskloop.
4856//
4857// loc        Source location information
4858// gtid       Global thread ID
4859// task       Pattern task, exposes the loop iteration range
4860// lb         Pointer to loop lower bound in task structure
4861// ub         Pointer to loop upper bound in task structure
4862// st         Loop stride
4863// ub_glob    Global upper bound (used for lastprivate check)
4864// num_tasks  Number of tasks to execute
4865// grainsize  Number of loop iterations per task
4866// extras     Number of chunks with grainsize+1 iterations
4867// last_chunk Reduction of grainsize for last task
4868// tc         Iterations count
4869// num_t_min  Threshold to launch tasks recursively
4870// task_dup   Tasks duplication routine
4871// codeptr_ra Return address for OMPT events
4872void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
4873                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4874                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4875                          kmp_uint64 grainsize, kmp_uint64 extras,
4876                          kmp_int64 last_chunk, kmp_uint64 tc,
4877                          kmp_uint64 num_t_min,
4878#if OMPT_SUPPORT1
4879                          void *codeptr_ra,
4880#endif
4881                          void *task_dup) {
4882  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
4883  KMP_DEBUG_ASSERT(task != NULL)if (!(task != __null)) { __kmp_debug_assert("task != __null",
 "openmp/runtime/src/kmp_tasking.cpp", 4883); };
4884  KMP_DEBUG_ASSERT(num_tasks > num_t_min)if (!(num_tasks > num_t_min)) { __kmp_debug_assert("num_tasks > num_t_min"
, "openmp/runtime/src/kmp_tasking.cpp", 4884); };
4885  KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4886           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4887            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4888            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); }
4889            st, task_dup))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n"
, gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *
lb, *ub, st, task_dup); };
4890  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4891  kmp_uint64 lower = *lb;
4892  kmp_info_t *thread = __kmp_threads[gtid];
4893  //  kmp_taskdata_t *current_task = thread->th.th_current_task;
4894  kmp_task_t *next_task;
4895  size_t lower_offset =
4896      (char *)lb - (char *)task; // remember offset of lb in the task structure
4897  size_t upper_offset =
4898      (char *)ub - (char *)task; // remember offset of ub in the task structure
4899 
4900  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
 : extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 4901); }
4901                             (last_chunk < 0 ? last_chunk : extras))if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
 : extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 4901); };
4902  KMP_DEBUG_ASSERT(num_tasks > extras)if (!(num_tasks > extras)) { __kmp_debug_assert("num_tasks > extras"
, "openmp/runtime/src/kmp_tasking.cpp", 4902); };
4903  KMP_DEBUG_ASSERT(num_tasks > 0)if (!(num_tasks > 0)) { __kmp_debug_assert("num_tasks > 0"
, "openmp/runtime/src/kmp_tasking.cpp", 4903); };
4904 
4905  // split the loop in two halves
4906  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
4907  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
4908  kmp_uint64 gr_size0 = grainsize;
4909  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
4910  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
4911  if (last_chunk < 0) {
4912    ext0 = ext1 = 0;
4913    last_chunk1 = last_chunk;
4914    tc0 = grainsize * n_tsk0;
4915    tc1 = tc - tc0;
4916  } else if (n_tsk0 <= extras) {
4917    gr_size0++; // integrate extras into grainsize
4918    ext0 = 0; // no extra iters in 1st half
4919    ext1 = extras - n_tsk0; // remaining extras
4920    tc0 = gr_size0 * n_tsk0;
4921    tc1 = tc - tc0;
4922  } else { // n_tsk0 > extras
4923    ext1 = 0; // no extra iters in 2nd half
4924    ext0 = extras;
4925    tc1 = grainsize * n_tsk1;
4926    tc0 = tc - tc1;
4927  }
4928  ub0 = lower + st * (tc0 - 1);
4929  lb1 = ub0 + st;
4930 
4931  // create pattern task for 2nd half of the loop
4932  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
4933  // adjust lower bound (upper bound is not changed) for the 2nd half
4934  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
4935  if (ptask_dup != NULL__null) // construct firstprivates, etc.
4936    ptask_dup(next_task, task, 0);
4937  *ub = ub0; // adjust upper bound for the 1st half
4938 
4939  // create auxiliary task for 2nd half of the loop
4940  // make sure new task has same parent task as the pattern task
4941  kmp_taskdata_t *current_task = thread->th.th_current_task;
4942  thread->th.th_current_task = taskdata->td_parent;
4943  kmp_task_t *new_task =
4944      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
4945                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
4946  // restore current task
4947  thread->th.th_current_task = current_task;
4948  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
4949  p->task = next_task;
4950  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
4951  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
4952  p->task_dup = task_dup;
4953  p->st = st;
4954  p->ub_glob = ub_glob;
4955  p->num_tasks = n_tsk1;
4956  p->grainsize = grainsize;
4957  p->extras = ext1;
4958  p->last_chunk = last_chunk1;
4959  p->tc = tc1;
4960  p->num_t_min = num_t_min;
4961#if OMPT_SUPPORT1
4962  p->codeptr_ra = codeptr_ra;
4963#endif
4964 
4965#if OMPT_SUPPORT1
4966  // schedule new task with correct return address for OMPT events
4967  __kmp_omp_taskloop_task(NULL__null, gtid, new_task, codeptr_ra);
4968#else
4969  __kmp_omp_task(gtid, new_task, true); // schedule new task
4970#endif
4971 
4972  // execute the 1st half of current subrange
4973  if (n_tsk0 > num_t_min)
4974    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
4975                         ext0, last_chunk0, tc0, num_t_min,
4976#if OMPT_SUPPORT1
4977                         codeptr_ra,
4978#endif
4979                         task_dup);
4980  else
4981    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
4982                          gr_size0, ext0, last_chunk0, tc0,
4983#if OMPT_SUPPORT1
4984                          codeptr_ra,
4985#endif
4986                          task_dup);
4987 
4988  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid))if (kmp_a_debug >= 40) { __kmp_debug_printf ("__kmp_taskloop_recur(exit): T#%d\n"
, gtid); };
4989}
4990 
4991static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
4992                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4993                           int nogroup, int sched, kmp_uint64 grainsize,
4994                           int modifier, void *task_dup) {
4995  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
4996  KMP_DEBUG_ASSERT(task != NULL)if (!(task != __null)) { __kmp_debug_assert("task != __null",
 "openmp/runtime/src/kmp_tasking.cpp", 4996); };
4997  if (nogroup == 0) {
4998#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
4999    OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};;
5000#endif
5001    __kmpc_taskgroup(loc, gtid);
5002  }
5003 
5004  // =========================================================================
5005  // calculate loop parameters
5006  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
5007  kmp_uint64 tc;
5008  // compiler provides global bounds here
5009  kmp_uint64 lower = task_bounds.get_lb();
5010  kmp_uint64 upper = task_bounds.get_ub();
5011  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
5012  kmp_uint64 num_tasks = 0, extras = 0;
5013  kmp_int64 last_chunk =
5014      0; // reduce grainsize of last task by last_chunk in strict mode
5015  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
5016  kmp_info_t *thread = __kmp_threads[gtid];
5017  kmp_taskdata_t *current_task = thread->th.th_current_task;
5018 
5019  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
 "grain %llu(%d, %d), dup %p\n", gtid, taskdata, lower, upper
, st, grainsize, sched, modifier, task_dup); }
5020                "grain %llu(%d, %d), dup %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
 "grain %llu(%d, %d), dup %p\n", gtid, taskdata, lower, upper
, st, grainsize, sched, modifier, task_dup); }
5021                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
 "grain %llu(%d, %d), dup %p\n", gtid, taskdata, lower, upper
, st, grainsize, sched, modifier, task_dup); }
5022                task_dup))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
 "grain %llu(%d, %d), dup %p\n", gtid, taskdata, lower, upper
, st, grainsize, sched, modifier, task_dup); };
5023 
5024  // compute trip count
5025  if (st == 1) { // most common case
5026    tc = upper - lower + 1;
5027  } else if (st < 0) {
5028    tc = (lower - upper) / (-st) + 1;
5029  } else { // st > 0
5030    tc = (upper - lower) / st + 1;
5031  }
5032  if (tc == 0) {
5033    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop(exit): T#%d zero-trip loop\n"
, gtid); };
5034    // free the pattern task and exit
5035    __kmp_task_start(gtid, task, current_task);
5036    // do not execute anything for zero-trip loop
5037    __kmp_task_finish<false>(gtid, task, current_task);
5038    return;
5039  }
5040 
5041#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
5042  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
5043  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
5044  if (ompt_enabled.ompt_callback_work) {
5045    ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
5046        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
5047        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
5048  }
5049#endif
5050 
5051  if (num_tasks_min == 0)
5052    // TODO: can we choose better default heuristic?
5053    num_tasks_min =
5054        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE)((thread->th.th_team_nproc * 10) < ((1 << 8)) ? (
thread->th.th_team_nproc * 10) : ((1 << 8)));
5055 
5056  // compute num_tasks/grainsize based on the input provided
5057  switch (sched) {
5058  case 0: // no schedule clause specified, we can choose the default
5059    // let's try to schedule (team_size*10) tasks
5060    grainsize = thread->th.th_team_nproc * 10;
5061    KMP_FALLTHROUGH()[[fallthrough]];
5062  case 2: // num_tasks provided
5063    if (grainsize > tc) {
5064      num_tasks = tc; // too big num_tasks requested, adjust values
5065      grainsize = 1;
5066      extras = 0;
5067    } else {
5068      num_tasks = grainsize;
5069      grainsize = tc / num_tasks;
5070      extras = tc % num_tasks;
5071    }
5072    break;
5073  case 1: // grainsize provided
5074    if (grainsize > tc) {
5075      num_tasks = 1;
5076      grainsize = tc; // too big grainsize requested, adjust values
5077      extras = 0;
5078    } else {
5079      if (modifier) {
5080        num_tasks = (tc + grainsize - 1) / grainsize;
5081        last_chunk = tc - (num_tasks * grainsize);
5082        extras = 0;
5083      } else {
5084        num_tasks = tc / grainsize;
5085        // adjust grainsize for balanced distribution of iterations
5086        grainsize = tc / num_tasks;
5087        extras = tc % num_tasks;
5088      }
5089    }
5090    break;
5091  default:
5092    KMP_ASSERT2(0, "unknown scheduling of taskloop")if (!(0)) { __kmp_debug_assert(("unknown scheduling of taskloop"
), "openmp/runtime/src/kmp_tasking.cpp", 5092); };
5093  }
5094 
5095  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
 : extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 5096); }
5096                             (last_chunk < 0 ? last_chunk : extras))if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
 : extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 5096); };
5097  KMP_DEBUG_ASSERT(num_tasks > extras)if (!(num_tasks > extras)) { __kmp_debug_assert("num_tasks > extras"
, "openmp/runtime/src/kmp_tasking.cpp", 5097); };
5098  KMP_DEBUG_ASSERT(num_tasks > 0)if (!(num_tasks > 0)) { __kmp_debug_assert("num_tasks > 0"
, "openmp/runtime/src/kmp_tasking.cpp", 5098); };
5099  // =========================================================================
5100 
5101  // check if clause value first
5102  // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native)
5103  if (if_val == 0) { // if(0) specified, mark task as serial
5104    taskdata->td_flags.task_serial = 1;
5105    taskdata->td_flags.tiedness = TASK_TIED1; // AC: serial task cannot be untied
5106    // always start serial tasks linearly
5107    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
5108                          grainsize, extras, last_chunk, tc,
5109#if OMPT_SUPPORT1
5110                          OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0),
5111#endif
5112                          task_dup);
5113    // !taskdata->td_flags.native => currently force linear spawning of tasks
5114    // for GOMP_taskloop
5115  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
5116    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5117                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5118                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5119                  last_chunk))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); };
5120    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
5121                         grainsize, extras, last_chunk, tc, num_tasks_min,
5122#if OMPT_SUPPORT1
5123                         OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0),
5124#endif
5125                         task_dup);
5126  } else {
5127    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5128                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5129                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5130                  last_chunk))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
 "(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); };
5131    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
5132                          grainsize, extras, last_chunk, tc,
5133#if OMPT_SUPPORT1
5134                          OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0),
5135#endif
5136                          task_dup);
5137  }
5138 
5139#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
5140  if (ompt_enabled.ompt_callback_work) {
5141    ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
5142        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
5143        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
5144  }
5145#endif
5146 
5147  if (nogroup == 0) {
5148#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
5149    OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};;
5150#endif
5151    __kmpc_end_taskgroup(loc, gtid);
5152  }
5153  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop(exit): T#%d\n"
, gtid); };
5154}
5155 
5156/*!
5157@ingroup TASKING
5158@param loc       Source location information
5159@param gtid      Global thread ID
5160@param task      Task structure
5161@param if_val    Value of the if clause
5162@param lb        Pointer to loop lower bound in task structure
5163@param ub        Pointer to loop upper bound in task structure
5164@param st        Loop stride
5165@param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
5166@param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
5167@param grainsize Schedule value if specified
5168@param task_dup  Tasks duplication routine
5169 
5170Execute the taskloop construct.
5171*/
5172void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
5173                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
5174                     int sched, kmp_uint64 grainsize, void *task_dup) {
5175  __kmp_assert_valid_gtid(gtid);
5176  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop(enter): T#%d\n"
, gtid); };
5177  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
5178                 0, task_dup);
5179  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop(exit): T#%d\n"
, gtid); };
5180}
5181 
5182/*!
5183@ingroup TASKING
5184@param loc       Source location information
5185@param gtid      Global thread ID
5186@param task      Task structure
5187@param if_val    Value of the if clause
5188@param lb        Pointer to loop lower bound in task structure
5189@param ub        Pointer to loop upper bound in task structure
5190@param st        Loop stride
5191@param nogroup   Flag, 1 if nogroup clause specified, 0 otherwise
5192@param sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
5193@param grainsize Schedule value if specified
5194@param modifier  Modifier 'strict' for sched, 1 if present, 0 otherwise
5195@param task_dup  Tasks duplication routine
5196 
5197Execute the taskloop construct.
5198*/
5199void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
5200                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
5201                       int nogroup, int sched, kmp_uint64 grainsize,
5202                       int modifier, void *task_dup) {
5203  __kmp_assert_valid_gtid(gtid);
5204  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop_5(enter): T#%d\n"
, gtid); };
5205  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
5206                 modifier, task_dup);
5207  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop_5(exit): T#%d\n"
, gtid); };
5208}
5209 
5210/*!
5211@ingroup TASKING
5212@param gtid Global Thread ID of current thread
5213@return Returns a pointer to the thread's current task async handle. If no task
5214is present or gtid is invalid, returns NULL.
5215 
5216Acquires a pointer to the target async handle from the current task.
5217*/
5218void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
5219  if (gtid == KMP_GTID_DNE(-2))
5220    return NULL__null;
5221 
5222  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5223  kmp_taskdata_t *taskdata = thread->th.th_current_task;
5224 
5225  if (!taskdata)
5226    return NULL__null;
5227 
5228  return &taskdata->td_target_data.async_handle;
5229}
5230 
5231/*!
5232@ingroup TASKING
5233@param gtid Global Thread ID of current thread
5234@return Returns TRUE if the current task being executed of the given thread has
5235a task team allocated to it. Otherwise, returns FALSE.
5236 
5237Checks if the current thread has a task team.
5238*/
5239bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
5240  if (gtid == KMP_GTID_DNE(-2))
5241    return FALSE0;
5242 
5243  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5244  kmp_taskdata_t *taskdata = thread->th.th_current_task;
5245 
5246  if (!taskdata)
5247    return FALSE0;
5248 
5249  return taskdata->td_task_team != NULL__null;
5250}

←

/build/source/openmp/runtime/src/kmp_lock.h

1/*
2 * kmp_lock.h -- lock header file
3 */
4 
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12 
13#ifndef KMP_LOCK_H
14#define KMP_LOCK_H
15 
16#include <limits.h> // CHAR_BIT
17#include <stddef.h> // offsetof
18 
19#include "kmp_debug.h"
20#include "kmp_os.h"
21 
22#ifdef __cplusplus201703L
23#include <atomic>
24 
25extern "C" {
26#endif // __cplusplus
27 
28// ----------------------------------------------------------------------------
29// Have to copy these definitions from kmp.h because kmp.h cannot be included
30// due to circular dependencies.  Will undef these at end of file.
31 
32#define KMP_PAD(type, sz)(sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))                                                      \
33  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
34#define KMP_GTID_DNE(-2) (-2)
35 
36// Forward declaration of ident and ident_t
37 
38struct ident;
39typedef struct ident ident_t;
40 
41// End of copied code.
42// ----------------------------------------------------------------------------
43 
44// We need to know the size of the area we can assume that the compiler(s)
45// allocated for objects of type omp_lock_t and omp_nest_lock_t.  The Intel
46// compiler always allocates a pointer-sized area, as does visual studio.
47//
48// gcc however, only allocates 4 bytes for regular locks, even on 64-bit
49// intel archs.  It allocates at least 8 bytes for nested lock (more on
50// recent versions), but we are bounded by the pointer-sized chunks that
51// the Intel compiler allocates.
52 
53#if KMP_OS_LINUX1 && defined(KMP_GOMP_COMPAT)
54#define OMP_LOCK_T_SIZEsizeof(int) sizeof(int)
55#define OMP_NEST_LOCK_T_SIZEsizeof(void *) sizeof(void *)
56#else
57#define OMP_LOCK_T_SIZEsizeof(int) sizeof(void *)
58#define OMP_NEST_LOCK_T_SIZEsizeof(void *) sizeof(void *)
59#endif
60 
61// The Intel compiler allocates a 32-byte chunk for a critical section.
62// Both gcc and visual studio only allocate enough space for a pointer.
63// Sometimes we know that the space was allocated by the Intel compiler.
64#define OMP_CRITICAL_SIZEsizeof(void *) sizeof(void *)
65#define INTEL_CRITICAL_SIZE32 32
66 
67// lock flags
68typedef kmp_uint32 kmp_lock_flags_t;
69 
70#define kmp_lf_critical_section1 1
71 
72// When a lock table is used, the indices are of kmp_lock_index_t
73typedef kmp_uint32 kmp_lock_index_t;
74 
75// When memory allocated for a lock is on the lock pool (free list),
76// it is treated as a struct of this type.
// View of a lock's memory while that memory sits on the lock pool.
77struct kmp_lock_pool {
78  union kmp_user_lock *next; // presumably the next pooled lock -- TODO confirm
79  kmp_lock_index_t index; // kmp_lock_index_t is the lock-table index type
80};
81 
82typedef struct kmp_lock_pool kmp_lock_pool_t;
83 
84extern void __kmp_validate_locks(void);
85 
86// ----------------------------------------------------------------------------
87//  There are 5 lock implementations:
88//       1. Test and set locks.
89//       2. futex locks (Linux* OS on x86 and
90//          Intel(R) Many Integrated Core Architecture)
91//       3. Ticket (Lamport bakery) locks.
92//       4. Queuing locks (with separate spin fields).
93//       5. DRDPA (Dynamically Reconfigurable Distributed Polling Area) locks
94//
95//   and 3 lock purposes:
96//       1. Bootstrap locks -- Used for a few locks available at library
97//       startup-shutdown time.
98//          These do not require non-negative global thread ID's.
99//       2. Internal RTL locks -- Used everywhere else in the RTL
100//       3. User locks (includes critical sections)
101// ----------------------------------------------------------------------------
102 
103// ============================================================================
104// Lock implementations.
105//
106// Test and set locks.
107//
108// Non-nested test and set locks differ from the other lock kinds (except
109// futex) in that we use the memory allocated by the compiler for the lock,
110// rather than a pointer to it.
111//
112// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
113// bytes, so we have to use a lock table for nested locks, and avoid accessing
114// the depth_locked field for non-nested locks.
115//
116// Information normally available to the tools, such as lock location, lock
117// usage (normal lock vs. critical section), etc. is not available with test and
118// set locks.
119// ----------------------------------------------------------------------------
120 
// Fields of a test and set lock: the polled word plus a nesting depth.
121struct kmp_base_tas_lock {
122  // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
123  std::atomic<kmp_int32> poll;
124  kmp_int32 depth_locked; // depth locked, for nested locks only
125};
126 
127typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
128 
// Storage for one test and set lock. The members share the same bytes: the
// lock fields, the lock-pool view, and a double present only for alignment.
129union kmp_tas_lock {
130  kmp_base_tas_lock_t lk; // the lock fields proper
131  kmp_lock_pool_t pool; // make certain struct is large enough
132  double lk_align; // use worst case alignment; no cache line padding
133};
134 
135typedef union kmp_tas_lock kmp_tas_lock_t;
136 
137// Static initializer for test and set lock variables. Usage:
138//    kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
139#define KMP_TAS_LOCK_INITIALIZER(lock){ { (locktag_tas), 0 } }                                         \
140  {                                                                            \
141    { KMP_LOCK_FREE(tas)(locktag_tas), 0 }                                                  \
142  }
143 
144extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
145extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
146extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
147extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck);
148extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck);
149 
150extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
151extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
152extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
153extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck);
154extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck);
155 
156#define KMP_LOCK_RELEASED1 1
157#define KMP_LOCK_STILL_HELD0 0
158#define KMP_LOCK_ACQUIRED_FIRST1 1
159#define KMP_LOCK_ACQUIRED_NEXT0 0
160#ifndef KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
161#define KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))                                                          \
162  (KMP_OS_LINUX1 &&                                                             \
163   (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640))
164#endif
165#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
166 
167// ----------------------------------------------------------------------------
168// futex locks.  futex locks are only available on Linux* OS.
169//
170// Like non-nested test and set lock, non-nested futex locks use the memory
171// allocated by the compiler for the lock, rather than a pointer to it.
172//
173// Information normally available to the tools, such as lock location, lock
174// usage (normal lock vs. critical section), etc. is not available with test and
175// set locks. With non-nested futex locks, the lock owner is not even available.
176// ----------------------------------------------------------------------------
177 
// Fields of a futex lock: the polled word plus a nesting depth.
178struct kmp_base_futex_lock {
179  volatile kmp_int32 poll; // KMP_LOCK_FREE(futex) => unlocked
180  // 2*(gtid+1) of owning thread, 0 if unlocked
181  // locked: (gtid+1) of owning thread
    // NOTE(review): the two lines above give different encodings for the
    // locked value, 2*(gtid+1) versus (gtid+1); confirm which one the futex
    // lock implementation actually stores and drop the other.
182  kmp_int32 depth_locked; // depth locked, for nested locks only
183};
184 
185typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
186 
// Storage for one futex lock. The members share the same bytes: the lock
// fields, the lock-pool view, and a double present only for alignment.
187union kmp_futex_lock {
188  kmp_base_futex_lock_t lk; // the lock fields proper
189  kmp_lock_pool_t pool; // make certain struct is large enough
190  double lk_align; // use worst case alignment
191  // no cache line padding
192};
193 
194typedef union kmp_futex_lock kmp_futex_lock_t;
195 
196// Static initializer for futex lock variables. Usage:
197//    kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
198#define KMP_FUTEX_LOCK_INITIALIZER(lock){ { (locktag_futex), 0 } }                                       \
199  {                                                                            \
200    { KMP_LOCK_FREE(futex)(locktag_futex), 0 }                                                \
201  }
202 
203extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
204extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
205extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
206extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck);
207extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck);
208 
209extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck,
210                                           kmp_int32 gtid);
211extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
212extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck,
213                                           kmp_int32 gtid);
214extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck);
215extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck);
216 
217#endif // KMP_USE_FUTEX
218 
219// ----------------------------------------------------------------------------
220// Ticket locks.
221 
222#ifdef __cplusplus201703L
223 
224#ifdef _MSC_VER
225// MSVC won't allow use of std::atomic<> in a union since it has non-trivial
226// copy constructor.
227 
228struct kmp_base_ticket_lock {
229  // `initialized' must be the first entry in the lock data structure!
230  std::atomic_bool initialized;
231  volatile union kmp_ticket_lock *self; // points to the lock union
232  ident_t const *location; // Source code location of omp_init_lock().
233  std::atomic_uint
234      next_ticket; // ticket number to give to next thread which acquires
235  std::atomic_uint now_serving; // ticket number for thread which holds the lock
236  std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked
237  std::atomic_int depth_locked; // depth locked, for nested locks only
238  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
239};
240#else
241struct kmp_base_ticket_lock {
242  // `initialized' must be the first entry in the lock data structure!
243  std::atomic<bool> initialized;
244  volatile union kmp_ticket_lock *self; // points to the lock union
245  ident_t const *location; // Source code location of omp_init_lock().
246  std::atomic<unsigned>
247      next_ticket; // ticket number to give to next thread which acquires
248  std::atomic<unsigned>
249      now_serving; // ticket number for thread which holds the lock
250  std::atomic<int> owner_id; // (gtid+1) of owning thread, 0 if unlocked
251  std::atomic<int> depth_locked; // depth locked, for nested locks only
252  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
253};
254#endif
255 
256#else // __cplusplus
257 
258struct kmp_base_ticket_lock;
259 
260#endif // !__cplusplus
261 
262typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
263 
264union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_ticket_lock {
265  kmp_base_ticket_lock_t
266      lk; // This field must be first to allow static initializing.
267  kmp_lock_pool_t pool;
268  double lk_align; // use worst case alignment
269  char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)(sizeof(kmp_base_ticket_lock_t) + (64 - ((sizeof(kmp_base_ticket_lock_t
) - 1) % (64)) - 1))];
270};
271 
272typedef union kmp_ticket_lock kmp_ticket_lock_t;
273 
274// Static initializer for simple ticket lock variables. Usage:
275//    kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
276// Note the macro argument: &(lock) initializes the self pointer, so pass the variable being defined.
277#define KMP_TICKET_LOCK_INITIALIZER(lock){ { true, &(lock), __null, 0U, 0U, 0, -1 } }                                      \
278  {                                                                            \
279    { true, &(lock), NULL__null, 0U, 0U, 0, -1 }                                     \
280  }
281 
282extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
283extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
284extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck,
285                                             kmp_int32 gtid);
286extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
287extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck);
288extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck);
289 
290extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck,
291                                            kmp_int32 gtid);
292extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck,
293                                         kmp_int32 gtid);
294extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck,
295                                            kmp_int32 gtid);
296extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck);
297extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck);
298 
299// ----------------------------------------------------------------------------
300// Queuing locks.
301 
302#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
303 
304struct kmp_adaptive_lock_info;
305 
306typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
307 
308#if KMP_DEBUG_ADAPTIVE_LOCKS0
309 
310struct kmp_adaptive_lock_statistics {
311  /* So we can get stats from locks that haven't been destroyed. */
312  kmp_adaptive_lock_info_t *next;
313  kmp_adaptive_lock_info_t *prev;
314 
315  /* Other statistics */
316  kmp_uint32 successfulSpeculations;
317  kmp_uint32 hardFailedSpeculations;
318  kmp_uint32 softFailedSpeculations;
319  kmp_uint32 nonSpeculativeAcquires;
320  kmp_uint32 nonSpeculativeAcquireAttempts;
321  kmp_uint32 lemmingYields;
322};
323 
324typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
325 
326extern void __kmp_print_speculative_stats();
327extern void __kmp_init_speculative_stats();
328 
329#endif // KMP_DEBUG_ADAPTIVE_LOCKS
330 
331struct kmp_adaptive_lock_info {
332  /* Values used for adaptivity.
333     Although these are accessed from multiple threads we don't access them
334     atomically, because if we miss updates it probably doesn't matter much. (It
335     just affects our decision about whether to try speculation on the lock). */
336  kmp_uint32 volatile badness;
337  kmp_uint32 volatile acquire_attempts;
338  /* Parameters of the lock. */
339  kmp_uint32 max_badness;
340  kmp_uint32 max_soft_retries;
341 
342#if KMP_DEBUG_ADAPTIVE_LOCKS0
343  kmp_adaptive_lock_statistics_t volatile stats;
344#endif
345};
346 
347#endif // KMP_USE_ADAPTIVE_LOCKS
348 
349struct kmp_base_queuing_lock {
350 
351  //  `initialized' must be the first entry in the lock data structure!
352  volatile union kmp_queuing_lock
353      *initialized; // Points to the lock union if in initialized state.
354 
355  ident_t const *location; // Source code location of omp_init_lock().
356 
357  KMP_ALIGN(8)__attribute__((aligned(8))) // tail_id  must be 8-byte aligned!
358 
359  volatile kmp_int32
360      tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
361  // Must be no padding here since head/tail used in 8-byte CAS
362  volatile kmp_int32
363      head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
364  // Decl order assumes little endian
365  // bakery-style lock
366  volatile kmp_uint32
367      next_ticket; // ticket number to give to next thread which acquires
368  volatile kmp_uint32
369      now_serving; // ticket number for thread which holds the lock
370  volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
371  kmp_int32 depth_locked; // depth locked, for nested locks only
372 
373  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
374};
375 
376typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
377 
378KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0)static_assert(__builtin_offsetof(kmp_base_queuing_lock_t, tail_id
) % 8 == 0, "Build condition error");
379 
380union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_queuing_lock {
381  kmp_base_queuing_lock_t
382      lk; // This field must be first to allow static initializing.
383  kmp_lock_pool_t pool;
384  double lk_align; // use worst case alignment
385  char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)(sizeof(kmp_base_queuing_lock_t) + (64 - ((sizeof(kmp_base_queuing_lock_t
) - 1) % (64)) - 1))];
386};
387 
388typedef union kmp_queuing_lock kmp_queuing_lock_t;
389 
390extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
391extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
392extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
393extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck);
394extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck);
395 
396extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck,
397                                             kmp_int32 gtid);
398extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck,
399                                          kmp_int32 gtid);
400extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck,
401                                             kmp_int32 gtid);
402extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck);
403extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck);
404 
405#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
406 
407// ----------------------------------------------------------------------------
408// Adaptive locks.
409struct kmp_base_adaptive_lock {
410  kmp_base_queuing_lock qlk;
411  KMP_ALIGN(CACHE_LINE)__attribute__((aligned(64)))
412  kmp_adaptive_lock_info_t
413      adaptive; // Information for the speculative adaptive lock
414};
415 
416typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
417 
418union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_adaptive_lock {
419  kmp_base_adaptive_lock_t lk;
420  kmp_lock_pool_t pool;
421  double lk_align;
422  char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)(sizeof(kmp_base_adaptive_lock_t) + (64 - ((sizeof(kmp_base_adaptive_lock_t
) - 1) % (64)) - 1))];
423};
424typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
425 
426#define GET_QLK_PTR(l)((kmp_queuing_lock_t *)&(l)->lk.qlk) ((kmp_queuing_lock_t *)&(l)->lk.qlk)
427 
428#endif // KMP_USE_ADAPTIVE_LOCKS
429 
430// ----------------------------------------------------------------------------
431// DRDPA ticket locks.
432struct kmp_base_drdpa_lock {
433  // All of the fields on the first cache line are only written when
434  // initializing or reconfiguring the lock.  These are relatively rare
435  // operations, so data from the first cache line will usually stay resident in
436  // the cache of each thread trying to acquire the lock.
437  //
438  // initialized must be the first entry in the lock data structure!
439  KMP_ALIGN_CACHE__attribute__((aligned(64)))
440 
441  volatile union kmp_drdpa_lock
442      *initialized; // points to the lock union if in initialized state
443  ident_t const *location; // Source code location of omp_init_lock().
444  std::atomic<std::atomic<kmp_uint64> *> polls;
445  std::atomic<kmp_uint64> mask; // is 2**num_polls-1 for mod op
446  kmp_uint64 cleanup_ticket; // thread with cleanup ticket
447  std::atomic<kmp_uint64> *old_polls; // will deallocate old_polls
448  kmp_uint32 num_polls; // must be power of 2
449 
450  // next_ticket needs to exist in a separate cache line, as it is
451  // invalidated every time a thread takes a new ticket.
452  KMP_ALIGN_CACHE__attribute__((aligned(64)))
453 
454  std::atomic<kmp_uint64> next_ticket;
455 
456  // now_serving is used to store our ticket value while we hold the lock. It
457  // has a slightly different meaning in the DRDPA ticket locks (where it is
458  // written by the acquiring thread) than it does in the simple ticket locks
459  // (where it is written by the releasing thread).
460  //
461  // Since now_serving is only read and written in the critical section,
462  // it is non-volatile, but it needs to exist on a separate cache line,
463  // as it is invalidated at every lock acquire.
464  //
465  // Likewise, the vars used for nested locks (owner_id and depth_locked) are
466  // only written by the thread owning the lock, so they are put in this cache
467  // line.  owner_id is read by other threads, so it must be declared volatile.
468  KMP_ALIGN_CACHE__attribute__((aligned(64)))
469  kmp_uint64 now_serving; // doesn't have to be volatile
470  volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
471  kmp_int32 depth_locked; // depth locked
472  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
473};
474 
475typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
476 
477union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_drdpa_lock {
478  kmp_base_drdpa_lock_t
479      lk; // This field must be first to allow static initializing.
480  kmp_lock_pool_t pool;
481  double lk_align; // use worst case alignment
482  char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)(sizeof(kmp_base_drdpa_lock_t) + (64 - ((sizeof(kmp_base_drdpa_lock_t
) - 1) % (64)) - 1))];
483};
484 
485typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
486 
487extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
488extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
489extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
490extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck);
491extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck);
492 
493extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
494                                           kmp_int32 gtid);
495extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
496extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
497                                           kmp_int32 gtid);
498extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
499extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
500 
501// ============================================================================
502// Lock purposes.
503// ============================================================================
504 
505// Bootstrap locks.
506//
507// Bootstrap locks -- the very few locks used at library initialization time.
508// Bootstrap locks are currently implemented as ticket locks.
509// They could also be implemented as test and set locks, but cannot be
510// implemented with other lock kinds, as those require gtids, which are not
511// available at initialization time.
512 
513typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
514 
515#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock){ { true, &((lock)), __null, 0U, 0U, 0, -1 } } KMP_TICKET_LOCK_INITIALIZER((lock)){ { true, &((lock)), __null, 0U, 0U, 0, -1 } }
516#define KMP_BOOTSTRAP_LOCK_INIT(lock)kmp_bootstrap_lock_t lock = { { true, &(lock), __null, 0U
, 0U, 0, -1 } }                                          \
517  kmp_bootstrap_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock){ { true, &(lock), __null, 0U, 0U, 0, -1 } }
518 
519static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
520  return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE(-2));
4
←
Value assigned to field 'tt_found_tasks', which participates in a condition later→
521}
522 
523static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
524  return __kmp_test_ticket_lock(lck, KMP_GTID_DNE(-2));
525}
526 
527static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
528  __kmp_release_ticket_lock(lck, KMP_GTID_DNE(-2));
529}
530 
531static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
532  __kmp_init_ticket_lock(lck);
533}
534 
535static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
536  __kmp_destroy_ticket_lock(lck);
537}
538 
539// Internal RTL locks.
540//
541// Internal RTL locks are also implemented as ticket locks, for now.
542//
543// FIXME - We should go through and figure out which lock kind works best for
544// each internal lock, and use the type declaration and function calls for
545// that explicit lock kind (and get rid of this section).
546 
547typedef kmp_ticket_lock_t kmp_lock_t;
548 
549#define KMP_LOCK_INIT(lock)kmp_lock_t lock = { { true, &(lock), __null, 0U, 0U, 0, -
1 } } kmp_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock){ { true, &(lock), __null, 0U, 0U, 0, -1 } }
550 
551static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) {
552  return __kmp_acquire_ticket_lock(lck, gtid);
553}
554 
555static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) {
556  return __kmp_test_ticket_lock(lck, gtid);
557}
558 
559static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) {
560  __kmp_release_ticket_lock(lck, gtid);
561}
562 
563static inline void __kmp_init_lock(kmp_lock_t *lck) {
564  __kmp_init_ticket_lock(lck);
565}
566 
567static inline void __kmp_destroy_lock(kmp_lock_t *lck) {
568  __kmp_destroy_ticket_lock(lck);
569}
570 
571// User locks.
572//
573// Do not allocate objects of type union kmp_user_lock!!! This will waste space
574// unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of
575// __kmp_user_lock_kind and allocate objects of the type of the appropriate
576// union member, and cast their addresses to kmp_user_lock_p.
577 
578enum kmp_lock_kind {
579  lk_default = 0,
580  lk_tas,
581#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
582  lk_futex,
583#endif
584#if KMP_USE_DYNAMIC_LOCK1 && KMP_USE_TSX(0 || 1) && !0
585  lk_hle,
586  lk_rtm_queuing,
587  lk_rtm_spin,
588#endif
589  lk_ticket,
590  lk_queuing,
591  lk_drdpa,
592#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
593  lk_adaptive
594#endif // KMP_USE_ADAPTIVE_LOCKS
595};
596 
597typedef enum kmp_lock_kind kmp_lock_kind_t;
598 
599extern kmp_lock_kind_t __kmp_user_lock_kind;
600 
601union kmp_user_lock {
602  kmp_tas_lock_t tas;
603#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
604  kmp_futex_lock_t futex;
605#endif
606  kmp_ticket_lock_t ticket;
607  kmp_queuing_lock_t queuing;
608  kmp_drdpa_lock_t drdpa;
609#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
610  kmp_adaptive_lock_t adaptive;
611#endif // KMP_USE_ADAPTIVE_LOCKS
612  kmp_lock_pool_t pool;
613};
614 
615typedef union kmp_user_lock *kmp_user_lock_p;
616 
617#if !KMP_USE_DYNAMIC_LOCK1
618 
619extern size_t __kmp_base_user_lock_size;
620extern size_t __kmp_user_lock_size;
621 
622extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck);
623 
624static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) {
625  KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL)if (!(__kmp_get_user_lock_owner_ != __null)) { __kmp_debug_assert
("__kmp_get_user_lock_owner_ != __null", "openmp/runtime/src/kmp_lock.h"
, 625); };
626  return (*__kmp_get_user_lock_owner_)(lck);
627}
628 
629extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
630                                                   kmp_int32 gtid);
631 
632#if KMP_OS_LINUX1 &&                                                            \
633    (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
634 
635#define __kmp_acquire_user_lock_with_checks(lck, gtid)                         \
636  if (__kmp_user_lock_kind == lk_tas) {                                        \
637    if (__kmp_env_consistency_check) {                                         \
638      char const *const func = "omp_set_lock";                                 \
639      if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZEsizeof(int)) &&                       \
640          lck->tas.lk.depth_locked != -1) {                                    \
641        KMP_FATAL(LockNestableUsedAsSimple, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockNestableUsedAsSimple
, func), __kmp_msg_null);                             \
642      }                                                                        \
643      if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) {                     \
644        KMP_FATAL(LockIsAlreadyOwned, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsAlreadyOwned,
 func), __kmp_msg_null);                                   \
645      }                                                                        \
646    }                                                                          \
647    if (lck->tas.lk.poll != 0 ||                                               \
648        !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {     \
649      kmp_uint32 spins;                                                        \
650      kmp_uint64 time;                                                         \
651      KMP_FSYNC_PREPARE(lck)(!__kmp_itt_fsync_prepare_ptr__3_0) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0
((void *)(lck));                                                  \
652      KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; };                                                   \
653      KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; };                                                  \
654      do {                                                                     \
655        KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
 ? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
 = (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
 __kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
 == 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
 : __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
 == 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
 = __kmp_yield_next; } } } };                              \
656      } while (                                                                \
657          lck->tas.lk.poll != 0 ||                                             \
658          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));    \
659    }                                                                          \
660    KMP_FSYNC_ACQUIRED(lck)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(lck));                                                   \
661  } else {                                                                     \
662    KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_acquire_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 662); };            \
663    (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);                        \
664  }
665 
666#else
667static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck,
668                                                      kmp_int32 gtid) {
669  KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_acquire_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 669); };
670  return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
671}
672#endif
673 
674extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
675                                                kmp_int32 gtid);
676 
677#if KMP_OS_LINUX1 &&                                                            \
678    (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
679 
680#include "kmp_i18n.h" /* AC: KMP_FATAL definition */
681extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
682static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
683                                                   kmp_int32 gtid) {
684  if (__kmp_user_lock_kind == lk_tas) {
685    if (__kmp_env_consistency_check) {
686      char const *const func = "omp_test_lock";
687      if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZEsizeof(int)) &&
688          lck->tas.lk.depth_locked != -1) {
689        KMP_FATAL(LockNestableUsedAsSimple, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockNestableUsedAsSimple
, func), __kmp_msg_null);
690      }
691    }
692    return ((lck->tas.lk.poll == 0) &&
693            __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
694  } else {
695    KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL)if (!(__kmp_test_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 695); };
696    return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
697  }
698}
699#else
700static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
701                                                   kmp_int32 gtid) {
702  KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL)if (!(__kmp_test_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 702); };
703  return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
704}
705#endif
706 
707extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
708                                                   kmp_int32 gtid);
709 
710static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck,
711                                                       kmp_int32 gtid) {
712  KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL)if (!(__kmp_release_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_release_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 712); };
713  (*__kmp_release_user_lock_with_checks_)(lck, gtid);
714}
715 
716extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck);
717 
718static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) {
719  KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL)if (!(__kmp_init_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_init_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 719); };
720  (*__kmp_init_user_lock_with_checks_)(lck);
721}
722 
723// We need a non-checking version of destroy lock for when the RTL is
724// doing the cleanup as it can't always tell if the lock is nested or not.
725extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck);
726 
727static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) {
728  KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL)if (!(__kmp_destroy_user_lock_ != __null)) { __kmp_debug_assert
("__kmp_destroy_user_lock_ != __null", "openmp/runtime/src/kmp_lock.h"
, 728); };
729  (*__kmp_destroy_user_lock_)(lck);
730}
731 
732extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck);
733 
734static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) {
735  KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL)if (!(__kmp_destroy_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_destroy_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 735); };
736  (*__kmp_destroy_user_lock_with_checks_)(lck);
737}
738 
739extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
740                                                          kmp_int32 gtid);
741 
742#if KMP_OS_LINUX1 && (KMP_ARCH_X860 || KMP_ARCH_X86_641)
743 
744#define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth)           \
745  if (__kmp_user_lock_kind == lk_tas) {                                        \
746    if (__kmp_env_consistency_check) {                                         \
747      char const *const func = "omp_set_nest_lock";                            \
748      if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZEsizeof(void *)) &&                  \
749          lck->tas.lk.depth_locked == -1) {                                    \
750        KMP_FATAL(LockSimpleUsedAsNestable, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockSimpleUsedAsNestable
, func), __kmp_msg_null);                             \
751      }                                                                        \
752    }                                                                          \
753    if (lck->tas.lk.poll - 1 == gtid) {                                        \
754      lck->tas.lk.depth_locked += 1;                                           \
755      *depth = KMP_LOCK_ACQUIRED_NEXT0;                                         \
756    } else {                                                                   \
757      if ((lck->tas.lk.poll != 0) ||                                           \
758          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {   \
759        kmp_uint32 spins;                                                      \
760        kmp_uint64 time;                                                       \
761        KMP_FSYNC_PREPARE(lck)(!__kmp_itt_fsync_prepare_ptr__3_0) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0
((void *)(lck));                                                \
762        KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; };                                                 \
763        KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; };                                                \
764        do {                                                                   \
765          KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
 ? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
 = (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
 __kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
 == 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
 : __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
 == 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
 = __kmp_yield_next; } } } };                            \
766        } while (                                                              \
767            (lck->tas.lk.poll != 0) ||                                         \
768            !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));  \
769      }                                                                        \
770      lck->tas.lk.depth_locked = 1;                                            \
771      *depth = KMP_LOCK_ACQUIRED_FIRST1;                                        \
772    }                                                                          \
773    KMP_FSYNC_ACQUIRED(lck)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(lck));                                                   \
774  } else {                                                                     \
775    KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_nested_user_lock_with_checks_ != __null))
 { __kmp_debug_assert("__kmp_acquire_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 775); };     \
776    *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid);        \
777  }
778 
779#else
780static inline void
781__kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid,
782                                           int *depth) {
783  KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_nested_user_lock_with_checks_ != __null))
 { __kmp_debug_assert("__kmp_acquire_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 783); };
784  *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid);
785}
786#endif
787 
788extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
789                                                       kmp_int32 gtid);
790 
791#if KMP_OS_LINUX1 && (KMP_ARCH_X860 || KMP_ARCH_X86_641)
792static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
793                                                          kmp_int32 gtid) {
794  if (__kmp_user_lock_kind == lk_tas) {
795    int retval;
796    if (__kmp_env_consistency_check) {
797      char const *const func = "omp_test_nest_lock";
798      if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZEsizeof(void *)) &&
799          lck->tas.lk.depth_locked == -1) {
800        KMP_FATAL(LockSimpleUsedAsNestable, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockSimpleUsedAsNestable
, func), __kmp_msg_null);
801      }
802    }
803    KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_lock.h"
, 803); };
804    if (lck->tas.lk.poll - 1 ==
805        gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
806      return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
807    }
808    retval = ((lck->tas.lk.poll == 0) &&
809              __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
810    if (retval) {
811      KMP_MB();
812      lck->tas.lk.depth_locked = 1;
813    }
814    return retval;
815  } else {
816    KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL)if (!(__kmp_test_nested_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_nested_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 816); };
817    return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
818  }
819}
820#else
821static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
822                                                          kmp_int32 gtid) {
823  KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL)if (!(__kmp_test_nested_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_nested_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 823); };
824  return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
825}
826#endif
827 
828extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
829                                                          kmp_int32 gtid);
830 
831static inline int
832__kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck,
833                                           kmp_int32 gtid) {
834  KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL)if (!(__kmp_release_nested_user_lock_with_checks_ != __null))
 { __kmp_debug_assert("__kmp_release_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 834); };
835  return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid);
836}
837 
838extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
839 
840static inline void
841__kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) {
842  KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL)if (!(__kmp_init_nested_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_init_nested_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 842); };
843  (*__kmp_init_nested_user_lock_with_checks_)(lck);
844}
845 
846extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
847 
848static inline void
849__kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
850  KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL)if (!(__kmp_destroy_nested_user_lock_with_checks_ != __null))
 { __kmp_debug_assert("__kmp_destroy_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 850); };
851  (*__kmp_destroy_nested_user_lock_with_checks_)(lck);
852}
853 
854// user lock functions which do not necessarily exist for all lock kinds.
855//
856// The "set" functions usually have wrapper routines that check for a NULL set
857// function pointer and call it if non-NULL.
858//
859// In some cases, it makes sense to have a "get" wrapper function check for a
860// NULL get function pointer and return NULL / invalid value / error code if
861// the function pointer is NULL.
862//
863// In other cases, the calling code really should differentiate between an
864// unimplemented function and one that is implemented but returning NULL /
865// invalid value.  If this is the case, no get function wrapper exists.
866 
867extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);
868 
869// no set function; fields set during local allocation
870 
871extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);
872 
873static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) {
874  if (__kmp_get_user_lock_location_ != NULL__null) {
875    return (*__kmp_get_user_lock_location_)(lck);
876  } else {
877    return NULL__null;
878  }
879}
880 
881extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
882                                             const ident_t *loc);
883 
884static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck,
885                                                const ident_t *loc) {
886  if (__kmp_set_user_lock_location_ != NULL__null) {
887    (*__kmp_set_user_lock_location_)(lck, loc);
888  }
889}
890 
891extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck);
892 
893extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
894                                          kmp_lock_flags_t flags);
895 
896static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
897                                             kmp_lock_flags_t flags) {
898  if (__kmp_set_user_lock_flags_ != NULL__null) {
899    (*__kmp_set_user_lock_flags_)(lck, flags);
900  }
901}
902 
903// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
904extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);
905 
906// Macros for binding user lock functions.
907#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix)                        \
908  {                                                                            \
909    __kmp_acquire##nest##user_lock_with_checks_ = (int (*)(                    \
910        kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix;      \
911    __kmp_release##nest##user_lock_with_checks_ = (int (*)(                    \
912        kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix;      \
913    __kmp_test##nest##user_lock_with_checks_ = (int (*)(                       \
914        kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix;         \
915    __kmp_init##nest##user_lock_with_checks_ =                                 \
916        (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix;          \
917    __kmp_destroy##nest##user_lock_with_checks_ =                              \
918        (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix;       \
919  }
920 
921#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
922#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind)                                   \
923  KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
924#define KMP_BIND_NESTED_USER_LOCK(kind)                                        \
925  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
926#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind)                            \
927  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
928 
929// User lock table & lock allocation
930/* On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytes of memory
931   for lock variable, which is not enough to store a pointer, so we have to use
932   lock indexes instead of pointers and maintain lock table to map indexes to
933   pointers.
934 
935 
936   Note: The first element of the table is not a pointer to lock! It is a
937   pointer to previously allocated table (or NULL if it is the first table).
938 
939   Usage:
940 
941   if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
942     Lock table is fully utilized. User locks are indexes, so table is used on
943     user lock operation.
944     Note: it may be the case (lin_32) that we don't need to use a lock
945     table for regular locks, but do need the table for nested locks.
946   }
947   else {
948     Lock table initialized but not actually used.
949   }
950*/
951 
952struct kmp_lock_table {
953  kmp_lock_index_t used; // Number of used elements
954  kmp_lock_index_t allocated; // Number of allocated elements
955  kmp_user_lock_p *table; // Lock table.
956};
957 
958typedef struct kmp_lock_table kmp_lock_table_t;
959 
960extern kmp_lock_table_t __kmp_user_lock_table;
961extern kmp_user_lock_p __kmp_lock_pool;
962 
// Node of a singly linked list of lock blocks: `next_block` links to another
// node and `locks` points at this node's lock storage.
typedef struct kmp_block_of_locks {
  struct kmp_block_of_locks *next_block;
  void *locks;
} kmp_block_of_locks_t;
969 
970extern kmp_block_of_locks_t *__kmp_lock_blocks;
971extern int __kmp_num_locks_in_block;
972 
973extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock,
974                                                kmp_int32 gtid,
975                                                kmp_lock_flags_t flags);
976extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
977                                 kmp_user_lock_p lck);
978extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock,
979                                              char const *func);
980extern void __kmp_cleanup_user_locks();
981 
982#define KMP_CHECK_USER_LOCK_INIT()                                             \
983  {                                                                            \
984    if (!TCR_4(__kmp_init_user_locks)(__kmp_init_user_locks)) {                                       \
985      __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);                         \
986      if (!TCR_4(__kmp_init_user_locks)(__kmp_init_user_locks)) {                                     \
987        TCW_4(__kmp_init_user_locks, TRUE)(__kmp_init_user_locks) = ((!0));                                    \
988      }                                                                        \
989      __kmp_release_bootstrap_lock(&__kmp_initz_lock);                         \
990    }                                                                          \
991  }
992 
993#endif // KMP_USE_DYNAMIC_LOCK
994 
995#undef KMP_PAD
996#undef KMP_GTID_DNE(-2)
997 
998#if KMP_USE_DYNAMIC_LOCK1
999// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without
1000// breaking the current compatibility. Essential functionality of this new code
1001// is dynamic dispatch, but it also implements (or enables implementation of)
1002// hinted user lock and critical section which will be part of OMP 4.5 soon.
1003//
1004// Lock type can be decided at creation time (i.e., lock initialization), and
1005// subsequent lock function call on the created lock object requires type
1006// extraction and call through jump table using the extracted type. This type
1007// information is stored in two different ways depending on the size of the lock
1008// object, and we differentiate lock types by this size requirement - direct and
1009// indirect locks.
1010//
1011// Direct locks:
1012// A direct lock object fits into the space created by the compiler for an
1013// omp_lock_t object, and TAS/Futex lock falls into this category. We use low
1014// one byte of the lock object as the storage for the lock type, and appropriate
1015// bit operation is required to access the data meaningful to the lock
1016// algorithms. Also, to differentiate direct lock from indirect lock, 1 is
1017// written to LSB of the lock object. The newly introduced "hle" lock is also a
1018// direct lock.
1019//
1020// Indirect locks:
1021// An indirect lock object requires more space than the compiler-generated
1022// space, and it should be allocated from heap. Depending on the size of the
1023// compiler-generated space for the lock (i.e., size of omp_lock_t), this
1024// omp_lock_t object stores either the address of the heap-allocated indirect
1025// lock (void * fits in the object) or an index to the indirect lock table entry
1026// that holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this
1027// category, and the newly introduced "rtm" lock is also an indirect lock which
1028// was implemented on top of the Queuing lock. When the omp_lock_t object holds
1029// an index (not lock address), 0 is written to LSB to differentiate the lock
1030// from a direct lock, and the remaining part is the actual index to the
1031// indirect lock table.
1032 
1033#include <stdint.h> // for uintptr_t
1034 
1035// Shortcuts
1036#define KMP_USE_INLINED_TAS(1 && (0 || 1 || KMP_ARCH_ARM)) && 1                                                    \
1037  (KMP_OS_LINUX1 && (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM)) && 1
1038#define KMP_USE_INLINED_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0 KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) && 0
1039 
1040// List of lock definitions; all nested locks are indirect locks.
1041// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
1042// All nested locks are indirect lock types.
1043#if KMP_USE_TSX(0 || 1) && !0
1044#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1045#define KMP_FOREACH_D_LOCK(m, a)m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a) m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a)
1046#define KMP_FOREACH_I_LOCK(m, a)m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing
, a) m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) m
(nested_queuing, a) m(nested_drdpa, a)                                               \
1047  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a)      \
1048      m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a)                  \
1049          m(nested_queuing, a) m(nested_drdpa, a)
1050#else
1051#define KMP_FOREACH_D_LOCK(m, a)m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a) m(tas, a) m(hle, a) m(rtm_spin, a)
1052#define KMP_FOREACH_I_LOCK(m, a)m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing
, a) m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) m
(nested_queuing, a) m(nested_drdpa, a)                                               \
1053  m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a)      \
1054      m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a)                \
1055          m(nested_drdpa, a)
1056#endif // KMP_USE_FUTEX
1057#define KMP_LAST_D_LOCKlockseq_rtm_spin lockseq_rtm_spin
1058#else
1059#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
1060#define KMP_FOREACH_D_LOCK(m, a)m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a) m(tas, a) m(futex, a)
1061#define KMP_FOREACH_I_LOCK(m, a)m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing
, a) m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) m
(nested_queuing, a) m(nested_drdpa, a)                                               \
1062  m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a)   \
1063      m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a)
1064#define KMP_LAST_D_LOCKlockseq_rtm_spin lockseq_futex
1065#else
1066#define KMP_FOREACH_D_LOCK(m, a)m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a) m(tas, a)
1067#define KMP_FOREACH_I_LOCK(m, a)m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing
, a) m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) m
(nested_queuing, a) m(nested_drdpa, a)                                               \
1068  m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a)  \
1069      m(nested_queuing, a) m(nested_drdpa, a)
1070#define KMP_LAST_D_LOCKlockseq_rtm_spin lockseq_tas
1071#endif // KMP_USE_FUTEX
1072#endif // KMP_USE_TSX
1073 
1074// Information used in dynamic dispatch
1075#define KMP_LOCK_SHIFT8                                                         \
1076  8 // number of low bits to be used as tag for direct locks
1077#define KMP_FIRST_D_LOCKlockseq_tas lockseq_tas
1078#define KMP_FIRST_I_LOCKlockseq_ticket lockseq_ticket
1079#define KMP_LAST_I_LOCKlockseq_nested_drdpa lockseq_nested_drdpa
1080#define KMP_NUM_I_LOCKS(locktag_nested_drdpa + 1)                                                        \
1081  (locktag_nested_drdpa + 1) // number of indirect lock types
1082 
1083// Base type for dynamic locks.
1084typedef kmp_uint32 kmp_dyna_lock_t;
1085 
1086// Lock sequence that enumerates all lock kinds. Always make this enumeration
1087// consistent with kmp_lockseq_t in the include directory.
1088typedef enum {
1089  lockseq_indirect = 0,
1090#define expand_seq(l, a) lockseq_##l,
1091  KMP_FOREACH_D_LOCK(expand_seq, 0)expand_seq(tas, 0) expand_seq(futex, 0) expand_seq(hle, 0) expand_seq
(rtm_spin, 0) KMP_FOREACH_I_LOCK(expand_seq, 0)expand_seq(ticket, 0) expand_seq(queuing, 0) expand_seq(adaptive
, 0) expand_seq(drdpa, 0) expand_seq(rtm_queuing, 0) expand_seq
(nested_tas, 0) expand_seq(nested_futex, 0) expand_seq(nested_ticket
, 0) expand_seq(nested_queuing, 0) expand_seq(nested_drdpa, 0
)
1092#undef expand_seq
1093} kmp_dyna_lockseq_t;
1094 
1095// Enumerates indirect lock tags.
1096typedef enum {
1097#define expand_tag(l, a) locktag_##l,
1098  KMP_FOREACH_I_LOCK(expand_tag, 0)expand_tag(ticket, 0) expand_tag(queuing, 0) expand_tag(adaptive
, 0) expand_tag(drdpa, 0) expand_tag(rtm_queuing, 0) expand_tag
(nested_tas, 0) expand_tag(nested_futex, 0) expand_tag(nested_ticket
, 0) expand_tag(nested_queuing, 0) expand_tag(nested_drdpa, 0
)
1099#undef expand_tag
1100} kmp_indirect_locktag_t;
1101 
1102// Utility macros that extract information from lock sequences.
1103#define KMP_IS_D_LOCK(seq)((seq) >= lockseq_tas && (seq) <= lockseq_rtm_spin
)                                                     \
1104  ((seq) >= KMP_FIRST_D_LOCKlockseq_tas && (seq) <= KMP_LAST_D_LOCKlockseq_rtm_spin)
1105#define KMP_IS_I_LOCK(seq)((seq) >= lockseq_ticket && (seq) <= lockseq_nested_drdpa
)                                                     \
1106  ((seq) >= KMP_FIRST_I_LOCKlockseq_ticket && (seq) <= KMP_LAST_I_LOCKlockseq_nested_drdpa)
1107#define KMP_GET_I_TAG(seq)(kmp_indirect_locktag_t)((seq)-lockseq_ticket) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCKlockseq_ticket)
1108#define KMP_GET_D_TAG(seq)((seq) << 1 | 1) ((seq) << 1 | 1)
1109 
1110// Enumerates direct lock tags starting from indirect tag.
1111typedef enum {
1112#define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l)((lockseq_##l) << 1 | 1),
1113  KMP_FOREACH_D_LOCK(expand_tag, 0)expand_tag(tas, 0) expand_tag(futex, 0) expand_tag(hle, 0) expand_tag
(rtm_spin, 0)
1114#undef expand_tag
1115} kmp_direct_locktag_t;
1116 
1117// Indirect lock type
1118typedef struct {
1119  kmp_user_lock_p lock;
1120  kmp_indirect_locktag_t type;
1121} kmp_indirect_lock_t;
1122 
1123// Function tables for direct locks. Set/unset/test differentiate functions
1124// with/without consistency checking.
1125extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
1126extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
1127extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
1128extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
1129extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
1130 
1131// Function tables for indirect locks. Set/unset/test differentiate functions
1132// with/without consistency checking.
1133extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
1134extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
1135extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
1136extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
1137extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
1138 
1139// Extracts direct lock tag from a user lock pointer
1140#define KMP_EXTRACT_D_TAG(l)(*((kmp_dyna_lock_t *)(l)) & ((1 << 8) - 1) & -
(*((kmp_dyna_lock_t *)(l)) & 1))                                                   \
1141  (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT8) - 1) &                   \
1142   -(*((kmp_dyna_lock_t *)(l)) & 1))
1143 
1144// Extracts indirect lock index from a user lock pointer
1145#define KMP_EXTRACT_I_INDEX(l)(*(kmp_lock_index_t *)(l) >> 1) (*(kmp_lock_index_t *)(l) >> 1)
1146 
1147// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
1148// *) and op (operation type).
1149#define KMP_D_LOCK_FUNC(l, op)__kmp_direct_op[(*((kmp_dyna_lock_t *)(l)) & ((1 <<
 8) - 1) & -(*((kmp_dyna_lock_t *)(l)) & 1))] __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)(*((kmp_dyna_lock_t *)(l)) & ((1 << 8) - 1) & -
(*((kmp_dyna_lock_t *)(l)) & 1))]
1150 
1151// Returns function pointer to the indirect lock function with l
1152// (kmp_indirect_lock_t *) and op (operation type).
1153#define KMP_I_LOCK_FUNC(l, op)__kmp_indirect_op[((kmp_indirect_lock_t *)(l))->type]                                                 \
1154  __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
1155 
1156// Initializes a direct lock with the given lock pointer and lock sequence.
1157#define KMP_INIT_D_LOCK(l, seq)__kmp_direct_init[((seq) << 1 | 1)]((kmp_dyna_lock_t *)
l, seq)                                                \
1158  __kmp_direct_init[KMP_GET_D_TAG(seq)((seq) << 1 | 1)]((kmp_dyna_lock_t *)l, seq)
1159 
1160// Initializes an indirect lock with the given lock pointer and lock sequence.
1161#define KMP_INIT_I_LOCK(l, seq)__kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)                                                \
1162  __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
1163 
1164// Returns "free" lock value for the given lock type.
1165#define KMP_LOCK_FREE(type)(locktag_type) (locktag_##type)
1166 
1167// Returns "busy" lock value for the given lock type.
1168#define KMP_LOCK_BUSY(v, type)((v) << 8 | locktag_type) ((v) << KMP_LOCK_SHIFT8 | locktag_##type)
1169 
1170// Returns lock value after removing (shifting) lock tag.
1171#define KMP_LOCK_STRIP(v)((v) >> 8) ((v) >> KMP_LOCK_SHIFT8)
1172 
1173// Initializes global states and data structures for managing dynamic user
1174// locks.
1175extern void __kmp_init_dynamic_user_locks();
1176 
1177// Allocates and returns an indirect lock with the given indirect lock tag.
1178extern kmp_indirect_lock_t *
1179__kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
1180 
1181// Cleans up global states and data structures for managing dynamic user locks.
1182extern void __kmp_cleanup_indirect_user_locks();
1183 
1184// Default user lock sequence when not using hinted locks.
1185extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
1186 
1187// Jump table for "set lock location", available only for indirect locks.
1188extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS(locktag_nested_drdpa + 1)])(kmp_user_lock_p,
1189                                                            const ident_t *);
1190#define KMP_SET_I_LOCK_LOCATION(lck, loc){ if (__kmp_indirect_set_location[(lck)->type] != __null) __kmp_indirect_set_location
[(lck)->type]((lck)->lock, loc); }                                      \
1191  {                                                                            \
1192    if (__kmp_indirect_set_location[(lck)->type] != NULL__null)                      \
1193      __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc);              \
1194  }
1195 
1196// Jump table for "set lock flags", available only for indirect locks.
1197extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS(locktag_nested_drdpa + 1)])(kmp_user_lock_p,
1198                                                         kmp_lock_flags_t);
1199#define KMP_SET_I_LOCK_FLAGS(lck, flag){ if (__kmp_indirect_set_flags[(lck)->type] != __null) __kmp_indirect_set_flags
[(lck)->type]((lck)->lock, flag); }                                        \
1200  {                                                                            \
1201    if (__kmp_indirect_set_flags[(lck)->type] != NULL__null)                         \
1202      __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag);                \
1203  }
1204 
1205// Jump table for "get lock location", available only for indirect locks.
1206extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS(locktag_nested_drdpa + 1)])(
1207    kmp_user_lock_p);
1208#define KMP_GET_I_LOCK_LOCATION(lck)(__kmp_indirect_get_location[(lck)->type] != __null ? __kmp_indirect_get_location
[(lck)->type]((lck)->lock) : __null)                                           \
1209  (__kmp_indirect_get_location[(lck)->type] != NULL__null                            \
1210       ? __kmp_indirect_get_location[(lck)->type]((lck)->lock)                 \
1211       : NULL__null)
1212 
1213// Jump table for "get lock flags", available only for indirect locks.
1214extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS(locktag_nested_drdpa + 1)])(
1215    kmp_user_lock_p);
// Returns the flags of indirect lock `lck` via the per-type jump table, or a
// zero flags value when the lock type provides no "get flags" routine. The
// fallback is a kmp_lock_flags_t zero rather than NULL so that both arms of
// the conditional have the same (non-pointer) type.
#define KMP_GET_I_LOCK_FLAGS(lck)                                              \
  (__kmp_indirect_get_flags[(lck)->type] != NULL                               \
       ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock)                    \
       : (kmp_lock_flags_t)0)
1220 
1221// number of kmp_indirect_lock_t objects to be allocated together
1222#define KMP_I_LOCK_CHUNK1024 1024
1223// Keep at a power of 2 since it is used in multiplication & division
1224KMP_BUILD_ASSERT(KMP_I_LOCK_CHUNK % 2 == 0)static_assert(1024 % 2 == 0, "Build condition error");
1225// number of row entries in the initial lock table
1226#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS8 8
1227 
1228// Lock table for indirect locks.
1229typedef struct kmp_indirect_lock_table {
1230  kmp_indirect_lock_t **table; // blocks of indirect locks allocated
1231  kmp_uint32 nrow_ptrs; // number *table pointer entries in table
1232  kmp_lock_index_t next; // index to the next lock to be allocated
1233  struct kmp_indirect_lock_table *next_table;
1234} kmp_indirect_lock_table_t;
1235 
1236extern kmp_indirect_lock_table_t __kmp_i_lock_table;
1237 
1238// Returns the indirect lock associated with the given index.
1239// Returns nullptr if no lock at given index
1240static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
1241  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
1242  while (lock_table) {
1243    kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK1024;
1244    if (idx < max_locks) {
1245      kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK1024;
1246      kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK1024;
1247      if (!lock_table->table[row] || idx >= lock_table->next)
1248        break;
1249      return &lock_table->table[row][col];
1250    }
1251    idx -= max_locks;
1252    lock_table = lock_table->next_table;
1253  }
1254  return nullptr;
1255}
1256 
1257// Number of locks in a lock block, which is fixed to "1" now.
1258// TODO: No lock block implementation now. If we do support, we need to manage
1259// lock block data structure for each indirect lock type.
1260extern int __kmp_num_locks_in_block;
1261 
1262// Fast lock table lookup without consistency checking
1263#define KMP_LOOKUP_I_LOCK(l)((sizeof(int) < sizeof(void *)) ? __kmp_get_i_lock((*(kmp_lock_index_t
 *)(l) >> 1)) : *((kmp_indirect_lock_t **)(l)))                                                   \
1264  ((OMP_LOCK_T_SIZEsizeof(int) < sizeof(void *))                                          \
1265       ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l)(*(kmp_lock_index_t *)(l) >> 1))                              \
1266       : *((kmp_indirect_lock_t **)(l)))
1267 
1268// Used once in kmp_error.cpp
1269extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
1270 
1271#else // KMP_USE_DYNAMIC_LOCK
1272 
1273#define KMP_LOCK_BUSY(v, type)((v) << 8 | locktag_type) (v)
1274#define KMP_LOCK_FREE(type)(locktag_type) 0
1275#define KMP_LOCK_STRIP(v)((v) >> 8) (v)
1276 
1277#endif // KMP_USE_DYNAMIC_LOCK
1278 
1279// data structure for using backoff within spin locks.
1280typedef struct {
1281  kmp_uint32 step; // current step
1282  kmp_uint32 max_backoff; // upper bound of outer delay loop
1283  kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent)
1284} kmp_backoff_t;
1285 
1286// Runtime's default backoff parameters
1287extern kmp_backoff_t __kmp_spin_backoff_params;
1288 
1289// Backoff function
1290extern void __kmp_spin_backoff(kmp_backoff_t *);
1291 
1292#ifdef __cplusplus201703L
1293} // extern "C"
1294#endif // __cplusplus
1295 
1296#endif /* KMP_LOCK_H */