Bug Summary

File: build/source/openmp/runtime/src/kmp_tasking.cpp
Warning: line 3715, column 30
Dereference of null pointer
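The warning above is the classic pattern flagged by the analyzer's core null-dereference check: a pointer that is null on at least one feasible path is later dereferenced without a guard. The snippet below is only a minimal, hypothetical illustration of that defect class; it is not the code at kmp_tasking.cpp:3715, which lies outside this excerpt, and the node struct and find() helper are invented for the example.

struct node { int value; node *next; };

// Hypothetical lookup: returns nullptr when no element matches.
static node *find(node *head, int key) {
  for (node *p = head; p != nullptr; p = p->next)
    if (p->value == key)
      return p;
  return nullptr;
}

int read_value(node *head) {
  node *n = find(head, 42);
  // The analyzer follows the path where find() returns nullptr and reports
  // "Dereference of null pointer" on the next line; an "if (n == nullptr)"
  // guard before the dereference silences it.
  return n->value;
}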

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_tasking.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ 
/build/source/openmp/runtime/src/kmp_tasking.cpp

1/*
2 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_i18n.h"
15#include "kmp_itt.h"
16#include "kmp_stats.h"
17#include "kmp_wait_release.h"
18#include "kmp_taskdeps.h"
19
20#if OMPT_SUPPORT
21#include "ompt-specific.h"
22#endif
23
24#if ENABLE_LIBOMPTARGET
25static void (*tgt_target_nowait_query)(void **);
26
27void __kmp_init_target_task() {
28 *(void **)(&tgt_target_nowait_query) = KMP_DLSYM("__tgt_target_nowait_query");
29}
30#endif
31
32/* forward declaration */
33static void __kmp_enable_tasking(kmp_task_team_t *task_team,
34 kmp_info_t *this_thr);
35static void __kmp_alloc_task_deque(kmp_info_t *thread,
36 kmp_thread_data_t *thread_data);
37static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
38 kmp_task_team_t *task_team);
39static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
40
41#ifdef BUILD_TIED_TASK_STACK
42
43// __kmp_trace_task_stack: print the tied tasks from the task stack in order
44// from top do bottom
45//
46// gtid: global thread identifier for thread containing stack
47// thread_data: thread data for task team thread containing stack
48// threshold: value above which the trace statement triggers
49// location: string identifying call site of this function (for trace)
50static void __kmp_trace_task_stack(kmp_int32 gtid,
51 kmp_thread_data_t *thread_data,
52 int threshold, char *location) {
53 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
54 kmp_taskdata_t **stack_top = task_stack->ts_top;
55 kmp_int32 entries = task_stack->ts_entries;
56 kmp_taskdata_t *tied_task;
57
58 KA_TRACE(
59 threshold,
60 ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
61 "first_block = %p, stack_top = %p \n",
62 location, gtid, entries, task_stack->ts_first_block, stack_top));
63
64 KMP_DEBUG_ASSERT(stack_top != NULL);
65 KMP_DEBUG_ASSERT(entries > 0);
66
67 while (entries != 0) {
68 KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
69 // fix up ts_top if we need to pop from previous block
70 if (entries & TASK_STACK_INDEX_MASK == 0) {
71 kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
72
73 stack_block = stack_block->sb_prev;
74 stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
75 }
76
77 // finish bookkeeping
78 stack_top--;
79 entries--;
80
81 tied_task = *stack_top;
82
83 KMP_DEBUG_ASSERT(tied_task != NULL);
84 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
85
86 KA_TRACE(threshold,
87 ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
88 "stack_top=%p, tied_task=%p\n",
89 location, gtid, entries, stack_top, tied_task));
90 }
91 KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);
92
93 KA_TRACE(threshold,
94 ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
95 location, gtid));
96}
97
98// __kmp_init_task_stack: initialize the task stack for the first time
99// after a thread_data structure is created.
100// It should not be necessary to do this again (assuming the stack works).
101//
102// gtid: global thread identifier of calling thread
103// thread_data: thread data for task team thread containing stack
104static void __kmp_init_task_stack(kmp_int32 gtid,
105 kmp_thread_data_t *thread_data) {
106 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
107 kmp_stack_block_t *first_block;
108
109 // set up the first block of the stack
110 first_block = &task_stack->ts_first_block;
111 task_stack->ts_top = (kmp_taskdata_t **)first_block;
112 memset((void *)first_block, '\0',
113 TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
114
115 // initialize the stack to be empty
116 task_stack->ts_entries = TASK_STACK_EMPTY;
117 first_block->sb_next = NULL;
118 first_block->sb_prev = NULL;
119}
120
121// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
122//
123// gtid: global thread identifier for calling thread
124// thread_data: thread info for thread containing stack
125static void __kmp_free_task_stack(kmp_int32 gtid,
126 kmp_thread_data_t *thread_data) {
127 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
128 kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
129
130 KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
131 // free from the second block of the stack
132 while (stack_block != NULL) {
133 kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;
134
135 stack_block->sb_next = NULL;
136 stack_block->sb_prev = NULL;
137 if (stack_block != &task_stack->ts_first_block) {
138 __kmp_thread_free(thread,
139 stack_block); // free the block, if not the first
140 }
141 stack_block = next_block;
142 }
143 // initialize the stack to be empty
144 task_stack->ts_entries = 0;
145 task_stack->ts_top = NULL;
146}
147
148// __kmp_push_task_stack: Push the tied task onto the task stack.
149// Grow the stack if necessary by allocating another block.
150//
151// gtid: global thread identifier for calling thread
152// thread: thread info for thread containing stack
153// tied_task: the task to push on the stack
154static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
155 kmp_taskdata_t *tied_task) {
156 // GEH - need to consider what to do if tt_threads_data not allocated yet
157 kmp_thread_data_t *thread_data =
158 &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
159 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
160
161 if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
162 return; // Don't push anything on stack if team or team tasks are serialized
163 }
164
165 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
166 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
167
168 KA_TRACE(20,
169 ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
170 gtid, thread, tied_task));
171 // Store entry
172 *(task_stack->ts_top) = tied_task;
173
174 // Do bookkeeping for next push
175 task_stack->ts_top++;
176 task_stack->ts_entries++;
177
178 if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
179 // Find beginning of this task block
180 kmp_stack_block_t *stack_block =
181 (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
182
183 // Check if we already have a block
184 if (stack_block->sb_next !=
185 NULL) { // reset ts_top to beginning of next block
186 task_stack->ts_top = &stack_block->sb_next->sb_block[0];
187 } else { // Alloc new block and link it up
188 kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
189 thread, sizeof(kmp_stack_block_t));
190
191 task_stack->ts_top = &new_block->sb_block[0];
192 stack_block->sb_next = new_block;
193 new_block->sb_prev = stack_block;
194 new_block->sb_next = NULL;
195
196 KA_TRACE(
197 30,
198 ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
199 gtid, tied_task, new_block));
200 }
201 }
202 KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
203 tied_task));
204}
205
206// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
207// the task, just check to make sure it matches the ending task passed in.
208//
209// gtid: global thread identifier for the calling thread
210// thread: thread info structure containing stack
211// tied_task: the task popped off the stack
212// ending_task: the task that is ending (should match popped task)
213static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
214 kmp_taskdata_t *ending_task) {
215 // GEH - need to consider what to do if tt_threads_data not allocated yet
216 kmp_thread_data_t *thread_data =
217 &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)];
218 kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
219 kmp_taskdata_t *tied_task;
220
221 if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
222 // Don't pop anything from stack if team or team tasks are serialized
223 return;
224 }
225
226 KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
227 KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);
228
229 KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
230 thread));
231
232 // fix up ts_top if we need to pop from previous block
233 if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
234 kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
235
236 stack_block = stack_block->sb_prev;
237 task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
238 }
239
240 // finish bookkeeping
241 task_stack->ts_top--;
242 task_stack->ts_entries--;
243
244 tied_task = *(task_stack->ts_top);
245
246 KMP_DEBUG_ASSERT(tied_task != NULL);
247 KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
248 KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly
249
250 KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
251 tied_task));
252 return;
253}
254#endif /* BUILD_TIED_TASK_STACK */
255
256// returns 1 if new task is allowed to execute, 0 otherwise
257// checks Task Scheduling constraint (if requested) and
258// mutexinoutset dependencies if any
259static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
260 const kmp_taskdata_t *tasknew,
261 const kmp_taskdata_t *taskcurr) {
262 if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
263 // Check if the candidate obeys the Task Scheduling Constraints (TSC)
264 // only descendant of all deferred tied tasks can be scheduled, checking
265 // the last one is enough, as it in turn is the descendant of all others
266 kmp_taskdata_t *current = taskcurr->td_last_tied;
267 KMP_DEBUG_ASSERT(current != NULL);
268 // check if the task is not suspended on barrier
269 if (current->td_flags.tasktype == TASK_EXPLICIT ||
270 current->td_taskwait_thread > 0) { // <= 0 on barrier
271 kmp_int32 level = current->td_level;
272 kmp_taskdata_t *parent = tasknew->td_parent;
273 while (parent != current && parent->td_level > level) {
274 // check generation up to the level of the current task
275 parent = parent->td_parent;
276 KMP_DEBUG_ASSERT(parent != NULL);
277 }
278 if (parent != current)
279 return false;
280 }
281 }
282 // Check mutexinoutset dependencies, acquire locks
283 kmp_depnode_t *node = tasknew->td_depnode;
284 if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
285 for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
286 KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
287 if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
288 continue;
289 // could not get the lock, release previous locks
290 for (int j = i - 1; j >= 0; --j)
291 __kmp_release_lock(node->dn.mtx_locks[j], gtid);
292 return false;
293 }
294 // negative num_locks means all locks acquired successfully
295 node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
296 }
297 return true;
298}
299
300// __kmp_realloc_task_deque:
301// Re-allocates a task deque for a particular thread, copies the content from
302// the old deque and adjusts the necessary data structures relating to the
303// deque. This operation must be done with the deque_lock being held
304static void __kmp_realloc_task_deque(kmp_info_t *thread,
305 kmp_thread_data_t *thread_data) {
306 kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
307 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
308 kmp_int32 new_size = 2 * size;
309
310 KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
311 "%d] for thread_data %p\n",
312 __kmp_gtid_from_thread(thread), size, new_size, thread_data));
313
314 kmp_taskdata_t **new_deque =
315 (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));
316
317 int i, j;
318 for (i = thread_data->td.td_deque_head, j = 0; j < size;
319 i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
320 new_deque[j] = thread_data->td.td_deque[i];
321
322 __kmp_free(thread_data->td.td_deque);
323
324 thread_data->td.td_deque_head = 0;
325 thread_data->td.td_deque_tail = size;
326 thread_data->td.td_deque = new_deque;
327 thread_data->td.td_deque_size = new_size;
328}
329
330static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
331 kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
332 kmp_thread_data_t *thread_data = &l->td;
333 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
334 thread_data->td.td_deque_last_stolen = -1;
335 KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
336 "for thread_data %p\n",
337 __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
338 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
339 INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
340 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
341 return l;
342}
343
344// The function finds the deque of priority tasks with given priority, or
345// allocates a new deque and put it into sorted (high -> low) list of deques.
346// Deques of non-default priority tasks are shared between all threads in team,
347// as opposed to per-thread deques of tasks with default priority.
348// The function is called under the lock task_team->tt.tt_task_pri_lock.
349static kmp_thread_data_t *
350__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
351 kmp_thread_data_t *thread_data;
352 kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
353 if (lst->priority == pri) {
354 // Found queue of tasks with given priority.
355 thread_data = &lst->td;
356 } else if (lst->priority < pri) {
357 // All current priority queues contain tasks with lower priority.
358 // Allocate new one for given priority tasks.
359 kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
360 thread_data = &list->td;
361 list->priority = pri;
362 list->next = lst;
363 task_team->tt.tt_task_pri_list = list;
364 } else { // task_team->tt.tt_task_pri_list->priority > pri
365 kmp_task_pri_t *next_queue = lst->next;
366 while (next_queue && next_queue->priority > pri) {
367 lst = next_queue;
368 next_queue = lst->next;
369 }
370 // lst->priority > pri && (next == NULL || pri >= next->priority)
371 if (next_queue == NULL) {
372 // No queue with pri priority, need to allocate new one.
373 kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
374 thread_data = &list->td;
375 list->priority = pri;
376 list->next = NULL;
377 lst->next = list;
378 } else if (next_queue->priority == pri) {
379 // Found queue of tasks with given priority.
380 thread_data = &next_queue->td;
381 } else { // lst->priority > pri > next->priority
382 // insert newly allocated between existed queues
383 kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
384 thread_data = &list->td;
385 list->priority = pri;
386 list->next = next_queue;
387 lst->next = list;
388 }
389 }
390 return thread_data;
391}
392
393// __kmp_push_priority_task: Add a task to the team's priority task deque
394static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
395 kmp_taskdata_t *taskdata,
396 kmp_task_team_t *task_team,
397 kmp_int32 pri) {
398 kmp_thread_data_t *thread_data = NULL;
399 KA_TRACE(20,
400 ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
401 gtid, taskdata, pri));
402
403 // Find task queue specific to priority value
404 kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
405 if (UNLIKELY(lst == NULL)) {
406 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
407 if (task_team->tt.tt_task_pri_list == NULL) {
408 // List of queues is still empty, allocate one.
409 kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
410 thread_data = &list->td;
411 list->priority = pri;
412 list->next = NULL;
413 task_team->tt.tt_task_pri_list = list;
414 } else {
415 // Other thread initialized a queue. Check if it fits and get thread_data.
416 thread_data = __kmp_get_priority_deque_data(task_team, pri);
417 }
418 __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
419 } else {
420 if (lst->priority == pri) {
421 // Found queue of tasks with given priority.
422 thread_data = &lst->td;
423 } else {
424 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
425 thread_data = __kmp_get_priority_deque_data(task_team, pri);
426 __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
427 }
428 }
429 KMP_DEBUG_ASSERT(thread_data);
430
431 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
432 // Check if deque is full
433 if (TCR_4(thread_data->td.td_deque_ntasks) >=
434 TASK_DEQUE_SIZE(thread_data->td)) {
435 if (__kmp_enable_task_throttling &&
436 __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
437 thread->th.th_current_task)) {
438 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
439 KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
440 "TASK_NOT_PUSHED for task %p\n",
441 gtid, taskdata));
442 return TASK_NOT_PUSHED;
443 } else {
444 // expand deque to push the task which is not allowed to execute
445 __kmp_realloc_task_deque(thread, thread_data);
446 }
447 }
448 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
449 TASK_DEQUE_SIZE(thread_data->td));
450 // Push taskdata.
451 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
452 // Wrap index.
453 thread_data->td.td_deque_tail =
454 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
455 TCW_4(thread_data->td.td_deque_ntasks,
456 TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
457 KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
458 KMP_FSYNC_RELEASING(taskdata); // releasing child
459 KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
460 "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
461 gtid, taskdata, thread_data->td.td_deque_ntasks,
462 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
463 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
464 task_team->tt.tt_num_task_pri++; // atomic inc
465 return TASK_SUCCESSFULLY_PUSHED;
466}
467
468// __kmp_push_task: Add a task to the thread's deque
469static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
470 kmp_info_t *thread = __kmp_threads[gtid];
471 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
472
473 // If we encounter a hidden helper task, and the current thread is not a
474 // hidden helper thread, we have to give the task to any hidden helper thread
475 // starting from its shadow one.
476 if (UNLIKELY(taskdata->td_flags.hidden_helper &&
477 !KMP_HIDDEN_HELPER_THREAD(gtid))) {
478 kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
479 __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
480 // Signal the hidden helper threads.
481 __kmp_hidden_helper_worker_thread_signal();
482 return TASK_SUCCESSFULLY_PUSHED;
483 }
484
485 kmp_task_team_t *task_team = thread->th.th_task_team;
486 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
487 kmp_thread_data_t *thread_data;
488
489 KA_TRACE(20,
490 ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));
491
492 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
493 // untied task needs to increment counter so that the task structure is not
494 // freed prematurely
495 kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
496 KMP_DEBUG_USE_VAR(counter);
497 KA_TRACE(
498 20,
499 ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
500 gtid, counter, taskdata));
501 }
502
503 // The first check avoids building task_team thread data if serialized
504 if (UNLIKELY(taskdata->td_flags.task_serial)) {
505 KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
506 "TASK_NOT_PUSHED for task %p\n",
507 gtid, taskdata));
508 return TASK_NOT_PUSHED;
509 }
510
511 // Now that serialized tasks have returned, we can assume that we are not in
512 // immediate exec mode
513 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
514 if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
515 __kmp_enable_tasking(task_team, thread);
516 }
517 KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
518 KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
519
520 if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
521 __kmp_max_task_priority > 0) {
522 int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
523 return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
524 }
525
526 // Find tasking deque specific to encountering thread
527 thread_data = &task_team->tt.tt_threads_data[tid];
528
529 // No lock needed since only owner can allocate. If the task is hidden_helper,
530 // we don't need it either because we have initialized the dequeue for hidden
531 // helper thread data.
532 if (UNLIKELY(thread_data->td.td_deque == NULL)) {
533 __kmp_alloc_task_deque(thread, thread_data);
534 }
535
536 int locked = 0;
537 // Check if deque is full
538 if (TCR_4(thread_data->td.td_deque_ntasks) >=
539 TASK_DEQUE_SIZE(thread_data->td)) {
540 if (__kmp_enable_task_throttling &&
541 __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
542 thread->th.th_current_task)) {
543 KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
544 "TASK_NOT_PUSHED for task %p\n",
545 gtid, taskdata));
546 return TASK_NOT_PUSHED;
547 } else {
548 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
549 locked = 1;
550 if (TCR_4(thread_data->td.td_deque_ntasks) >=
551 TASK_DEQUE_SIZE(thread_data->td)) {
552 // expand deque to push the task which is not allowed to execute
553 __kmp_realloc_task_deque(thread, thread_data);
554 }
555 }
556 }
557 // Lock the deque for the task push operation
558 if (!locked) {
559 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
560 // Need to recheck as we can get a proxy task from thread outside of OpenMP
561 if (TCR_4(thread_data->td.td_deque_ntasks) >=
562 TASK_DEQUE_SIZE(thread_data->td)) {
563 if (__kmp_enable_task_throttling &&
564 __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
565 thread->th.th_current_task)) {
566 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
567 KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
568 "returning TASK_NOT_PUSHED for task %p\n",
569 gtid, taskdata));
570 return TASK_NOT_PUSHED;
571 } else {
572 // expand deque to push the task which is not allowed to execute
573 __kmp_realloc_task_deque(thread, thread_data);
574 }
575 }
576 }
577 // Must have room since no thread can add tasks but calling thread
578 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
579 TASK_DEQUE_SIZE(thread_data->td));
580
581 thread_data->td.td_deque[thread_data->td.td_deque_tail] =
582 taskdata; // Push taskdata
583 // Wrap index.
584 thread_data->td.td_deque_tail =
585 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
586 TCW_4(thread_data->td.td_deque_ntasks,
587 TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
588 KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
589 KMP_FSYNC_RELEASING(taskdata); // releasing child
590 KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
591 "task=%p ntasks=%d head=%u tail=%u\n",
592 gtid, taskdata, thread_data->td.td_deque_ntasks,
593 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
594
595 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
596
597 return TASK_SUCCESSFULLY_PUSHED;
598}
599
600// __kmp_pop_current_task_from_thread: set up current task from called thread
601// when team ends
602//
603// this_thr: thread structure to set current_task in.
604void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
605 KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
606 "this_thread=%p, curtask=%p, "
607 "curtask_parent=%p\n",
608 0, this_thr, this_thr->th.th_current_task,
609 this_thr->th.th_current_task->td_parent));
610
611 this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
612
613 KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
614 "this_thread=%p, curtask=%p, "
615 "curtask_parent=%p\n",
616 0, this_thr, this_thr->th.th_current_task,
617 this_thr->th.th_current_task->td_parent));
618}
619
620// __kmp_push_current_task_to_thread: set up current task in called thread for a
621// new team
622//
623// this_thr: thread structure to set up
624// team: team for implicit task data
625// tid: thread within team to set up
626void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
627 int tid) {
628 // current task of the thread is a parent of the new just created implicit
629 // tasks of new team
630 KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
631 "curtask=%p "
632 "parent_task=%p\n",
633 tid, this_thr, this_thr->th.th_current_task,
634 team->t.t_implicit_task_taskdata[tid].td_parent));
635
636 KMP_DEBUG_ASSERT(this_thr != NULL);
637
638 if (tid == 0) {
639 if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
640 team->t.t_implicit_task_taskdata[0].td_parent =
641 this_thr->th.th_current_task;
642 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
643 }
644 } else {
645 team->t.t_implicit_task_taskdata[tid].td_parent =
646 team->t.t_implicit_task_taskdata[0].td_parent;
647 this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
648 }
649
650 KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
651 "curtask=%p "
652 "parent_task=%p\n",
653 tid, this_thr, this_thr->th.th_current_task,
654 team->t.t_implicit_task_taskdata[tid].td_parent));
655}
656
657// __kmp_task_start: bookkeeping for a task starting execution
658//
659// GTID: global thread id of calling thread
660// task: task starting execution
661// current_task: task suspending
662static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
663 kmp_taskdata_t *current_task) {
664 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
665 kmp_info_t *thread = __kmp_threads[gtid];
666
667 KA_TRACE(10,
668 ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
669 gtid, taskdata, current_task));
670
671 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
672
673 // mark currently executing task as suspended
674 // TODO: GEH - make sure root team implicit task is initialized properly.
675 // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
676 current_task->td_flags.executing = 0;
677
678// Add task to stack if tied
679#ifdef BUILD_TIED_TASK_STACK
680 if (taskdata->td_flags.tiedness == TASK_TIED) {
681 __kmp_push_task_stack(gtid, thread, taskdata);
682 }
683#endif /* BUILD_TIED_TASK_STACK */
684
685 // mark starting task as executing and as current task
686 thread->th.th_current_task = taskdata;
687
688 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
689 taskdata->td_flags.tiedness == TASK_UNTIED);
690 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
691 taskdata->td_flags.tiedness == TASK_UNTIED);
692 taskdata->td_flags.started = 1;
693 taskdata->td_flags.executing = 1;
694 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
695 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
696
697 // GEH TODO: shouldn't we pass some sort of location identifier here?
698 // APT: yes, we will pass location here.
699 // need to store current thread state (in a thread or taskdata structure)
700 // before setting work_state, otherwise wrong state is set after end of task
701
702 KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
703
704 return;
705}
706
707#if OMPT_SUPPORT
708//------------------------------------------------------------------------------
709// __ompt_task_init:
710// Initialize OMPT fields maintained by a task. This will only be called after
711// ompt_start_tool, so we already know whether ompt is enabled or not.
712
713static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
714 // The calls to __ompt_task_init already have the ompt_enabled condition.
715 task->ompt_task_info.task_data.value = 0;
716 task->ompt_task_info.frame.exit_frame = ompt_data_none;
717 task->ompt_task_info.frame.enter_frame = ompt_data_none;
718 task->ompt_task_info.frame.exit_frame_flags =
719 ompt_frame_runtime | ompt_frame_framepointer;
720 task->ompt_task_info.frame.enter_frame_flags =
721 ompt_frame_runtime | ompt_frame_framepointer;
722 task->ompt_task_info.dispatch_chunk.start = 0;
723 task->ompt_task_info.dispatch_chunk.iterations = 0;
724}
725
726// __ompt_task_start:
727// Build and trigger task-begin event
728static inline void __ompt_task_start(kmp_task_t *task,
729 kmp_taskdata_t *current_task,
730 kmp_int32 gtid) {
731 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
732 ompt_task_status_t status = ompt_task_switch;
733 if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
734 status = ompt_task_yield;
735 __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
736 }
737 /* let OMPT know that we're about to run this task */
738 if (ompt_enabled.ompt_callback_task_schedule) {
739 ompt_callbacks.ompt_callback(ompt_callback_task_schedule)ompt_callback_task_schedule_callback(
740 &(current_task->ompt_task_info.task_data), status,
741 &(taskdata->ompt_task_info.task_data));
742 }
743 taskdata->ompt_task_info.scheduling_parent = current_task;
744}
745
746// __ompt_task_finish:
747// Build and trigger final task-schedule event
748static inline void __ompt_task_finish(kmp_task_t *task,
749 kmp_taskdata_t *resumed_task,
750 ompt_task_status_t status) {
751 if (ompt_enabled.ompt_callback_task_schedule) {
752 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
753 if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
754 taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
755 status = ompt_task_cancel;
756 }
757
758 /* let OMPT know that we're returning to the callee task */
759 ompt_callbacks.ompt_callback(ompt_callback_task_schedule)ompt_callback_task_schedule_callback(
760 &(taskdata->ompt_task_info.task_data), status,
761 (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL__null));
762 }
763}
764#endif
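// --- Editor's note: illustrative sketch, not part of kmp_tasking.cpp ---
// The __ompt_task_start/__ompt_task_finish helpers above fire the
// ompt_callback_task_schedule callback. A minimal first-party OMPT tool that
// would receive those events could look like this (assumes an OpenMP 5.x
// <omp-tools.h>; error handling omitted):

#include <omp-tools.h>
#include <stdio.h>

static void on_task_schedule(ompt_data_t *prior_task_data,
                             ompt_task_status_t prior_status,
                             ompt_data_t *next_task_data) {
  // prior_status is e.g. ompt_task_switch, ompt_task_complete, ompt_task_detach
  printf("task schedule: prior=%p status=%d next=%p\n",
         prior_task_data ? prior_task_data->ptr : NULL, (int)prior_status,
         next_task_data ? next_task_data->ptr : NULL);
}

static int tool_initialize(ompt_function_lookup_t lookup, int initial_device_num,
                           ompt_data_t *tool_data) {
  ompt_set_callback_t set_cb = (ompt_set_callback_t)lookup("ompt_set_callback");
  set_cb(ompt_callback_task_schedule, (ompt_callback_t)on_task_schedule);
  return 1; // non-zero keeps the tool active
}

static void tool_finalize(ompt_data_t *tool_data) {}

ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {tool_initialize, tool_finalize, {0}};
  return &result;
}
// --- End of editor's sketch ---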
765
766template <bool ompt>
767static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
768 kmp_task_t *task,
769 void *frame_address,
770 void *return_address) {
771 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
772 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
773
774 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
"current_task=%p\n", gtid, loc_ref, taskdata, current_task);
}
775 "current_task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
"current_task=%p\n", gtid, loc_ref, taskdata, current_task);
}
776 gtid, loc_ref, taskdata, current_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
"current_task=%p\n", gtid, loc_ref, taskdata, current_task);
}
;
777
778 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)__builtin_expect(!!(taskdata->td_flags.tiedness == 0), 0)) {
779 // untied task needs to increment counter so that the task structure is not
780 // freed prematurely
781 kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
782 KMP_DEBUG_USE_VAR(counter);
783 KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
784 "incremented for task %p\n",
785 gtid, counter, taskdata));
786 }
787
788 taskdata->td_flags.task_serial =
789 1; // Execute this task immediately, not deferred.
790 __kmp_task_start(gtid, task, current_task);
791
792#if OMPT_SUPPORT1
793 if (ompt) {
794 if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL__null) {
795 current_task->ompt_task_info.frame.enter_frame.ptr =
796 taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
797 current_task->ompt_task_info.frame.enter_frame_flags =
798 taskdata->ompt_task_info.frame.exit_frame_flags =
799 ompt_frame_application | ompt_frame_framepointer;
800 }
801 if (ompt_enabled.ompt_callback_task_create) {
802 ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
803 ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback(
804 &(parent_info->task_data), &(parent_info->frame),
805 &(taskdata->ompt_task_info.task_data),
806 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
807 return_address);
808 }
809 __ompt_task_start(task, current_task, gtid);
810 }
811#endif // OMPT_SUPPORT
812
813 KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n"
, gtid, loc_ref, taskdata); }
814 loc_ref, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n"
, gtid, loc_ref, taskdata); }
;
815}
816
817#if OMPT_SUPPORT1
818OMPT_NOINLINE__attribute__((noinline))
819static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
820 kmp_task_t *task,
821 void *frame_address,
822 void *return_address) {
823 __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
824 return_address);
825}
826#endif // OMPT_SUPPORT
827
828// __kmpc_omp_task_begin_if0: report that a given serialized task has started
829// execution
830//
831// loc_ref: source location information; points to beginning of task block.
832// gtid: global thread number.
833// task: task thunk for the started task.
834void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
835 kmp_task_t *task) {
836#if OMPT_SUPPORT1
837 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
838 OMPT_STORE_RETURN_ADDRESS(gtid);
839 __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
840 OMPT_GET_FRAME_ADDRESS(1),
841 OMPT_LOAD_RETURN_ADDRESS(gtid));
842 return;
843 }
844#endif
845 __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL__null, NULL__null);
846}
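// --- Editor's note: illustrative sketch (user code), not part of kmp_tasking.cpp ---
// __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 bracket an undeferred
// task body; this is what a compiler typically emits for a task with an if(0)
// clause, where the encountering thread suspends the current task and runs the
// task region immediately. The exact lowering is compiler-specific.

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel num_threads(2)
#pragma omp single
  {
    int x = 42;
#pragma omp task if(0) firstprivate(x) // undeferred: executed right here
    printf("undeferred task sees x=%d on thread %d\n", x, omp_get_thread_num());
  }
  return 0;
}
// --- End of editor's sketch ---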
847
848#ifdef TASK_UNUSED
849// __kmpc_omp_task_begin: report that a given task has started execution
850// NEVER GENERATED BY COMPILER, DEPRECATED!!!
851void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
852 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
853
854 KA_TRACE(
855 10,
856 ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
857 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
858
859 __kmp_task_start(gtid, task, current_task);
860
861 KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
862 loc_ref, KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
;
863 return;
864}
865#endif // TASK_UNUSED
866
867// __kmp_free_task: free the current task space and the space for shareds
868//
869// gtid: Global thread ID of calling thread
870// taskdata: task to free
871// thread: thread data structure of caller
872static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
873 kmp_info_t *thread) {
874 KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_free_task: T#%d freeing data from task %p\n"
, gtid, taskdata); }
875 taskdata))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_free_task: T#%d freeing data from task %p\n"
, gtid, taskdata); }
;
876
877 // Check to make sure all flags and counters have the correct values
878 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
879 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
880 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
881 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
882 KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
883 taskdata->td_flags.task_serial == 1);
884 KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
885 kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
886 // Clear data to not be re-used later by mistake.
887 task->data1.destructors = NULL__null;
888 task->data2.priority = 0;
889
890 taskdata->td_flags.freed = 1;
891// deallocate the taskdata and shared variable blocks associated with this task
892#if USE_FAST_MEMORY
893 __kmp_fast_free(thread, taskdata);
894#else /* ! USE_FAST_MEMORY */
895 __kmp_thread_free(thread, taskdata);
896#endif
897 KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
898}
899
900// __kmp_free_task_and_ancestors: free the current task and ancestors without
901// children
902//
903// gtid: Global thread ID of calling thread
904// taskdata: task to free
905// thread: thread data structure of caller
906static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
907 kmp_taskdata_t *taskdata,
908 kmp_info_t *thread) {
909 // Proxy tasks must always be allowed to free their parents
910 // because they can be run in background even in serial mode.
911 kmp_int32 team_serial =
912 (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
913 !taskdata->td_flags.proxy;
914 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
915
916 kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
917 KMP_DEBUG_ASSERT(children >= 0);
918
919 // Now, go up the ancestor tree to see if any ancestors can now be freed.
920 while (children == 0) {
921 kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
922
923 KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
"and freeing itself\n", gtid, taskdata); }
924 "and freeing itself\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
"and freeing itself\n", gtid, taskdata); }
925 gtid, taskdata))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
"and freeing itself\n", gtid, taskdata); }
;
926
927 // --- Deallocate my ancestor task ---
928 __kmp_free_task(gtid, taskdata, thread);
929
930 taskdata = parent_taskdata;
931
932 if (team_serial)
933 return;
934 // Stop checking ancestors at implicit task instead of walking up ancestor
935 // tree to avoid premature deallocation of ancestors.
936 if (taskdata->td_flags.tasktype == TASK_IMPLICIT0) {
937 if (taskdata->td_dephash) { // do we need to cleanup dephash?
938 int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
939 kmp_tasking_flags_t flags_old = taskdata->td_flags;
940 if (children == 0 && flags_old.complete == 1) {
941 kmp_tasking_flags_t flags_new = flags_old;
942 flags_new.complete = 0;
943 if (KMP_COMPARE_AND_STORE_ACQ32(
944 RCAST(kmp_int32 *, &taskdata->td_flags),
945 *RCAST(kmp_int32 *, &flags_old),
946 *RCAST(kmp_int32 *, &flags_new))) {
947 KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
948 "dephash of implicit task %p\n",
949 gtid, taskdata));
950 // cleanup dephash of finished implicit task
951 __kmp_dephash_free_entries(thread, taskdata->td_dephash);
952 }
953 }
954 }
955 return;
956 }
957 // Predecrement simulated by "- 1" calculation
958 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
959 KMP_DEBUG_ASSERT(children >= 0);
960 }
961
962 KA_TRACE(
963 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
964 "not freeing it yet\n",
965 gtid, taskdata, children));
966}
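// --- Editor's note: illustrative sketch, not part of kmp_tasking.cpp ---
// The gist of the cascading free above, stripped of runtime details: every
// task starts with an allocation count of 1 (itself) plus one per allocated
// child; whoever drops a count to zero frees that task and then releases its
// parent, possibly walking further up the tree. All names below are
// hypothetical.

#include <atomic>

struct toy_task {
  toy_task *parent;
  std::atomic<int> allocated_children; // the task itself + its live children
};

static void toy_release(toy_task *t) {
  // fetch_sub returns the previous value; 1 means we just dropped it to zero.
  while (t && t->allocated_children.fetch_sub(1, std::memory_order_acq_rel) == 1) {
    toy_task *parent = t->parent;
    delete t;    // no child or sibling references this task any more
    t = parent;  // the freed task no longer pins its parent
  }
}
// --- End of editor's sketch ---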
967
968// Only need to keep track of child task counts if any of the following:
969// 1. team parallel and tasking not serialized;
970// 2. it is a proxy or detachable or hidden helper task
971// 3. the children counter of its parent task is greater than 0.
972 // The reason for the 3rd one is a serialized team that has encountered a
973 // detached or hidden helper task T. In this case, the execution of T is still
974 // deferred, and it is also possible that a regular task depends on T. If we
975 // don't track the children here, task synchronization will be broken.
976static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
977 kmp_tasking_flags_t flags = taskdata->td_flags;
978 bool ret = !(flags.team_serial || flags.tasking_ser);
979 ret = ret || flags.proxy == TASK_PROXY ||
980 flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
981 ret = ret ||
982 KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
983 return ret;
984}
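// --- Editor's note: illustrative sketch (user code), not part of kmp_tasking.cpp ---
// The third condition above covers situations like this one: even in a
// serialized team, the detached task T stays incomplete until
// omp_fulfill_event is called, so the task that depends on it must still be
// tracked as an incomplete child of the parent.

#include <omp.h>

int main(void) {
  omp_event_handle_t ev;
  int data = 0;
#pragma omp parallel if(0) // serialized team
#pragma omp single
  {
#pragma omp task detach(ev) depend(out : data) shared(data) // task T
    data = 1;
#pragma omp task depend(in : data) shared(data) // must wait for T to complete
    (void)data;
    omp_fulfill_event(ev); // allows T, and then the dependent task, to complete
#pragma omp taskwait
  }
  return 0;
}
// --- End of editor's sketch ---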
985
986// __kmp_task_finish: bookkeeping to do when a task finishes execution
987//
988// gtid: global thread ID for calling thread
989// task: task to be finished
990// resumed_task: task to be resumed. (may be NULL if task is serialized)
991//
992// template<ompt>: effectively ompt_enabled.enabled!=0
993 // the version with ompt=false is inlined, allowing all OMPT code to be
994 // optimized away in this case
995template <bool ompt>
996static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
997 kmp_taskdata_t *resumed_task) {
998 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
999 kmp_info_t *thread = __kmp_threads[gtid];
1000 kmp_task_team_t *task_team =
1001 thread->th.th_task_team; // might be NULL for serial teams...
1002#if KMP_DEBUG1
1003 kmp_int32 children = 0;
1004#endif
1005 KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
"task %p\n", gtid, taskdata, resumed_task); }
1006 "task %p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
"task %p\n", gtid, taskdata, resumed_task); }
1007 gtid, taskdata, resumed_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
"task %p\n", gtid, taskdata, resumed_task); }
;
1008
1009 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT)if (!(taskdata->td_flags.tasktype == 1)) { __kmp_debug_assert
("taskdata->td_flags.tasktype == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 1009); }
;
1010
1011// Pop task from stack if tied
1012#ifdef BUILD_TIED_TASK_STACK
1013 if (taskdata->td_flags.tiedness == TASK_TIED1) {
1014 __kmp_pop_task_stack(gtid, thread, taskdata);
1015 }
1016#endif /* BUILD_TIED_TASK_STACK */
1017
1018 if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)__builtin_expect(!!(taskdata->td_flags.tiedness == 0), 0)) {
1019 // untied task needs to check the counter so that the task structure is not
1020 // freed prematurely
1021 kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
1022 KA_TRACE(
1023 20,
1024 ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
1025 gtid, counter, taskdata));
1026 if (counter > 0) {
1027 // untied task is not done, to be continued possibly by other thread, do
1028 // not free it now
1029 if (resumed_task == NULL) {
1030 KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
1031 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
1032 // task is the parent
1033 }
1034 thread->th.th_current_task = resumed_task; // restore current_task
1035 resumed_task->td_flags.executing = 1; // resume previous task
1036 KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d partially done task %p, "
"resuming task %p\n", gtid, taskdata, resumed_task); }
1037 "resuming task %p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d partially done task %p, "
"resuming task %p\n", gtid, taskdata, resumed_task); }
1038 gtid, taskdata, resumed_task))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_finish(exit): T#%d partially done task %p, "
"resuming task %p\n", gtid, taskdata, resumed_task); }
;
1039 return;
1040 }
1041 }
1042
1043 // bookkeeping for resuming task:
1044 // GEH - note tasking_ser => task_serial
1045 KMP_DEBUG_ASSERT(
1046 (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
1047 taskdata->td_flags.task_serial);
1048 if (taskdata->td_flags.task_serial) {
1049 if (resumed_task == NULL) {
1050 resumed_task = taskdata->td_parent; // In a serialized task, the resumed
1051 // task is the parent
1052 }
1053 } else {
1054 KMP_DEBUG_ASSERT(resumed_task !=
1055 NULL); // verify that resumed task is passed as argument
1056 }
1057
1058 /* If the tasks' destructor thunk flag has been set, we need to invoke the
1059 destructor thunk that has been generated by the compiler. The code is
1060 placed here, since at this point other tasks might have been released
1061 hence overlapping the destructor invocations with some other work in the
1062 released tasks. The OpenMP spec is not specific on when the destructors
1063 are invoked, so we should be free to choose. */
1064 if (UNLIKELY(taskdata->td_flags.destructors_thunk)__builtin_expect(!!(taskdata->td_flags.destructors_thunk),
0)
) {
1065 kmp_routine_entry_t destr_thunk = task->data1.destructors;
1066 KMP_ASSERT(destr_thunk)if (!(destr_thunk)) { __kmp_debug_assert("destr_thunk", "openmp/runtime/src/kmp_tasking.cpp"
, 1066); }
;
1067 destr_thunk(gtid, task);
1068 }
1069
1070 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
1071 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
1072 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
1073
1074 bool completed = true;
1075 if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)__builtin_expect(!!(taskdata->td_flags.detachable == 1), 0
)
) {
1076 if (taskdata->td_allow_completion_event.type ==
1077 KMP_EVENT_ALLOW_COMPLETION) {
1078 // event hasn't been fulfilled yet. Try to detach task.
1079 __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1080 if (taskdata->td_allow_completion_event.type ==
1081 KMP_EVENT_ALLOW_COMPLETION) {
1082 // task finished execution
1083 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1)if (!(taskdata->td_flags.executing == 1)) { __kmp_debug_assert
("taskdata->td_flags.executing == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 1083); }
;
1084 taskdata->td_flags.executing = 0; // suspend the finishing task
1085
1086#if OMPT_SUPPORT1
1087 // For a detached task that is not yet completed, we report the switch back
1088 // here; the later omp_fulfill_event call signals completion.
1089 // Locking is necessary to avoid a race with ompt_task_late_fulfill.
1090 if (ompt)
1091 __ompt_task_finish(task, resumed_task, ompt_task_detach);
1092#endif
1093
1094 // no access to taskdata after this point!
1095 // __kmp_fulfill_event might free taskdata at any time from now
1096
1097 taskdata->td_flags.proxy = TASK_PROXY1; // proxify!
1098 completed = false;
1099 }
1100 __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
1101 }
1102 }
1103
1104 // Tasks with valid target async handles must be re-enqueued.
1105 if (taskdata->td_target_data.async_handle != NULL__null) {
1106 // Note: no need to translate gtid to its shadow. If the current thread is a
1107 // hidden helper one, then the gtid is already correct. Otherwise, hidden
1108 // helper threads are disabled, and gtid refers to an OpenMP thread.
1109 __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
1110 if (KMP_HIDDEN_HELPER_THREAD(gtid)((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num
)
)
1111 __kmp_hidden_helper_worker_thread_signal();
1112 completed = false;
1113 }
1114
1115 if (completed) {
1116 taskdata->td_flags.complete = 1; // mark the task as completed
1117
1118#if OMPT_SUPPORT1
1119 // This is not a detached task, we are done here
1120 if (ompt)
1121 __ompt_task_finish(task, resumed_task, ompt_task_complete);
1122#endif
1123 // TODO: What would be the balance between the conditions in the function
1124 // and an atomic operation?
1125 if (__kmp_track_children_task(taskdata)) {
1126 __kmp_release_deps(gtid, taskdata);
1127 // Predecrement simulated by "- 1" calculation
1128#if KMP_DEBUG
1129 children = -1 +
1130#endif
1131 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
1132 KMP_DEBUG_ASSERT(children >= 0);
1133 if (taskdata->td_taskgroup)
1134 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
1135 } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
1136 task_team->tt.tt_hidden_helper_task_encountered)) {
1137 // if we found proxy or hidden helper tasks there could exist a dependency
1138 // chain with the proxy task as origin
1139 __kmp_release_deps(gtid, taskdata);
1140 }
1141 // td_flags.executing must be marked as 0 after __kmp_release_deps has been
1142 // called. Otherwise, if a task is executed immediately from the
1143 // release_deps code, the flag will be reset to 1 again by this same
1144 // function
1145 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1)if (!(taskdata->td_flags.executing == 1)) { __kmp_debug_assert
("taskdata->td_flags.executing == 1", "openmp/runtime/src/kmp_tasking.cpp"
, 1145); }
;
1146 taskdata->td_flags.executing = 0; // suspend the finishing task
1147
1148 // Decrement the counter of hidden helper tasks to be executed.
1149 if (taskdata->td_flags.hidden_helper) {
1150 // Hidden helper tasks can only be executed by hidden helper threads.
1151 KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid))if (!(((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num
))) { __kmp_debug_assert("KMP_HIDDEN_HELPER_THREAD(gtid)", "openmp/runtime/src/kmp_tasking.cpp"
, 1151); }
;
1152 KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks)(&__kmp_unexecuted_hidden_helper_tasks)->fetch_sub(1, std
::memory_order_acq_rel)
;
1153 }
1154 }
1155
1156 KA_TRACE(
1157 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
1158 gtid, taskdata, children));
1159
1160 // Free this task and then ancestor tasks if they have no children.
1161 // Restore th_current_task first as suggested by John:
1162 // johnmc: if an asynchronous inquiry peers into the runtime system
1163 // it doesn't see the freed task as the current task.
1164 thread->th.th_current_task = resumed_task;
1165 if (completed)
1166 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
1167
1168 // TODO: GEH - make sure root team implicit task is initialized properly.
1169 // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
1170 resumed_task->td_flags.executing = 1; // resume previous task
1171
1172 KA_TRACE(
1173 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
1174 gtid, taskdata, resumed_task));
1175
1176 return;
1177}
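// --- Editor's note: illustrative sketch (user code), not part of kmp_tasking.cpp ---
// The destructor thunk invoked near line 1067 above is what runs the
// destructors of C++ firstprivate copies captured by a task, e.g. the
// std::string copy below. As the comment in __kmp_task_finish notes, the spec
// leaves open exactly when those destructors run relative to other released
// tasks.

#include <cstdio>
#include <string>

int main() {
  std::string msg = "hello from a task";
#pragma omp parallel
#pragma omp single
#pragma omp task firstprivate(msg) // msg's copy is destroyed via the thunk
  std::printf("%s\n", msg.c_str());
  return 0;
}
// --- End of editor's sketch ---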
1178
1179template <bool ompt>
1180static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
1181 kmp_int32 gtid,
1182 kmp_task_t *task) {
1183 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
1184 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
;
1185 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_tasking.cpp"
, 1185); }
;
1186 // this routine will provide task to resume
1187 __kmp_task_finish<ompt>(gtid, task, NULL__null);
1188
1189 KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
1190 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
;
1191
1192#if OMPT_SUPPORT1
1193 if (ompt) {
1194 ompt_frame_t *ompt_frame;
1195 __ompt_get_task_info_internal(0, NULL__null, NULL__null, &ompt_frame, NULL__null, NULL__null);
1196 ompt_frame->enter_frame = ompt_data_none{0};
1197 ompt_frame->enter_frame_flags =
1198 ompt_frame_runtime | ompt_frame_framepointer;
1199 }
1200#endif
1201
1202 return;
1203}
1204
1205#if OMPT_SUPPORT1
1206OMPT_NOINLINE__attribute__((noinline))
1207void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
1208 kmp_task_t *task) {
1209 __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
1210}
1211#endif // OMPT_SUPPORT
1212
1213// __kmpc_omp_task_complete_if0: report that a task has completed execution
1214//
1215// loc_ref: source location information; points to end of task block.
1216// gtid: global thread number.
1217// task: task thunk for the completed task.
1218void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
1219 kmp_task_t *task) {
1220#if OMPT_SUPPORT1
1221 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1222 __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
1223 return;
1224 }
1225#endif
1226 __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
1227}
1228
1229#ifdef TASK_UNUSED
1230// __kmpc_omp_task_complete: report that a task has completed execution
1231// NEVER GENERATED BY COMPILER, DEPRECATED!!!
1232void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
1233 kmp_task_t *task) {
1234 KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
1235 loc_ref, KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
;
1236
1237 __kmp_task_finish<false>(gtid, task,
1238 NULL__null); // Not sure how to find task to resume
1239
1240 KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
1241 loc_ref, KMP_TASK_TO_TASKDATA(task)))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n"
, gtid, loc_ref, (((kmp_taskdata_t *)task) - 1)); }
;
1242 return;
1243}
1244#endif // TASK_UNUSED
1245
1246// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
1247// task for a given thread
1248//
1249// loc_ref: reference to source location of parallel region
1250// this_thr: thread data structure corresponding to implicit task
1251// team: team for this_thr
1252// tid: thread id of given thread within team
1253// set_curr_task: TRUE if need to push current task to thread
1254// NOTE: Routine does not set up the implicit task ICVS. This is assumed to
1255// have already been done elsewhere.
1256// TODO: Get better loc_ref. Value passed in may be NULL
1257void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
1258 kmp_team_t *team, int tid, int set_curr_task) {
1259 kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
1260
1261 KF_TRACE(
1262 10,
1263 ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
1264 tid, team, task, set_curr_task ? "TRUE" : "FALSE"));
1265
1266 task->td_task_id = KMP_GEN_TASK_ID()(~0);
1267 task->td_team = team;
1268 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info
1269 // in debugger)
1270 task->td_ident = loc_ref;
1271 task->td_taskwait_ident = NULL__null;
1272 task->td_taskwait_counter = 0;
1273 task->td_taskwait_thread = 0;
1274
1275 task->td_flags.tiedness = TASK_TIED1;
1276 task->td_flags.tasktype = TASK_IMPLICIT0;
1277 task->td_flags.proxy = TASK_FULL0;
1278
1279 // All implicit tasks are executed immediately, not deferred
1280 task->td_flags.task_serial = 1;
1281 task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1282 task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1283
1284 task->td_flags.started = 1;
1285 task->td_flags.executing = 1;
1286 task->td_flags.complete = 0;
1287 task->td_flags.freed = 0;
1288
1289 task->td_depnode = NULL__null;
1290 task->td_last_tied = task;
1291 task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1292
1293 if (set_curr_task) { // only do this init first time thread is created
1294 KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
1295 // Not used: don't need to deallocate implicit task
1296 KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
1297 task->td_taskgroup = NULL__null; // An implicit task does not have taskgroup
1298 task->td_dephash = NULL__null;
1299 __kmp_push_current_task_to_thread(this_thr, team, tid);
1300 } else {
1301 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
1302 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
1303 }
1304
1305#if OMPT_SUPPORT1
1306 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
1307 __ompt_task_init(task, tid);
1308#endif
1309
1310 KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n"
, tid, team, task); }
1311 team, task))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n"
, tid, team, task); }
;
1312}
1313
1314// __kmp_finish_implicit_task: Release resources associated to implicit tasks
1315// at the end of parallel regions. Some resources are kept for reuse in the next
1316// parallel region.
1317//
1318// thread: thread data structure corresponding to implicit task
1319void __kmp_finish_implicit_task(kmp_info_t *thread) {
1320 kmp_taskdata_t *task = thread->th.th_current_task;
1321 if (task->td_dephash) {
1322 int children;
1323 task->td_flags.complete = 1;
1324 children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
1325 kmp_tasking_flags_t flags_old = task->td_flags;
1326 if (children == 0 && flags_old.complete == 1) {
1327 kmp_tasking_flags_t flags_new = flags_old;
1328 flags_new.complete = 0;
1329 if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
1330 *RCAST(kmp_int32 *, &flags_old),
1331 *RCAST(kmp_int32 *, &flags_new))) {
1332 KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
1333 "dephash of implicit task %p\n",
1334 thread->th.th_info.ds.ds_gtid, task));
1335 __kmp_dephash_free_entries(thread, task->td_dephash);
1336 }
1337 }
1338 }
1339}
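// --- Editor's note: illustrative sketch, not part of kmp_tasking.cpp ---
// The compare-and-swap above treats the whole 32-bit td_flags bitfield as one
// word so that exactly one thread wins the complete:1 -> 0 transition and
// cleans the dephash. A stripped-down version of that idiom (using the same
// GCC/Clang __sync builtin the runtime macro expands to; all names here are
// hypothetical):

#include <cstdint>
#include <cstring>

struct toy_flags {
  unsigned complete : 1;
  unsigned other : 31;
};

static bool toy_claim_cleanup(toy_flags *flags) {
  toy_flags expected = *flags; // snapshot of the current flag word
  if (!expected.complete)
    return false; // some other thread already claimed the cleanup
  toy_flags desired = expected;
  desired.complete = 0;
  std::uint32_t exp_bits, des_bits;
  std::memcpy(&exp_bits, &expected, sizeof(exp_bits));
  std::memcpy(&des_bits, &desired, sizeof(des_bits));
  // Succeeds for exactly one thread racing on the same transition.
  return __sync_bool_compare_and_swap(
      reinterpret_cast<volatile std::uint32_t *>(flags), exp_bits, des_bits);
}
// --- End of editor's sketch ---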
1340
1341// __kmp_free_implicit_task: Release resources associated to implicit tasks
1342 // when these are destroyed
1343//
1344// thread: thread data structure corresponding to implicit task
1345void __kmp_free_implicit_task(kmp_info_t *thread) {
1346 kmp_taskdata_t *task = thread->th.th_current_task;
1347 if (task && task->td_dephash) {
1348 __kmp_dephash_free(thread, task->td_dephash);
1349 task->td_dephash = NULL__null;
1350 }
1351}
1352
1353// Round up a size to a power of two specified by val: Used to insert padding
1354// between structures co-allocated using a single malloc() call
1355static size_t __kmp_round_up_to_val(size_t size, size_t val) {
1356 if (size & (val - 1)) {
1357 size &= ~(val - 1);
1358 if (size <= KMP_SIZE_T_MAX(0xFFFFFFFFFFFFFFFF) - val) {
1359 size += val; // Round up if there is no overflow.
1360 }
1361 }
1362 return size;
1363} // __kmp_round_up_to_val
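// --- Editor's note: worked examples, not part of kmp_tasking.cpp ---
// __kmp_round_up_to_val(size, val) rounds size up to the next multiple of the
// power-of-two val, unless that would overflow:
//   __kmp_round_up_to_val(24, 8)  == 24  // already a multiple of 8
//   __kmp_round_up_to_val(25, 8)  == 32
//   __kmp_round_up_to_val(41, 16) == 48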
1364
1365// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
1366//
1367// loc_ref: source location information
1368// gtid: global thread number.
1369// flags: include tiedness & task type (explicit vs. implicit) of the ''new''
1370// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
1371// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including
1372// private vars accessed in task.
1373// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed
1374// in task.
1375// task_entry: Pointer to task code entry point generated by compiler.
1376// returns: a pointer to the allocated kmp_task_t structure (task).
1377kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1378 kmp_tasking_flags_t *flags,
1379 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1380 kmp_routine_entry_t task_entry) {
1381 kmp_task_t *task;
1382 kmp_taskdata_t *taskdata;
1383 kmp_info_t *thread = __kmp_threads[gtid];
1384 kmp_team_t *team = thread->th.th_team;
1385 kmp_taskdata_t *parent_task = thread->th.th_current_task;
1386 size_t shareds_offset;
1387
1388 if (UNLIKELY(!TCR_4(__kmp_init_middle))__builtin_expect(!!(!(__kmp_init_middle)), 0))
1389 __kmp_middle_initialize();
1390
1391 if (flags->hidden_helper) {
1392 if (__kmp_enable_hidden_helper) {
1393 if (!TCR_4(__kmp_init_hidden_helper)(__kmp_init_hidden_helper))
1394 __kmp_hidden_helper_initialize();
1395 } else {
1396 // If the hidden helper task is not enabled, reset the flag to FALSE.
1397 flags->hidden_helper = FALSE0;
1398 }
1399 }
1400
1401 KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
"sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); }
1402 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
"sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); }
1403 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
"sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); }
1404 sizeof_shareds, task_entry))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
"sizeof_task=%ld sizeof_shared=%ld entry=%p\n", gtid, loc_ref
, *((kmp_int32 *)flags), sizeof_kmp_task_t, sizeof_shareds, task_entry
); }
;
1405
1406 KMP_DEBUG_ASSERT(parent_task)if (!(parent_task)) { __kmp_debug_assert("parent_task", "openmp/runtime/src/kmp_tasking.cpp"
, 1406); }
;
1407 if (parent_task->td_flags.final) {
1408 if (flags->merged_if0) {
1409 }
1410 flags->final = 1;
1411 }
1412
1413 if (flags->tiedness == TASK_UNTIED0 && !team->t.t_serialized) {
1414 // Untied task encountered causes the TSC algorithm to check entire deque of
1415 // the victim thread. If no untied task encountered, then checking the head
1416 // of the deque should be enough.
1417 KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
1419
1420 // Detachable tasks are not proxy tasks yet but could be in the future. Doing
1421 // the tasking setup
1422 // when that happens is too late.
1423 if (UNLIKELY(flags->proxy == TASK_PROXY ||
1424 flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
1425 if (flags->proxy == TASK_PROXY1) {
1426 flags->tiedness = TASK_UNTIED0;
1427 flags->merged_if0 = 1;
1428 }
1429 /* Are we running in a sequential parallel region or tskm_immediate_exec? If
1430 so, tasking support needs to be enabled now. */
1431 if ((thread->th.th_task_team) == NULL__null) {
1432 /* This should only happen if the team is serialized
1433 setup a task team and propagate it to the thread */
1434 KMP_DEBUG_ASSERT(team->t.t_serialized)if (!(team->t.t_serialized)) { __kmp_debug_assert("team->t.t_serialized"
, "openmp/runtime/src/kmp_tasking.cpp", 1434); }
;
1435 KA_TRACE(30,
1436 ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
1437 gtid));
1438 // 1 indicates setup the current team regardless of nthreads
1439 __kmp_task_team_setup(thread, team, 1);
1440 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
1441 }
1442 kmp_task_team_t *task_team = thread->th.th_task_team;
1443
1444 /* tasking must be enabled now as the task might not be pushed */
1445 if (!KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) {
1446 KA_TRACE(
1447 30,
1448 ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
1449 __kmp_enable_tasking(task_team, thread);
1450 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1451 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
1452 // No lock needed since only owner can allocate
1453 if (thread_data->td.td_deque == NULL__null) {
1454 __kmp_alloc_task_deque(thread, thread_data);
1455 }
1456 }
1457
1458 if ((flags->proxy == TASK_PROXY1 || flags->detachable == TASK_DETACHABLE1) &&
1459 task_team->tt.tt_found_proxy_tasks == FALSE0)
1460 TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE)(task_team->tt.tt_found_proxy_tasks) = ((!0));
1461 if (flags->hidden_helper &&
1462 task_team->tt.tt_hidden_helper_task_encountered == FALSE0)
1463 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE)(task_team->tt.tt_hidden_helper_task_encountered) = ((!0));
1464 }
1465
1466 // Calculate shared structure offset including padding after kmp_task_t struct
1467 // to align pointers in shared struct
1468 shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
1469 shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
1470
1471 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
1472 KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d First malloc size: %ld\n"
, gtid, shareds_offset); }
1473 shareds_offset))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d First malloc size: %ld\n"
, gtid, shareds_offset); }
;
1474 KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d Second malloc size: %ld\n"
, gtid, sizeof_shareds); }
1475 sizeof_shareds))if (kmp_a_debug >= 30) { __kmp_debug_printf ("__kmp_task_alloc: T#%d Second malloc size: %ld\n"
, gtid, sizeof_shareds); }
;
1476
1477 // Avoid double allocation here by combining shareds with taskdata
1478#if USE_FAST_MEMORY
1479 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
1480 sizeof_shareds);
1481#else /* ! USE_FAST_MEMORY */
1482 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
1483 sizeof_shareds);
1484#endif /* USE_FAST_MEMORY */
1485
1486 task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
1487
1488// Make sure task & taskdata are aligned appropriately
1489#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
1490 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
1491 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
1492#else
1493 KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
1494 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
1495#endif
1496 if (sizeof_shareds > 0) {
1497 // Avoid double allocation here by combining shareds with taskdata
1498 task->shareds = &((char *)taskdata)[shareds_offset];
1499 // Make sure shareds struct is aligned to pointer size
1500 KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
1501 0);
1502 } else {
1503 task->shareds = NULL__null;
1504 }
1505 task->routine = task_entry;
1506 task->part_id = 0; // AC: Always start with 0 part id
1507
1508 taskdata->td_task_id = KMP_GEN_TASK_ID()(~0);
1509 taskdata->td_team = thread->th.th_team;
1510 taskdata->td_alloc_thread = thread;
1511 taskdata->td_parent = parent_task;
1512 taskdata->td_level = parent_task->td_level + 1; // increment nesting level
1513 KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0)(&taskdata->td_untied_count)->store(0, std::memory_order_relaxed
)
;
1514 taskdata->td_ident = loc_ref;
1515 taskdata->td_taskwait_ident = NULL__null;
1516 taskdata->td_taskwait_counter = 0;
1517 taskdata->td_taskwait_thread = 0;
1518 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1519 // avoid copying icvs for proxy tasks
1520 if (flags->proxy == TASK_FULL0)
1521 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1522
1523 taskdata->td_flags = *flags;
1524 taskdata->td_task_team = thread->th.th_task_team;
1525 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1526 taskdata->td_flags.tasktype = TASK_EXPLICIT1;
1527 // If it is hidden helper task, we need to set the team and task team
1528 // correspondingly.
1529 if (flags->hidden_helper) {
1530 kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)];
1531 taskdata->td_team = shadow_thread->th.th_team;
1532 taskdata->td_task_team = shadow_thread->th.th_task_team;
1533 }
1534
1535 // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
1536 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1537
1538 // GEH - TODO: fix this to copy parent task's value of team_serial flag
1539 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1540
1541 // GEH - Note we serialize the task if the team is serialized to make sure
1542 // implicit parallel region tasks are not left until program termination to
1543 // execute. Also, it helps locality to execute immediately.
1544
1545 taskdata->td_flags.task_serial =
1546 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1547 taskdata->td_flags.tasking_ser || flags->merged_if0);
1548
1549 taskdata->td_flags.started = 0;
1550 taskdata->td_flags.executing = 0;
1551 taskdata->td_flags.complete = 0;
1552 taskdata->td_flags.freed = 0;
1553
1554 KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
1555 // start at one because counts current task and children
1556 KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
1557 taskdata->td_taskgroup =
1558 parent_task->td_taskgroup; // task inherits taskgroup from the parent task
1559 taskdata->td_dephash = NULL__null;
1560 taskdata->td_depnode = NULL__null;
1561 taskdata->td_target_data.async_handle = NULL__null;
1562 if (flags->tiedness == TASK_UNTIED0)
1563 taskdata->td_last_tied = NULL__null; // will be set when the task is scheduled
1564 else
1565 taskdata->td_last_tied = taskdata;
1566 taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
1567#if OMPT_SUPPORT1
1568 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
1569 __ompt_task_init(taskdata, gtid);
1570#endif
1571 // TODO: What would be the balance between the conditions in the function and
1572 // an atomic operation?
1573 if (__kmp_track_children_task(taskdata)) {
1574 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks)(&parent_task->td_incomplete_child_tasks)->fetch_add
(1, std::memory_order_acq_rel)
;
1575 if (parent_task->td_taskgroup)
1576 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count)(&parent_task->td_taskgroup->count)->fetch_add(1
, std::memory_order_acq_rel)
;
1577 // Only need to keep track of allocated child tasks for explicit tasks since
1578 // implicit tasks are not deallocated
1579 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT1) {
1580 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks)(&taskdata->td_parent->td_allocated_child_tasks)->
fetch_add(1, std::memory_order_acq_rel)
;
1581 }
1582 if (flags->hidden_helper) {
1583 taskdata->td_flags.task_serial = FALSE0;
1584 // Increment the number of hidden helper tasks to be executed
1585 KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks)(&__kmp_unexecuted_hidden_helper_tasks)->fetch_add(1, std
::memory_order_acq_rel)
;
1586 }
1587 }
1588
1589 KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1590 gtid, taskdata, taskdata->td_parent));
1591
1592 return task;
1593}
1594
1595kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1596 kmp_int32 flags, size_t sizeof_kmp_task_t,
1597 size_t sizeof_shareds,
1598 kmp_routine_entry_t task_entry) {
1599 kmp_task_t *retval;
1600 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
1601 __kmp_assert_valid_gtid(gtid);
1602 input_flags->native = FALSE0;
1603 // __kmp_task_alloc() sets up all other runtime flags
1604 KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
1605 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1606 gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1607 input_flags->proxy ? "proxy" : "",
1608 input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
1609 sizeof_shareds, task_entry));
1610
1611 retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1612 sizeof_shareds, task_entry);
1613
1614 KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
1615
1616 return retval;
1617}
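// ---------------------------------------------------------------------------
// [Editor's note: illustrative example only, not part of kmp_tasking.cpp]
// __kmpc_omp_task_alloc() above is the compiler-facing allocator for explicit
// tasks: the compiler outlines the task body into a kmp_routine_entry_t thunk,
// calls this entry to obtain a kmp_task_t, fills in the captured data, and
// submits the task with __kmpc_omp_task(). The exact lowering is
// compiler-specific; the grounded, user-level shape of such a task is simply:
#include <cstdio>
#include <omp.h>

int main() {
  int x = 42; // captured value; the compiler copies it into the task's storage
  #pragma omp parallel
  #pragma omp single
  {
    #pragma omp task firstprivate(x) // -> __kmpc_omp_task_alloc + __kmpc_omp_task
    std::printf("task sees x=%d on thread %d\n", x, omp_get_thread_num());
    #pragma omp taskwait
  }
  return 0;
}
// ---------------------------------------------------------------------------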
1618
1619kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
1620 kmp_int32 flags,
1621 size_t sizeof_kmp_task_t,
1622 size_t sizeof_shareds,
1623 kmp_routine_entry_t task_entry,
1624 kmp_int64 device_id) {
1625 auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
1626 // a target task is untied, as defined in the specification
1627 input_flags.tiedness = TASK_UNTIED0;
1628
1629 if (__kmp_enable_hidden_helper)
1630 input_flags.hidden_helper = TRUE(!0);
1631
1632 return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
1633 sizeof_shareds, task_entry);
1634}
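// ---------------------------------------------------------------------------
// [Editor's note: illustrative example only, not part of kmp_tasking.cpp]
// __kmpc_omp_target_task_alloc() above forces the task to be untied and, when
// hidden helper threads are enabled, marks it as a hidden-helper task. The
// generating construct is a deferred target region ("target ... nowait"); a
// later taskwait (or the end of the enclosing task region) synchronizes with
// the generated target task:
void offload_add_one(double *a, int n) {
  #pragma omp target teams distribute parallel for map(tofrom: a[0:n]) nowait
  for (int i = 0; i < n; ++i)
    a[i] += 1.0;
  // other host work could overlap with the offload here ...
  #pragma omp taskwait // wait for the deferred target task to complete
}
// ---------------------------------------------------------------------------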
1635
1636/*!
1637@ingroup TASKING
1638@param loc_ref location of the original task directive
1639@param gtid Global Thread ID of encountering thread
1640@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new
1641task''
1642@param naffins Number of affinity items
1643@param affin_list List of affinity items
1644@return Returns non-zero if registering affinity information was not successful.
1645 Returns 0 if registration was successful.
1646This entry registers the affinity information attached to a task with the task
1647thunk structure kmp_taskdata_t.
1648*/
1649kmp_int32
1650__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
1651 kmp_task_t *new_task, kmp_int32 naffins,
1652 kmp_task_affinity_info_t *affin_list) {
1653 return 0;
1654}
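// ---------------------------------------------------------------------------
// [Editor's note: illustrative example only, not part of kmp_tasking.cpp]
// __kmpc_omp_reg_task_with_affinity() is reached from the OpenMP 5.0
// affinity() clause on a task; as the body above shows, this implementation
// accepts the hint and returns 0 without acting on it. A sketch of the clause
// at the source level (compiler support for affinity() varies):
#include <cstddef>

void scale_chunks(double *data, std::size_t n) {
  #pragma omp parallel
  #pragma omp single
  for (std::size_t i = 0; i + 1024 <= n; i += 1024) {
    // hint: run this task close to the data it touches
    #pragma omp task affinity(data[i:1024])
    for (std::size_t j = i; j < i + 1024; ++j)
      data[j] *= 2.0;
  }
}
// ---------------------------------------------------------------------------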
1655
1656// __kmp_invoke_task: invoke the specified task
1657//
1658// gtid: global thread ID of caller
1659// task: the task to invoke
1660// current_task: the task to resume after task invocation
1661static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
1662 kmp_taskdata_t *current_task) {
1663 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1);
1664 kmp_info_t *thread;
1665 int discard = 0 /* false */;
1666 KA_TRACE(
1667 30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1668 gtid, taskdata, current_task));
1669 KMP_DEBUG_ASSERT(task)if (!(task)) { __kmp_debug_assert("task", "openmp/runtime/src/kmp_tasking.cpp"
, 1669); }
;
1670 if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&__builtin_expect(!!(taskdata->td_flags.proxy == 1 &&
taskdata->td_flags.complete == 1), 0)
1671 taskdata->td_flags.complete == 1)__builtin_expect(!!(taskdata->td_flags.proxy == 1 &&
taskdata->td_flags.complete == 1), 0)
) {
1672 // This is a proxy task that was already completed but it needs to run
1673 // its bottom-half finish
1674 KA_TRACE(
1675 30,
1676 ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1677 gtid, taskdata));
1678
1679 __kmp_bottom_half_finish_proxy(gtid, task);
1680
1681 KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
1682 "proxy task %p, resuming task %p\n",
1683 gtid, taskdata, current_task));
1684
1685 return;
1686 }
1687
1688#if OMPT_SUPPORT1
1689 // For untied tasks, the first task executed only calls __kmpc_omp_task and
1690 // does not execute code.
1691 ompt_thread_info_t oldInfo;
1692 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1693 // Store the threads states and restore them after the task
1694 thread = __kmp_threads[gtid];
1695 oldInfo = thread->th.ompt_thread_info;
1696 thread->th.ompt_thread_info.wait_id = 0;
1697 thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
1698 ? ompt_state_work_serial
1699 : ompt_state_work_parallel;
1700 taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1701 }
1702#endif
1703
1704 // Proxy tasks are not handled by the runtime
1705 if (taskdata->td_flags.proxy != TASK_PROXY1) {
1706 __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded
1707 }
1708
1709 // TODO: cancel tasks if the parallel region has also been cancelled
1710 // TODO: check if this sequence can be hoisted above __kmp_task_start
1711 // if cancellation has been enabled for this run ...
1712 if (UNLIKELY(__kmp_omp_cancellation)__builtin_expect(!!(__kmp_omp_cancellation), 0)) {
1713 thread = __kmp_threads[gtid];
1714 kmp_team_t *this_team = thread->th.th_team;
1715 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1716 if ((taskgroup && taskgroup->cancel_request) ||
1717 (this_team->t.t_cancel_request == cancel_parallel)) {
1718#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1719 ompt_data_t *task_data;
1720 if (UNLIKELY(ompt_enabled.ompt_callback_cancel)__builtin_expect(!!(ompt_enabled.ompt_callback_cancel), 0)) {
1721 __ompt_get_task_info_internal(0, NULL__null, &task_data, NULL__null, NULL__null, NULL__null);
1722 ompt_callbacks.ompt_callback(ompt_callback_cancel)ompt_callback_cancel_callback(
1723 task_data,
1724 ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
1725 : ompt_cancel_parallel) |
1726 ompt_cancel_discarded_task,
1727 NULL__null);
1728 }
1729#endif
1730 KMP_COUNT_BLOCK(TASK_cancelled)((void)0);
1731 // this task belongs to a task group and we need to cancel it
1732 discard = 1 /* true */;
1733 }
1734 }
1735
1736 // Invoke the task routine and pass in relevant data.
1737 // Thunks generated by gcc take a different argument list.
1738 if (!discard) {
1739 if (taskdata->td_flags.tiedness == TASK_UNTIED0) {
1740 taskdata->td_last_tied = current_task->td_last_tied;
1741 KMP_DEBUG_ASSERT(taskdata->td_last_tied)if (!(taskdata->td_last_tied)) { __kmp_debug_assert("taskdata->td_last_tied"
, "openmp/runtime/src/kmp_tasking.cpp", 1741); }
;
1742 }
1743#if KMP_STATS_ENABLED0
1744 KMP_COUNT_BLOCK(TASK_executed)((void)0);
1745 switch (KMP_GET_THREAD_STATE()((void)0)) {
1746 case FORK_JOIN_BARRIER:
1747 KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar)((void)0);
1748 break;
1749 case PLAIN_BARRIER:
1750 KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar)((void)0);
1751 break;
1752 case TASKYIELD:
1753 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield)((void)0);
1754 break;
1755 case TASKWAIT:
1756 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait)((void)0);
1757 break;
1758 case TASKGROUP:
1759 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup)((void)0);
1760 break;
1761 default:
1762 KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate)((void)0);
1763 break;
1764 }
1765#endif // KMP_STATS_ENABLED
1766
1767// OMPT task begin
1768#if OMPT_SUPPORT1
1769 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
1770 __ompt_task_start(task, current_task, gtid);
1771#endif
1772#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1773 if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&__builtin_expect(!!(ompt_enabled.ompt_callback_dispatch &&
taskdata->ompt_task_info.dispatch_chunk.iterations > 0
), 0)
1774 taskdata->ompt_task_info.dispatch_chunk.iterations > 0)__builtin_expect(!!(ompt_enabled.ompt_callback_dispatch &&
taskdata->ompt_task_info.dispatch_chunk.iterations > 0
), 0)
) {
1775 ompt_data_t instance = ompt_data_none{0};
1776 instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
1777 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
1778 ompt_callbacks.ompt_callback(ompt_callback_dispatch)ompt_callback_dispatch_callback(
1779 &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
1780 ompt_dispatch_taskloop_chunk, instance);
1781 taskdata->ompt_task_info.dispatch_chunk = {0, 0};
1782 }
1783#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1784
1785#if OMPD_SUPPORT1
1786 if (ompd_state & OMPD_ENABLE_BP0x1)
1787 ompd_bp_task_begin();
1788#endif
1789
1790#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1791 kmp_uint64 cur_time;
1792 kmp_int32 kmp_itt_count_task =
1793 __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
1794 current_task->td_flags.tasktype == TASK_IMPLICIT0;
1795 if (kmp_itt_count_task) {
1796 thread = __kmp_threads[gtid];
1797 // Time outer level explicit task on barrier for adjusting imbalance time
1798 if (thread->th.th_bar_arrive_time)
1799 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1800 else
1801 kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
1802 }
1803 KMP_FSYNC_ACQUIRED(taskdata)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(taskdata))
; // acquired self (new task)
1804#endif
1805
1806#if ENABLE_LIBOMPTARGET1
1807 if (taskdata->td_target_data.async_handle != NULL__null) {
1808 // If we have a valid target async handle, that means that we have already
1809 // executed the task routine once. We must query for the handle completion
1810 // instead of re-executing the routine.
1811 KMP_ASSERT(tgt_target_nowait_query)if (!(tgt_target_nowait_query)) { __kmp_debug_assert("tgt_target_nowait_query"
, "openmp/runtime/src/kmp_tasking.cpp", 1811); }
;
1812 tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
1813 } else
1814#endif
1815 if (task->routine != NULL__null) {
1816#ifdef KMP_GOMP_COMPAT
1817 if (taskdata->td_flags.native) {
1818 ((void (*)(void *))(*(task->routine)))(task->shareds);
1819 } else
1820#endif /* KMP_GOMP_COMPAT */
1821 {
1822 (*(task->routine))(gtid, task);
1823 }
1824 }
1825 KMP_POP_PARTITIONED_TIMER()((void)0);
1826
1827#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1828 if (kmp_itt_count_task) {
1829 // Barrier imbalance - adjust arrive time with the task duration
1830 thread->th.th_bar_arrive_time += (__itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0() - cur_time);
1831 }
1832 KMP_FSYNC_CANCEL(taskdata)(!__kmp_itt_fsync_cancel_ptr__3_0) ? (void)0 : __kmp_itt_fsync_cancel_ptr__3_0
((void *)(taskdata))
; // destroy self (just executed)
1833 KMP_FSYNC_RELEASING(taskdata->td_parent)(!__kmp_itt_fsync_releasing_ptr__3_0) ? (void)0 : __kmp_itt_fsync_releasing_ptr__3_0
((void *)(taskdata->td_parent))
; // releasing parent
1834#endif
1835 }
1836
1837#if OMPD_SUPPORT1
1838 if (ompd_state & OMPD_ENABLE_BP0x1)
1839 ompd_bp_task_end();
1840#endif
1841
1842 // Proxy tasks are not handled by the runtime
1843 if (taskdata->td_flags.proxy != TASK_PROXY1) {
1844#if OMPT_SUPPORT1
1845 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1846 thread->th.ompt_thread_info = oldInfo;
1847 if (taskdata->td_flags.tiedness == TASK_TIED1) {
1848 taskdata->ompt_task_info.frame.exit_frame = ompt_data_none{0};
1849 }
1850 __kmp_task_finish<true>(gtid, task, current_task);
1851 } else
1852#endif
1853 __kmp_task_finish<false>(gtid, task, current_task);
1854 }
1855
1856 KA_TRACE(
1857 30,
1858 ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1859 gtid, taskdata, current_task));
1860 return;
1861}
1862
1863// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
1864//
1865// loc_ref: location of original task pragma (ignored)
1866// gtid: Global Thread ID of encountering thread
1867// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
1868// Returns:
1869// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1870// be resumed later.
1871// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1872// resumed later.
1873kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
1874 kmp_task_t *new_task) {
1875 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
1876
1877 KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1878 loc_ref, new_taskdata));
1879
1880#if OMPT_SUPPORT1
1881 kmp_taskdata_t *parent;
1882 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1883 parent = new_taskdata->td_parent;
1884 if (ompt_enabled.ompt_callback_task_create) {
1885 ompt_callbacks.ompt_callback(ompt_callback_task_create)ompt_callback_task_create_callback(
1886 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
1887 &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
1888 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
1889 }
1890 }
1891#endif
1892
1893 /* Should we execute the new task or queue it? For now, let's just always try
1894 to queue it. If the queue fills up, then we'll execute it. */
1895
1896 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED1) // if cannot defer
1897 { // Execute this task immediately
1898 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1899 new_taskdata->td_flags.task_serial = 1;
1900 __kmp_invoke_task(gtid, new_task, current_task);
1901 }
1902
1903 KA_TRACE(
1904 10,
1905 ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1906 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
1907 gtid, loc_ref, new_taskdata));
1908
1909#if OMPT_SUPPORT1
1910 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1911 parent->ompt_task_info.frame.enter_frame = ompt_data_none{0};
1912 }
1913#endif
1914 return TASK_CURRENT_NOT_QUEUED0;
1915}
1916
1917// __kmp_omp_task: Schedule a non-thread-switchable task for execution
1918//
1919// gtid: Global Thread ID of encountering thread
1920 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1921// serialize_immediate: if TRUE then if the task is executed immediately its
1922// execution will be serialized
1923// Returns:
1924// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1925// be resumed later.
1926// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1927// resumed later.
1928kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
1929 bool serialize_immediate) {
1930 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
1931
1932 /* Should we execute the new task or queue it? For now, let's just always try
1933 to queue it. If the queue fills up, then we'll execute it. */
1934 if (new_taskdata->td_flags.proxy == TASK_PROXY1 ||
1935 __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED1) // if cannot defer
1936 { // Execute this task immediately
1937 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1938 if (serialize_immediate)
1939 new_taskdata->td_flags.task_serial = 1;
1940 __kmp_invoke_task(gtid, new_task, current_task);
1941 } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) &&
1942 __kmp_wpolicy_passive) {
1943 kmp_info_t *this_thr = __kmp_threads[gtid];
1944 kmp_team_t *team = this_thr->th.th_team;
1945 kmp_int32 nthreads = this_thr->th.th_team_nproc;
1946 for (int i = 0; i < nthreads; ++i) {
1947 kmp_info_t *thread = team->t.t_threads[i];
1948 if (thread == this_thr)
1949 continue;
1950 if (thread->th.th_sleep_loc != NULL__null) {
1951 __kmp_null_resume_wrapper(thread);
1952 break; // awake one thread at a time
1953 }
1954 }
1955 }
1956 return TASK_CURRENT_NOT_QUEUED0;
1957}
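// ---------------------------------------------------------------------------
// [Editor's note: illustrative configuration only, not part of kmp_tasking.cpp]
// The "else if" branch above only triggers when workers are allowed to go to
// sleep, i.e. a passive wait policy combined with a finite blocktime. A run
// configuration along these lines (standard env vars, example values) would
// exercise it:
//
//   OMP_WAIT_POLICY=passive KMP_BLOCKTIME=0 ./app
//
// With such a setting an idle worker suspends quickly, and __kmp_omp_task()
// wakes exactly one sleeping worker each time it successfully queues a new
// task, instead of leaving the task to be discovered by polling.
// ---------------------------------------------------------------------------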
1958
1959// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
1960// non-thread-switchable task from the parent thread only!
1961//
1962// loc_ref: location of original task pragma (ignored)
1963// gtid: Global Thread ID of encountering thread
1964// new_task: non-thread-switchable task thunk allocated by
1965// __kmp_omp_task_alloc()
1966// Returns:
1967// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
1968// be resumed later.
1969// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
1970// resumed later.
1971kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
1972 kmp_task_t *new_task) {
1973 kmp_int32 res;
1974 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK)((void)0);
1975
1976#if KMP_DEBUG1 || OMPT_SUPPORT1
1977 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
1978#endif
1979 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1980 new_taskdata));
1981 __kmp_assert_valid_gtid(gtid);
1982
1983#if OMPT_SUPPORT1
1984 kmp_taskdata_t *parent = NULL__null;
1985 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
1986 if (!new_taskdata->td_flags.started) {
1987 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
1988 parent = new_taskdata->td_parent;
1989 if (!parent->ompt_task_info.frame.enter_frame.ptr) {
1990 parent->ompt_task_info.frame.enter_frame.ptr =
1991 OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
1992 }
1993 if (ompt_enabled.ompt_callback_task_create) {
1994 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
1995 &(parent->ompt_task_info.task_data),
1996 &(parent->ompt_task_info.frame),
1997 &(new_taskdata->ompt_task_info.task_data),
1998 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
1999 OMPT_LOAD_RETURN_ADDRESS(gtid));
2000 }
2001 } else {
2002 // We are scheduling the continuation of an UNTIED task.
2003 // Scheduling back to the parent task.
2004 __ompt_task_finish(new_task,
2005 new_taskdata->ompt_task_info.scheduling_parent,
2006 ompt_task_switch);
2007 new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none{0};
2008 }
2009 }
2010#endif
2011
2012 res = __kmp_omp_task(gtid, new_task, true);
2013
2014 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
2015 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
2016 gtid, loc_ref, new_taskdata));
2017#if OMPT_SUPPORT1
2018 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)__builtin_expect(!!(ompt_enabled.enabled && parent !=
__null), 0)
) {
2019 parent->ompt_task_info.frame.enter_frame = ompt_data_none{0};
2020 }
2021#endif
2022 return res;
2023}
2024
2025// __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
2026// a taskloop task with the correct OMPT return address
2027//
2028// loc_ref: location of original task pragma (ignored)
2029// gtid: Global Thread ID of encountering thread
2030// new_task: non-thread-switchable task thunk allocated by
2031// __kmp_omp_task_alloc()
2032// codeptr_ra: return address for OMPT callback
2033// Returns:
2034// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
2035// be resumed later.
2036// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
2037// resumed later.
2038kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
2039 kmp_task_t *new_task, void *codeptr_ra) {
2040 kmp_int32 res;
2041 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK)((void)0);
2042
2043#if KMP_DEBUG1 || OMPT_SUPPORT1
2044 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task)(((kmp_taskdata_t *)new_task) - 1);
2045#endif
2046 KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
2047 new_taskdata));
2048
2049#if OMPT_SUPPORT1
2050 kmp_taskdata_t *parent = NULL__null;
2051 if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)__builtin_expect(!!(ompt_enabled.enabled && !new_taskdata
->td_flags.started), 0)
) {
2052 parent = new_taskdata->td_parent;
2053 if (!parent->ompt_task_info.frame.enter_frame.ptr)
2054 parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0);
2055 if (ompt_enabled.ompt_callback_task_create) {
2056 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
2057 &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
2058 &(new_taskdata->ompt_task_info.task_data),
2059 ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
2060 codeptr_ra);
2061 }
2062 }
2063#endif
2064
2065 res = __kmp_omp_task(gtid, new_task, true);
2066
2067 KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
2068 "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
2069 gtid, loc_ref, new_taskdata));
2070#if OMPT_SUPPORT1
2071 if (UNLIKELY(ompt_enabled.enabled && parent != NULL)__builtin_expect(!!(ompt_enabled.enabled && parent !=
__null), 0)
) {
2072 parent->ompt_task_info.frame.enter_frame = ompt_data_none{0};
2073 }
2074#endif
2075 return res;
2076}
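// ---------------------------------------------------------------------------
// [Editor's note: illustrative example only, not part of kmp_tasking.cpp]
// __kmp_omp_taskloop_task() is used when the runtime itself generates the
// chunk tasks of a taskloop and wants the OMPT task-create callback to report
// the user's call site (codeptr_ra) rather than a runtime-internal address.
// The user-level construct that leads here:
void axpy(int n, double a, const double *x, double *y) {
  #pragma omp parallel
  #pragma omp single
  #pragma omp taskloop grainsize(1024)
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i];
}
// ---------------------------------------------------------------------------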
2077
2078template <bool ompt>
2079static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
2080 void *frame_address,
2081 void *return_address) {
2082 kmp_taskdata_t *taskdata = nullptr;
2083 kmp_info_t *thread;
2084 int thread_finished = FALSE0;
2085 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT)((void)0);
2086
2087 KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
2088 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_tasking.cpp"
, 2088); }
;
2089
2090 if (__kmp_tasking_mode != tskm_immediate_exec) {
2091 thread = __kmp_threads[gtid];
2092 taskdata = thread->th.th_current_task;
2093
2094#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2095 ompt_data_t *my_task_data;
2096 ompt_data_t *my_parallel_data;
2097
2098 if (ompt) {
2099 my_task_data = &(taskdata->ompt_task_info.task_data);
2100 my_parallel_data = OMPT_CUR_TEAM_DATA(thread)(&(thread->th.th_team->t.ompt_team_info.parallel_data
))
;
2101
2102 taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
2103
2104 if (ompt_enabled.ompt_callback_sync_region) {
2105 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2106 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
2107 my_task_data, return_address);
2108 }
2109
2110 if (ompt_enabled.ompt_callback_sync_region_wait) {
2111 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2112 ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
2113 my_task_data, return_address);
2114 }
2115 }
2116#endif // OMPT_SUPPORT && OMPT_OPTIONAL
2117
2118// Debugger: The taskwait is active. Store the location and the thread that
2119// encountered the taskwait.
2120#if USE_ITT_BUILD1
2121// Note: These values are used by ITT events as well.
2122#endif /* USE_ITT_BUILD */
2123 taskdata->td_taskwait_counter += 1;
2124 taskdata->td_taskwait_ident = loc_ref;
2125 taskdata->td_taskwait_thread = gtid + 1;
2126
2127#if USE_ITT_BUILD1
2128 void *itt_sync_obj = NULL__null;
2129#if USE_ITT_NOTIFY1
2130 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj)if (__builtin_expect(!!(__kmp_itt_sync_create_ptr__3_0), 0)) {
itt_sync_obj = __kmp_itt_taskwait_object(gtid); if (itt_sync_obj
!= __null) { __kmp_itt_taskwait_starting(gtid, itt_sync_obj)
; } }
;
2131#endif /* USE_ITT_NOTIFY */
2132#endif /* USE_ITT_BUILD */
2133
2134 bool must_wait =
2135 !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
2136
2137 must_wait = must_wait || (thread->th.th_task_team != NULL__null &&
2138 thread->th.th_task_team->tt.tt_found_proxy_tasks);
2139 // If hidden helper thread is encountered, we must enable wait here.
2140 must_wait =
2141 must_wait ||
2142 (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL__null &&
2143 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);
2144
2145 if (must_wait) {
2146 kmp_flag_32<false, false> flag(
2147 RCAST(std::atomic<kmp_uint32> *,reinterpret_cast<std::atomic<kmp_uint32> *>(&
(taskdata->td_incomplete_child_tasks))
2148 &(taskdata->td_incomplete_child_tasks))reinterpret_cast<std::atomic<kmp_uint32> *>(&
(taskdata->td_incomplete_child_tasks))
,
2149 0U);
2150 while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks)(&taskdata->td_incomplete_child_tasks)->load(std::memory_order_acquire
)
!= 0) {
2151 flag.execute_tasks(thread, gtid, FALSE0,
2152 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj,
2153 __kmp_task_stealing_constraint);
2154 }
2155 }
2156#if USE_ITT_BUILD1
2157 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj)if (__builtin_expect(!!(itt_sync_obj != __null), 0)) __kmp_itt_taskwait_finished
(gtid, itt_sync_obj);
;
2158 KMP_FSYNC_ACQUIRED(taskdata)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(taskdata))
; // acquire self - sync with children
2159#endif /* USE_ITT_BUILD */
2160
2161 // Debugger: The taskwait is completed. Location remains, but thread is
2162 // negated.
2163 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2164
2165#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2166 if (ompt) {
2167 if (ompt_enabled.ompt_callback_sync_region_wait) {
2168 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2169 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2170 my_task_data, return_address);
2171 }
2172 if (ompt_enabled.ompt_callback_sync_region) {
2173 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2174 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2175 my_task_data, return_address);
2176 }
2177 taskdata->ompt_task_info.frame.enter_frame = ompt_data_none{0};
2178 }
2179#endif // OMPT_SUPPORT && OMPT_OPTIONAL
2180
2181 }
2182
2183 KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
2184 "returning TASK_CURRENT_NOT_QUEUED\n",
2185 gtid, taskdata));
2186
2187 return TASK_CURRENT_NOT_QUEUED0;
2188}
2189
2190#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2191OMPT_NOINLINE__attribute__((noinline))
2192static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
2193 void *frame_address,
2194 void *return_address) {
2195 return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
2196 return_address);
2197}
2198#endif // OMPT_SUPPORT && OMPT_OPTIONAL
2199
2200// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
2201// complete
2202kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
2203#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2204 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
2205 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
2206 return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0)__builtin_frame_address(0),
2207 OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid));
2208 }
2209#endif
2210 return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL__null, NULL__null);
2211}
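// ---------------------------------------------------------------------------
// [Editor's note: illustrative example only, not part of kmp_tasking.cpp]
// __kmpc_omp_taskwait() is the entry behind "#pragma omp taskwait": the
// encountering task waits for its direct child tasks and, as the template
// above shows, may execute queued tasks itself while waiting. The classic
// user-level shape:
#include <cstdio>

static int fib(int n) {
  if (n < 2)
    return n;
  int a, b;
  #pragma omp task shared(a)
  a = fib(n - 1);
  #pragma omp task shared(b)
  b = fib(n - 2);
  #pragma omp taskwait // -> __kmpc_omp_taskwait(loc, gtid)
  return a + b;
}

int main() {
  int r = 0;
  #pragma omp parallel
  #pragma omp single
  r = fib(20);
  std::printf("fib(20) = %d\n", r); // prints 6765
  return 0;
}
// ---------------------------------------------------------------------------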
2212
2213// __kmpc_omp_taskyield: switch to a different task
2214kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
2215 kmp_taskdata_t *taskdata = NULL__null;
2216 kmp_info_t *thread;
2217 int thread_finished = FALSE0;
2218
2219 KMP_COUNT_BLOCK(OMP_TASKYIELD)((void)0);
2220 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD)((void)0);
2221
2222 KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
2223 gtid, loc_ref, end_part));
2224 __kmp_assert_valid_gtid(gtid);
2225
2226 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
2227 thread = __kmp_threads[gtid];
2228 taskdata = thread->th.th_current_task;
2229// Should we model this as a task wait or not?
2230// Debugger: The taskwait is active. Store the location and the thread that
2231// encountered the taskwait.
2232#if USE_ITT_BUILD1
2233// Note: These values are used by ITT events as well.
2234#endif /* USE_ITT_BUILD */
2235 taskdata->td_taskwait_counter += 1;
2236 taskdata->td_taskwait_ident = loc_ref;
2237 taskdata->td_taskwait_thread = gtid + 1;
2238
2239#if USE_ITT_BUILD1
2240 void *itt_sync_obj = NULL__null;
2241#if USE_ITT_NOTIFY1
2242 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj)if (__builtin_expect(!!(__kmp_itt_sync_create_ptr__3_0), 0)) {
itt_sync_obj = __kmp_itt_taskwait_object(gtid); if (itt_sync_obj
!= __null) { __kmp_itt_taskwait_starting(gtid, itt_sync_obj)
; } }
;
2243#endif /* USE_ITT_NOTIFY */
2244#endif /* USE_ITT_BUILD */
2245 if (!taskdata->td_flags.team_serial) {
2246 kmp_task_team_t *task_team = thread->th.th_task_team;
2247 if (task_team != NULL__null) {
2248 if (KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks))) {
2249#if OMPT_SUPPORT1
2250 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
2251 thread->th.ompt_thread_info.ompt_task_yielded = 1;
2252#endif
2253 __kmp_execute_tasks_32(
2254 thread, gtid, (kmp_flag_32<> *)NULL__null, FALSE0,
2255 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj,
2256 __kmp_task_stealing_constraint);
2257#if OMPT_SUPPORT1
2258 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0))
2259 thread->th.ompt_thread_info.ompt_task_yielded = 0;
2260#endif
2261 }
2262 }
2263 }
2264#if USE_ITT_BUILD1
2265 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj)if (__builtin_expect(!!(itt_sync_obj != __null), 0)) __kmp_itt_taskwait_finished
(gtid, itt_sync_obj);
;
2266#endif /* USE_ITT_BUILD */
2267
2268 // Debugger: The taskwait is completed. Location remains, but thread is
2269 // negated.
2270 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2271 }
2272
2273 KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
2274 "returning TASK_CURRENT_NOT_QUEUED\n",
2275 gtid, taskdata));
2276
2277 return TASK_CURRENT_NOT_QUEUED0;
2278}
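// ---------------------------------------------------------------------------
// [Editor's note: illustrative example only, not part of kmp_tasking.cpp]
// "#pragma omp taskyield" maps onto __kmpc_omp_taskyield(). It is only a hint:
// as the code above shows, the thread may run other queued tasks at that point
// (via __kmp_execute_tasks_32) but is free to do nothing. The textbook use is
// yielding inside a lock spin so that other tasks can make progress:
#include <omp.h>

void do_work_with_lock(omp_lock_t *lock) {
  while (!omp_test_lock(lock)) {
    #pragma omp taskyield // let queued tasks run while we wait for the lock
  }
  // ... work that needs the lock ...
  omp_unset_lock(lock);
}
// ---------------------------------------------------------------------------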
2279
2280// Task Reduction implementation
2281//
2282// Note: the initial implementation did not take into account the possibility
2283// of specifying omp_orig for the initializer of a UDR (user-defined reduction).
2284// The corrected implementation takes the omp_orig object into account.
2285// The compiler is free to use the old implementation if omp_orig is not specified.
2286
2287/*!
2288@ingroup BASIC_TYPES
2289@{
2290*/
2291
2292/*!
2293Flags for special info per task reduction item.
2294*/
2295typedef struct kmp_taskred_flags {
2296 /*! 1 - use lazy alloc/init (e.g. big objects, num tasks < num threads) */
2297 unsigned lazy_priv : 1;
2298 unsigned reserved31 : 31;
2299} kmp_taskred_flags_t;
2300
2301/*!
2302Internal struct for reduction data item related info set up by compiler.
2303*/
2304typedef struct kmp_task_red_input {
2305 void *reduce_shar; /**< shared between tasks item to reduce into */
2306 size_t reduce_size; /**< size of data item in bytes */
2307 // three compiler-generated routines (init, fini are optional):
2308 void *reduce_init; /**< data initialization routine (single parameter) */
2309 void *reduce_fini; /**< data finalization routine */
2310 void *reduce_comb; /**< data combiner routine */
2311 kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
2312} kmp_task_red_input_t;
2313
2314/*!
2315Internal struct for reduction data item related info saved by the library.
2316*/
2317typedef struct kmp_taskred_data {
2318 void *reduce_shar; /**< shared between tasks item to reduce into */
2319 size_t reduce_size; /**< size of data item */
2320 kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
2321 void *reduce_priv; /**< array of thread specific items */
2322 void *reduce_pend; /**< end of private data for faster comparison op */
2323 // three compiler-generated routines (init, fini are optional):
2324 void *reduce_comb; /**< data combiner routine */
2325 void *reduce_init; /**< data initialization routine (two parameters) */
2326 void *reduce_fini; /**< data finalization routine */
2327 void *reduce_orig; /**< original item (can be used in UDR initializer) */
2328} kmp_taskred_data_t;
2329
2330/*!
2331Internal struct for reduction data item related info set up by compiler.
2332
2333New interface: added reduce_orig field to provide omp_orig for UDR initializer.
2334*/
2335typedef struct kmp_taskred_input {
2336 void *reduce_shar; /**< shared between tasks item to reduce into */
2337 void *reduce_orig; /**< original reduction item used for initialization */
2338 size_t reduce_size; /**< size of data item */
2339 // three compiler-generated routines (init, fini are optional):
2340 void *reduce_init; /**< data initialization routine (two parameters) */
2341 void *reduce_fini; /**< data finalization routine */
2342 void *reduce_comb; /**< data combiner routine */
2343 kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
2344} kmp_taskred_input_t;
2345/*!
2346@}
2347*/
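// ---------------------------------------------------------------------------
// [Editor's note: illustrative sketch only, not part of kmp_tasking.cpp]
// For a task reduction item such as "task_reduction(+ : sum)" over a double,
// the compiler-generated routines referenced by the records above would look
// roughly like the functions below (the names are hypothetical; the real
// thunks are emitted by the compiler). reduce_init here uses the two-parameter
// "new" interface that also receives omp_orig:
static void example_red_init(void *priv, void *orig) {
  *(double *)priv = 0.0; // identity element for '+'
  (void)orig;            // omp_orig is not needed for a builtin '+' reduction
}
static void example_red_comb(void *shar, void *priv) {
  *(double *)shar += *(double *)priv; // fold one private copy into the shared item
}
// reduce_fini may be left NULL when no per-copy cleanup is required;
// reduce_shar, reduce_orig, reduce_size and flags carry the shared item's
// address, the original object, the item size, and the lazy-allocation flag.
// ---------------------------------------------------------------------------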
2348
2349template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src);
2350template <>
2351void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item,
2352 kmp_task_red_input_t &src) {
2353 item.reduce_orig = NULL__null;
2354}
2355template <>
2356void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item,
2357 kmp_taskred_input_t &src) {
2358 if (src.reduce_orig != NULL__null) {
2359 item.reduce_orig = src.reduce_orig;
2360 } else {
2361 item.reduce_orig = src.reduce_shar;
2362 } // non-NULL reduce_orig means new interface used
2363}
2364
2365template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, size_t j);
2366template <>
2367void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item,
2368 size_t offset) {
2369 ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset);
2370}
2371template <>
2372void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item,
2373 size_t offset) {
2374 ((void (*)(void *, void *))item.reduce_init)(
2375 (char *)(item.reduce_priv) + offset, item.reduce_orig);
2376}
2377
2378template <typename T>
2379void *__kmp_task_reduction_init(int gtid, int num, T *data) {
2380 __kmp_assert_valid_gtid(gtid);
2381 kmp_info_t *thread = __kmp_threads[gtid];
2382 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2383 kmp_uint32 nth = thread->th.th_team_nproc;
2384 kmp_taskred_data_t *arr;
2385
2386 // check input data just in case
2387 KMP_ASSERT(tg != NULL)if (!(tg != __null)) { __kmp_debug_assert("tg != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2387); }
;
2388 KMP_ASSERT(data != NULL)if (!(data != __null)) { __kmp_debug_assert("data != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2388); }
;
2389 KMP_ASSERT(num > 0)if (!(num > 0)) { __kmp_debug_assert("num > 0", "openmp/runtime/src/kmp_tasking.cpp"
, 2389); }
;
2390 if (nth == 1) {
2391 KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
2392 gtid, tg));
2393 return (void *)tg;
2394 }
2395 KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
2396 gtid, tg, num));
2397 arr = (kmp_taskred_data_t *)__kmp_thread_malloc(___kmp_thread_malloc((thread), (num * sizeof(kmp_taskred_data_t
)), "openmp/runtime/src/kmp_tasking.cpp", 2398)
2398 thread, num * sizeof(kmp_taskred_data_t))___kmp_thread_malloc((thread), (num * sizeof(kmp_taskred_data_t
)), "openmp/runtime/src/kmp_tasking.cpp", 2398)
;
2399 for (int i = 0; i < num; ++i) {
2400 size_t size = data[i].reduce_size - 1;
2401 // round the size up to cache line per thread-specific item
2402 size += CACHE_LINE64 - size % CACHE_LINE64;
2403 KMP_ASSERT(data[i].reduce_comb != NULL)if (!(data[i].reduce_comb != __null)) { __kmp_debug_assert("data[i].reduce_comb != NULL"
, "openmp/runtime/src/kmp_tasking.cpp", 2403); }
; // combiner is mandatory
2404 arr[i].reduce_shar = data[i].reduce_shar;
2405 arr[i].reduce_size = size;
2406 arr[i].flags = data[i].flags;
2407 arr[i].reduce_comb = data[i].reduce_comb;
2408 arr[i].reduce_init = data[i].reduce_init;
2409 arr[i].reduce_fini = data[i].reduce_fini;
2410 __kmp_assign_orig<T>(arr[i], data[i]);
2411 if (!arr[i].flags.lazy_priv) {
2412 // allocate cache-line aligned block and fill it with zeros
2413 arr[i].reduce_priv = __kmp_allocate(nth * size)___kmp_allocate((nth * size), "openmp/runtime/src/kmp_tasking.cpp"
, 2413)
;
2414 arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
2415 if (arr[i].reduce_init != NULL__null) {
2416 // initialize all thread-specific items
2417 for (size_t j = 0; j < nth; ++j) {
2418 __kmp_call_init<T>(arr[i], j * size);
2419 }
2420 }
2421 } else {
2422 // only allocate space for pointers now,
2423 // objects will be lazily allocated/initialized if/when requested
2424 // note that __kmp_allocate zeroes the allocated memory
2425 arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *))___kmp_allocate((nth * sizeof(void *)), "openmp/runtime/src/kmp_tasking.cpp"
, 2425)
;
2426 }
2427 }
2428 tg->reduce_data = (void *)arr;
2429 tg->reduce_num_data = num;
2430 return (void *)tg;
2431}
2432
2433/*!
2434@ingroup TASKING
2435@param gtid Global thread ID
2436@param num Number of data items to reduce
2437@param data Array of data for reduction
2438@return The taskgroup identifier
2439
2440Initialize task reduction for the taskgroup.
2441
2442Note: this entry assumes the optional compiler-generated initializer routine
2443has a single parameter, a pointer to the object to be initialized. That means
2444the reduction either does not use the omp_orig object, or omp_orig is accessible
2445without the help of the runtime library.
2446*/
2447void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
2448 return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
2449}
2450
2451/*!
2452@ingroup TASKING
2453@param gtid Global thread ID
2454@param num Number of data items to reduce
2455@param data Array of data for reduction
2456@return The taskgroup identifier
2457
2458Initialize task reduction for the taskgroup.
2459
2460Note: this entry assumes the optional compiler-generated initializer routine
2461has two parameters, a pointer to the object to be initialized and a pointer to omp_orig.
2462*/
2463void *__kmpc_taskred_init(int gtid, int num, void *data) {
2464 return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
2465}
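// ---------------------------------------------------------------------------
// [Editor's note: illustrative example only, not part of kmp_tasking.cpp]
// __kmpc_task_reduction_init()/__kmpc_taskred_init() are reached from a
// taskgroup with a task_reduction clause; participating tasks opt in with
// in_reduction and obtain their thread-specific copy through
// __kmpc_task_reduction_get_th_data() below. User-level shape:
#include <cstdio>

int main() {
  long sum = 0;
  #pragma omp parallel
  #pragma omp single
  {
    #pragma omp taskgroup task_reduction(+ : sum)
    for (int i = 1; i <= 100; ++i) {
      #pragma omp task in_reduction(+ : sum) firstprivate(i)
      sum += i;
    }
  }
  std::printf("sum = %ld\n", sum); // 5050 once the taskgroup has completed
  return 0;
}
// ---------------------------------------------------------------------------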
2466
2467// Copy task reduction data (except for shared pointers).
2468template <typename T>
2469void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
2470 kmp_taskgroup_t *tg, void *reduce_data) {
2471 kmp_taskred_data_t *arr;
2472 KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
2473 " from data %p\n",
2474 thr, tg, reduce_data));
2475 arr = (kmp_taskred_data_t *)__kmp_thread_malloc(___kmp_thread_malloc((thr), (num * sizeof(kmp_taskred_data_t)
), "openmp/runtime/src/kmp_tasking.cpp", 2476)
2476 thr, num * sizeof(kmp_taskred_data_t))___kmp_thread_malloc((thr), (num * sizeof(kmp_taskred_data_t)
), "openmp/runtime/src/kmp_tasking.cpp", 2476)
;
2477 // threads will share private copies, thunk routines, sizes, flags, etc.:
2478 KMP_MEMCPYmemcpy(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
2479 for (int i = 0; i < num; ++i) {
2480 arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers
2481 }
2482 tg->reduce_data = (void *)arr;
2483 tg->reduce_num_data = num;
2484}
2485
2486/*!
2487@ingroup TASKING
2488@param gtid Global thread ID
2489@param tskgrp The taskgroup ID (optional)
2490@param data Shared location of the item
2491@return The pointer to per-thread data
2492
2493Get thread-specific location of data item
2494*/
2495void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
2496 __kmp_assert_valid_gtid(gtid);
2497 kmp_info_t *thread = __kmp_threads[gtid];
2498 kmp_int32 nth = thread->th.th_team_nproc;
2499 if (nth == 1)
2500 return data; // nothing to do
2501
2502 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
2503 if (tg == NULL__null)
2504 tg = thread->th.th_current_task->td_taskgroup;
2505 KMP_ASSERT(tg != NULL)if (!(tg != __null)) { __kmp_debug_assert("tg != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2505); }
;
2506 kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data);
2507 kmp_int32 num = tg->reduce_num_data;
2508 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2509
2510 KMP_ASSERT(data != NULL)if (!(data != __null)) { __kmp_debug_assert("data != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2510); }
;
2511 while (tg != NULL__null) {
2512 for (int i = 0; i < num; ++i) {
2513 if (!arr[i].flags.lazy_priv) {
2514 if (data == arr[i].reduce_shar ||
2515 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
2516 return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
2517 } else {
2518 // check shared location first
2519 void **p_priv = (void **)(arr[i].reduce_priv);
2520 if (data == arr[i].reduce_shar)
2521 goto found;
2522 // check if we get some thread specific location as parameter
2523 for (int j = 0; j < nth; ++j)
2524 if (data == p_priv[j])
2525 goto found;
2526 continue; // not found, continue search
2527 found:
2528 if (p_priv[tid] == NULL__null) {
2529 // allocate thread specific object lazily
2530 p_priv[tid] = __kmp_allocate(arr[i].reduce_size)___kmp_allocate((arr[i].reduce_size), "openmp/runtime/src/kmp_tasking.cpp"
, 2530)
;
2531 if (arr[i].reduce_init != NULL__null) {
2532 if (arr[i].reduce_orig != NULL__null) { // new interface
2533 ((void (*)(void *, void *))arr[i].reduce_init)(
2534 p_priv[tid], arr[i].reduce_orig);
2535 } else { // old interface (single parameter)
2536 ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
2537 }
2538 }
2539 }
2540 return p_priv[tid];
2541 }
2542 }
2543 tg = tg->parent;
2544 arr = (kmp_taskred_data_t *)(tg->reduce_data);
2545 num = tg->reduce_num_data;
2546 }
2547 KMP_ASSERT2(0, "Unknown task reduction item")if (!(0)) { __kmp_debug_assert(("Unknown task reduction item"
), "openmp/runtime/src/kmp_tasking.cpp", 2547); }
;
2548 return NULL__null; // ERROR, this line never executed
2549}
2550
2551// Finalize task reduction.
2552// Called from __kmpc_end_taskgroup()
2553static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
2554 kmp_int32 nth = th->th.th_team_nproc;
2555 KMP_DEBUG_ASSERT(nth > 1)if (!(nth > 1)) { __kmp_debug_assert("nth > 1", "openmp/runtime/src/kmp_tasking.cpp"
, 2555); }
; // should not be called if nth == 1
2556 kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
2557 kmp_int32 num = tg->reduce_num_data;
2558 for (int i = 0; i < num; ++i) {
2559 void *sh_data = arr[i].reduce_shar;
2560 void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
2561 void (*f_comb)(void *, void *) =
2562 (void (*)(void *, void *))(arr[i].reduce_comb);
2563 if (!arr[i].flags.lazy_priv) {
2564 void *pr_data = arr[i].reduce_priv;
2565 size_t size = arr[i].reduce_size;
2566 for (int j = 0; j < nth; ++j) {
2567 void *priv_data = (char *)pr_data + j * size;
2568 f_comb(sh_data, priv_data); // combine results
2569 if (f_fini)
2570 f_fini(priv_data); // finalize if needed
2571 }
2572 } else {
2573 void **pr_data = (void **)(arr[i].reduce_priv);
2574 for (int j = 0; j < nth; ++j) {
2575 if (pr_data[j] != NULL__null) {
2576 f_comb(sh_data, pr_data[j]); // combine results
2577 if (f_fini)
2578 f_fini(pr_data[j]); // finalize if needed
2579 __kmp_free(pr_data[j])___kmp_free((pr_data[j]), "openmp/runtime/src/kmp_tasking.cpp"
, 2579)
;
2580 }
2581 }
2582 }
2583 __kmp_free(arr[i].reduce_priv)___kmp_free((arr[i].reduce_priv), "openmp/runtime/src/kmp_tasking.cpp"
, 2583)
;
2584 }
2585 __kmp_thread_free(th, arr)___kmp_thread_free((th), (arr), "openmp/runtime/src/kmp_tasking.cpp"
, 2585)
;
2586 tg->reduce_data = NULL__null;
2587 tg->reduce_num_data = 0;
2588}
2589
2590// Cleanup task reduction data for parallel or worksharing,
2591// do not touch task private data other threads still working with.
2592// Called from __kmpc_end_taskgroup()
2593static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
2594 __kmp_thread_free(th, tg->reduce_data)___kmp_thread_free((th), (tg->reduce_data), "openmp/runtime/src/kmp_tasking.cpp"
, 2594)
;
2595 tg->reduce_data = NULL__null;
2596 tg->reduce_num_data = 0;
2597}
2598
2599template <typename T>
2600void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
2601 int num, T *data) {
2602 __kmp_assert_valid_gtid(gtid);
2603 kmp_info_t *thr = __kmp_threads[gtid];
2604 kmp_int32 nth = thr->th.th_team_nproc;
2605 __kmpc_taskgroup(loc, gtid); // form new taskgroup first
2606 if (nth == 1) {
2607 KA_TRACE(10,
2608 ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
2609 gtid, thr->th.th_current_task->td_taskgroup));
2610 return (void *)thr->th.th_current_task->td_taskgroup;
2611 }
2612 kmp_team_t *team = thr->th.th_team;
2613 void *reduce_data;
2614 kmp_taskgroup_t *tg;
2615 reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws])(&team->t.t_tg_reduce_data[is_ws])->load(std::memory_order_relaxed
)
;
2616 if (reduce_data == NULL__null &&
2617 __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
2618 (void *)1)) {
2619 // single thread enters this block to initialize common reduction data
2620 KMP_DEBUG_ASSERT(reduce_data == NULL)if (!(reduce_data == __null)) { __kmp_debug_assert("reduce_data == __null"
, "openmp/runtime/src/kmp_tasking.cpp", 2620); }
;
2621 // first initialize own data, then make a copy other threads can use
2622 tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
2623 reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t))___kmp_thread_malloc((thr), (num * sizeof(kmp_taskred_data_t)
), "openmp/runtime/src/kmp_tasking.cpp", 2623)
;
2624 KMP_MEMCPYmemcpy(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
2625 // fini counters should be 0 at this point
2626 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0)if (!((&team->t.t_tg_fini_counter[0])->load(std::memory_order_relaxed
) == 0)) { __kmp_debug_assert("(&team->t.t_tg_fini_counter[0])->load(std::memory_order_relaxed) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 2626); }
;
2627 KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0)if (!((&team->t.t_tg_fini_counter[1])->load(std::memory_order_relaxed
) == 0)) { __kmp_debug_assert("(&team->t.t_tg_fini_counter[1])->load(std::memory_order_relaxed) == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 2627); }
;
2628 KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data)(&team->t.t_tg_reduce_data[is_ws])->store(reduce_data
, std::memory_order_release)
;
2629 } else {
2630 while (
2631 (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])(&team->t.t_tg_reduce_data[is_ws])->load(std::memory_order_acquire
)
) ==
2632 (void *)1) { // wait for task reduction initialization
2633 KMP_CPU_PAUSE()__kmp_x86_pause();
2634 }
2635 KMP_DEBUG_ASSERT(reduce_data > (void *)1)if (!(reduce_data > (void *)1)) { __kmp_debug_assert("reduce_data > (void *)1"
, "openmp/runtime/src/kmp_tasking.cpp", 2635); }
; // should be valid pointer here
2636 tg = thr->th.th_current_task->td_taskgroup;
2637 __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
2638 }
2639 return tg;
2640}
2641
2642/*!
2643@ingroup TASKING
2644@param loc Source location info
2645@param gtid Global thread ID
2646@param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
2647@param num Number of data items to reduce
2648@param data Array of data for reduction
2649@return The taskgroup identifier
2650
2651Initialize task reduction for a parallel or worksharing construct.
2652
2653Note: this entry assumes the optional compiler-generated initializer routine
2654has a single parameter - a pointer to the object to be initialized. That means
2655the reduction either does not use the omp_orig object, or omp_orig is accessible
2656without the help of the runtime library.
2657*/
2658void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
2659 int num, void *data) {
2660 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2661 (kmp_task_red_input_t *)data);
2662}
2663
2664/*!
2665@ingroup TASKING
2666@param loc Source location info
2667@param gtid Global thread ID
2668@param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
2669@param num Number of data items to reduce
2670@param data Array of data for reduction
2671@return The taskgroup identifier
2672
2673Initialize task reduction for a parallel or worksharing construct.
2674
2675Note: this entry assumes the optional compiler-generated initializer routine
2676has two parameters: a pointer to the object to be initialized and a pointer to omp_orig.
2677*/
2678void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
2679 void *data) {
2680 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2681 (kmp_taskred_input_t *)data);
2682}
2683
2684/*!
2685@ingroup TASKING
2686@param loc Source location info
2687@param gtid Global thread ID
2688@param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
2689
2690Finalize task reduction for a parallel or worksharing construct.
2691*/
2692void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
2693 __kmpc_end_taskgroup(loc, gtid);
2694}
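
Editor's note: for orientation, the user-level feature these modifier init/fini entry points implement is the task reduction-modifier on a parallel or worksharing reduction clause (OpenMP 5.0), with explicit tasks joining via in_reduction. A small illustration of that feature only — the exact lowering to the __kmpc_* calls above is up to the compiler:

#include <cstdio>

int main() {
  long sum = 0;
  // The "task" modifier creates a reduction scope that explicit tasks
  // (here, the taskloop's tasks) can participate in via in_reduction.
#pragma omp parallel reduction(task, + : sum)
#pragma omp single
#pragma omp taskloop in_reduction(+ : sum)
  for (int i = 1; i <= 100; ++i)
    sum += i;
  std::printf("sum = %ld\n", sum); // 5050
}
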
2695
2696// __kmpc_taskgroup: Start a new taskgroup
2697void __kmpc_taskgroup(ident_t *loc, int gtid) {
2698 __kmp_assert_valid_gtid(gtid);
2699 kmp_info_t *thread = __kmp_threads[gtid];
2700 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2701 kmp_taskgroup_t *tg_new =
2702 (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t))___kmp_thread_malloc((thread), (sizeof(kmp_taskgroup_t)), "openmp/runtime/src/kmp_tasking.cpp"
, 2702)
;
2703 KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_taskgroup: T#%d loc=%p group=%p\n"
, gtid, loc, tg_new); }
;
2704 KMP_ATOMIC_ST_RLX(&tg_new->count, 0)(&tg_new->count)->store(0, std::memory_order_relaxed
)
;
2705 KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq)(&tg_new->cancel_request)->store(cancel_noreq, std::
memory_order_relaxed)
;
2706 tg_new->parent = taskdata->td_taskgroup;
2707 tg_new->reduce_data = NULL__null;
2708 tg_new->reduce_num_data = 0;
2709 tg_new->gomp_data = NULL__null;
2710 taskdata->td_taskgroup = tg_new;
2711
2712#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2713 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region), 0
)
) {
2714 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2715 if (!codeptr)
2716 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2717 kmp_team_t *team = thread->th.th_team;
2718 ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
2719 // FIXME: I think this is wrong for lwt!
2720 ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
2721
2722 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2723 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2724 &(my_task_data), codeptr);
2725 }
2726#endif
2727}
2728
2729// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
2730// and its descendants are complete
2731void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
2732 __kmp_assert_valid_gtid(gtid);
2733 kmp_info_t *thread = __kmp_threads[gtid];
2734 kmp_taskdata_t *taskdata = thread->th.th_current_task;
2735 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
2736 int thread_finished = FALSE0;
2737
2738#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2739 kmp_team_t *team;
2740 ompt_data_t my_task_data;
2741 ompt_data_t my_parallel_data;
2742 void *codeptr = nullptr;
2743 if (UNLIKELY(ompt_enabled.enabled)__builtin_expect(!!(ompt_enabled.enabled), 0)) {
2744 team = thread->th.th_team;
2745 my_task_data = taskdata->ompt_task_info.task_data;
2746 // FIXME: I think this is wrong for lwt!
2747 my_parallel_data = team->t.ompt_team_info.parallel_data;
2748 codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
2749 if (!codeptr)
2750 codeptr = OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0);
2751 }
2752#endif
2753
2754 KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n"
, gtid, loc); }
;
2755 KMP_DEBUG_ASSERT(taskgroup != NULL)if (!(taskgroup != __null)) { __kmp_debug_assert("taskgroup != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 2755); }
;
2756 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP)((void)0);
2757
2758 if (__kmp_tasking_mode != tskm_immediate_exec) {
2759 // mark the task as waiting (not at a barrier)
2760 taskdata->td_taskwait_counter += 1;
2761 taskdata->td_taskwait_ident = loc;
2762 taskdata->td_taskwait_thread = gtid + 1;
2763#if USE_ITT_BUILD1
2764 // For ITT the taskgroup wait is similar to taskwait until we need to
2765 // distinguish them
2766 void *itt_sync_obj = NULL__null;
2767#if USE_ITT_NOTIFY1
2768 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj)if (__builtin_expect(!!(__kmp_itt_sync_create_ptr__3_0), 0)) {
itt_sync_obj = __kmp_itt_taskwait_object(gtid); if (itt_sync_obj
!= __null) { __kmp_itt_taskwait_starting(gtid, itt_sync_obj)
; } }
;
2769#endif /* USE_ITT_NOTIFY */
2770#endif /* USE_ITT_BUILD */
2771
2772#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2773 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region_wait
), 0)
) {
2774 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2775 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2776 &(my_task_data), codeptr);
2777 }
2778#endif
2779
2780 if (!taskdata->td_flags.team_serial ||
2781 (thread->th.th_task_team != NULL__null &&
2782 (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
2783 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
2784 kmp_flag_32<false, false> flag(
2785 RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count))reinterpret_cast<std::atomic<kmp_uint32> *>(&
(taskgroup->count))
, 0U);
2786 while (KMP_ATOMIC_LD_ACQ(&taskgroup->count)(&taskgroup->count)->load(std::memory_order_acquire
)
!= 0) {
2787 flag.execute_tasks(thread, gtid, FALSE0,
2788 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj,
2789 __kmp_task_stealing_constraint);
2790 }
2791 }
2792 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
2793
2794#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2795 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region_wait
), 0)
) {
2796 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2797 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2798 &(my_task_data), codeptr);
2799 }
2800#endif
2801
2802#if USE_ITT_BUILD1
2803 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj)if (__builtin_expect(!!(itt_sync_obj != __null), 0)) __kmp_itt_taskwait_finished
(gtid, itt_sync_obj);
;
2804 KMP_FSYNC_ACQUIRED(taskdata)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(taskdata))
; // acquire self - sync with descendants
2805#endif /* USE_ITT_BUILD */
2806 }
2807 KMP_DEBUG_ASSERT(taskgroup->count == 0)if (!(taskgroup->count == 0)) { __kmp_debug_assert("taskgroup->count == 0"
, "openmp/runtime/src/kmp_tasking.cpp", 2807); }
;
2808
2809 if (taskgroup->reduce_data != NULL__null &&
2810 !taskgroup->gomp_data) { // need to reduce?
2811 int cnt;
2812 void *reduce_data;
2813 kmp_team_t *t = thread->th.th_team;
2814 kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
2815 // check if the <priv> data of the first reduction variable is shared for the team
2816 void *priv0 = arr[0].reduce_priv;
2817 if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])(&t->t.t_tg_reduce_data[0])->load(std::memory_order_acquire
)
) != NULL__null &&
2818 ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
2819 // finishing task reduction on parallel
2820 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0])(&t->t.t_tg_fini_counter[0])->fetch_add(1, std::memory_order_acq_rel
)
;
2821 if (cnt == thread->th.th_team_nproc - 1) {
2822 // we are the last thread passing __kmpc_reduction_modifier_fini()
2823 // finalize task reduction:
2824 __kmp_task_reduction_fini(thread, taskgroup);
2825 // cleanup fields in the team structure:
2826 // TODO: is relaxed store enough here (whole barrier should follow)?
2827 __kmp_thread_free(thread, reduce_data)___kmp_thread_free((thread), (reduce_data), "openmp/runtime/src/kmp_tasking.cpp"
, 2827)
;
2828 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL)(&t->t.t_tg_reduce_data[0])->store(__null, std::memory_order_release
)
;
2829 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0)(&t->t.t_tg_fini_counter[0])->store(0, std::memory_order_release
)
;
2830 } else {
2831 // we are not the last thread passing __kmpc_reduction_modifier_fini(),
2832 // so do not finalize reduction, just clean own copy of the data
2833 __kmp_task_reduction_clean(thread, taskgroup);
2834 }
2835 } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])(&t->t.t_tg_reduce_data[1])->load(std::memory_order_acquire
)
) !=
2836 NULL__null &&
2837 ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
2838 // finishing task reduction on worksharing
2839 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1])(&t->t.t_tg_fini_counter[1])->fetch_add(1, std::memory_order_acq_rel
)
;
2840 if (cnt == thread->th.th_team_nproc - 1) {
2841 // we are the last thread passing __kmpc_reduction_modifier_fini()
2842 __kmp_task_reduction_fini(thread, taskgroup);
2843 // cleanup fields in team structure:
2844 // TODO: is relaxed store enough here (whole barrier should follow)?
2845 __kmp_thread_free(thread, reduce_data)___kmp_thread_free((thread), (reduce_data), "openmp/runtime/src/kmp_tasking.cpp"
, 2845)
;
2846 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL)(&t->t.t_tg_reduce_data[1])->store(__null, std::memory_order_release
)
;
2847 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0)(&t->t.t_tg_fini_counter[1])->store(0, std::memory_order_release
)
;
2848 } else {
2849 // we are not the last thread passing __kmpc_reduction_modifier_fini(),
2850 // so do not finalize reduction, just clean own copy of the data
2851 __kmp_task_reduction_clean(thread, taskgroup);
2852 }
2853 } else {
2854 // finishing task reduction on taskgroup
2855 __kmp_task_reduction_fini(thread, taskgroup);
2856 }
2857 }
2858 // Restore parent taskgroup for the current task
2859 taskdata->td_taskgroup = taskgroup->parent;
2860 __kmp_thread_free(thread, taskgroup)___kmp_thread_free((thread), (taskgroup), "openmp/runtime/src/kmp_tasking.cpp"
, 2860)
;
2861
2862 KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n"
, gtid, taskdata); }
2863 gtid, taskdata))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n"
, gtid, taskdata); }
;
2864
2865#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2866 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)__builtin_expect(!!(ompt_enabled.ompt_callback_sync_region), 0
)
) {
2867 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2868 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2869 &(my_task_data), codeptr);
2870 }
2871#endif
2872}
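
Editor's note: __kmpc_taskgroup and __kmpc_end_taskgroup bracket a taskgroup region; reduce_data/reduce_num_data are only populated when the construct carries a task_reduction clause. A small user-level illustration of the construct this pair implements (feature-level only, not a claim about the exact generated calls):

#include <cstdio>

int main() {
  int hits = 0;
#pragma omp parallel
#pragma omp single
  {
    // The taskgroup does not end until both child tasks (and any of their
    // descendants) complete; their partial results are then reduced into hits.
#pragma omp taskgroup task_reduction(+ : hits)
    {
#pragma omp task in_reduction(+ : hits)
      hits += 1;
#pragma omp task in_reduction(+ : hits)
      hits += 2;
    }
    std::printf("hits = %d\n", hits); // 3
  }
}
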
2873
2874static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
2875 kmp_task_team_t *task_team,
2876 kmp_int32 is_constrained) {
2877 kmp_task_t *task = NULL__null;
2878 kmp_taskdata_t *taskdata;
2879 kmp_taskdata_t *current;
2880 kmp_thread_data_t *thread_data;
2881 int ntasks = task_team->tt.tt_num_task_pri;
2882 if (ntasks == 0) {
2883 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n"
, gtid); }
2884 20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n"
, gtid); }
;
2885 return NULL__null;
2886 }
2887 do {
2888 // decrement num_tasks to "reserve" one task for execution
2889 if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
2890 ntasks - 1))
2891 break;
2892 } while (ntasks > 0);
2893 if (ntasks == 0) {
2894 KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n"
, __kmp_get_global_thread_id()); }
2895 __kmp_get_gtid()))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n"
, __kmp_get_global_thread_id()); }
;
2896 return NULL__null;
2897 }
2898 // We got a "ticket" to get a "reserved" priority task
2899 int deque_ntasks;
2900 kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
2901 do {
2902 KMP_ASSERT(list != NULL)if (!(list != __null)) { __kmp_debug_assert("list != NULL", "openmp/runtime/src/kmp_tasking.cpp"
, 2902); }
;
2903 thread_data = &list->td;
2904 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
2905 deque_ntasks = thread_data->td.td_deque_ntasks;
2906 if (deque_ntasks == 0) {
2907 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2908 KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task: T#%d No tasks to get from %p\n"
, __kmp_get_global_thread_id(), thread_data); }
2909 __kmp_get_gtid(), thread_data))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task: T#%d No tasks to get from %p\n"
, __kmp_get_global_thread_id(), thread_data); }
;
2910 list = list->next;
2911 }
2912 } while (deque_ntasks == 0);
2913 KMP_DEBUG_ASSERT(deque_ntasks)if (!(deque_ntasks)) { __kmp_debug_assert("deque_ntasks", "openmp/runtime/src/kmp_tasking.cpp"
, 2913); }
;
2914 int target = thread_data->td.td_deque_head;
2915 current = __kmp_threads[gtid]->th.th_current_task;
2916 taskdata = thread_data->td.td_deque[target];
2917 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2918 // Bump head pointer and wrap.
2919 thread_data->td.td_deque_head =
2920 (target + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
2921 } else {
2922 if (!task_team->tt.tt_untied_task_encountered) {
2923 // The TSC does not allow stealing the victim task
2924 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2925 KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
"from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); }
2926 "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
"from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); }
2927 gtid, thread_data, task_team, deque_ntasks, target,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
"from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); }
2928 thread_data->td.td_deque_tail))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_get_priority_task(exit #3): T#%d could not get task "
"from %p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, target, thread_data->td.td_deque_tail
); }
;
2929 task_team->tt.tt_num_task_pri++; // atomic inc, restore value
2930 return NULL__null;
2931 }
2932 int i;
2933 // walk through the deque trying to steal any task
2934 taskdata = NULL__null;
2935 for (i = 1; i < deque_ntasks; ++i) {
2936 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
2937 taskdata = thread_data->td.td_deque[target];
2938 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
2939 break; // found task to execute
2940 } else {
2941 taskdata = NULL__null;
2942 }
2943 }
2944 if (taskdata == NULL__null) {
2945 // No appropriate candidate found to execute
2946 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2947 KA_TRACE(if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
"%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2948 10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
"%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2949 "%p: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
"%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2950 gtid, thread_data, task_team, deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
"%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
2951 thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
"%p: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, thread_data
, task_team, deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
;
2952 task_team->tt.tt_num_task_pri++; // atomic inc, restore value
2953 return NULL__null;
2954 }
2955 int prev = target;
2956 for (i = i + 1; i < deque_ntasks; ++i) {
2957 // shift remaining tasks in the deque left by 1
2958 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1);
2959 thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
2960 prev = target;
2961 }
2962 KMP_DEBUG_ASSERT(if (!(thread_data->td.td_deque_tail == (kmp_uint32)((target
+ 1) & ((thread_data->td).td_deque_size - 1)))) { __kmp_debug_assert
("thread_data->td.td_deque_tail == (kmp_uint32)((target + 1) & ((thread_data->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 2964); }
2963 thread_data->td.td_deque_tail ==if (!(thread_data->td.td_deque_tail == (kmp_uint32)((target
+ 1) & ((thread_data->td).td_deque_size - 1)))) { __kmp_debug_assert
("thread_data->td.td_deque_tail == (kmp_uint32)((target + 1) & ((thread_data->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 2964); }
2964 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)))if (!(thread_data->td.td_deque_tail == (kmp_uint32)((target
+ 1) & ((thread_data->td).td_deque_size - 1)))) { __kmp_debug_assert
("thread_data->td.td_deque_tail == (kmp_uint32)((target + 1) & ((thread_data->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 2964); }
;
2965 thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped)
2966 }
2967 thread_data->td.td_deque_ntasks = deque_ntasks - 1;
2968 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
2969 task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
2970 return task;
2971}
2972
2973// __kmp_remove_my_task: remove a task from my own deque
2974static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
2975 kmp_task_team_t *task_team,
2976 kmp_int32 is_constrained) {
2977 kmp_task_t *task;
2978 kmp_taskdata_t *taskdata;
2979 kmp_thread_data_t *thread_data;
2980 kmp_uint32 tail;
2981
2982 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 2982); }
;
2983 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=if (!(task_team->tt.tt_threads_data != __null)) { __kmp_debug_assert
("task_team->tt.tt_threads_data != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 2984); }
2984 NULL)if (!(task_team->tt.tt_threads_data != __null)) { __kmp_debug_assert
("task_team->tt.tt_threads_data != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 2984); }
; // Caller should check this condition
2985
2986 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
2987
2988 KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n"
, gtid, thread_data->td.td_deque_ntasks, thread_data->td
.td_deque_head, thread_data->td.td_deque_tail); }
2989 gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n"
, gtid, thread_data->td.td_deque_ntasks, thread_data->td
.td_deque_head, thread_data->td.td_deque_tail); }
2990 thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n"
, gtid, thread_data->td.td_deque_ntasks, thread_data->td
.td_deque_head, thread_data->td.td_deque_tail); }
;
2991
2992 if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) == 0) {
2993 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2994 ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2995 "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2996 gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
2997 thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
;
2998 return NULL__null;
2999 }
3000
3001 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3002
3003 if (TCR_4(thread_data->td.td_deque_ntasks)(thread_data->td.td_deque_ntasks) == 0) {
3004 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3005 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3006 ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3007 "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3008 gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3009 thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
;
3010 return NULL__null;
3011 }
3012
3013 tail = (thread_data->td.td_deque_tail - 1) &
3014 TASK_DEQUE_MASK(thread_data->td)((thread_data->td).td_deque_size - 1); // Wrap index.
3015 taskdata = thread_data->td.td_deque[tail];
3016
3017 if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
3018 thread->th.th_current_task)) {
3019 // The TSC does not allow stealing the victim task
3020 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3021 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3022 ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3023 "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3024 gtid, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
3025 thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
"ntasks=%d head=%u tail=%u\n", gtid, thread_data->td.td_deque_ntasks
, thread_data->td.td_deque_head, thread_data->td.td_deque_tail
); }
;
3026 return NULL__null;
3027 }
3028
3029 thread_data->td.td_deque_tail = tail;
3030 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1)(thread_data->td.td_deque_ntasks) = (thread_data->td.td_deque_ntasks
- 1)
;
3031
3032 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3033
3034 KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
"ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
3035 "ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
"ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
3036 gtid, taskdata, thread_data->td.td_deque_ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
"ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
3037 thread_data->td.td_deque_head, thread_data->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
"ntasks=%d head=%u tail=%u\n", gtid, taskdata, thread_data->
td.td_deque_ntasks, thread_data->td.td_deque_head, thread_data
->td.td_deque_tail); }
;
3038
3039 task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
3040 return task;
3041}
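
Editor's note: both the owner's tail pop above and the thief's head pop in __kmp_steal_task rely on the deque being a power-of-two ring buffer, so TASK_DEQUE_MASK reduces to size - 1 and indices wrap with a single AND. A minimal standalone sketch of that indexing (tiny_deque is a hypothetical type, not the runtime's thread data):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical ring buffer; kSize must be a power of two so that
// "& (kSize - 1)" wraps indices exactly like TASK_DEQUE_MASK above.
struct tiny_deque {
  static constexpr uint32_t kSize = 8;
  int slots[kSize];
  uint32_t head = 0, tail = 0, ntasks = 0; // tail is one past the newest item

  void push_tail(int v) {
    assert(ntasks < kSize);
    slots[tail] = v;
    tail = (tail + 1) & (kSize - 1);
    ++ntasks;
  }
  bool pop_tail(int *out) { // LIFO end, as in __kmp_remove_my_task
    if (ntasks == 0)
      return false;
    tail = (tail - 1) & (kSize - 1); // unsigned underflow wraps, mask fixes it up
    *out = slots[tail];
    --ntasks;
    return true;
  }
  bool pop_head(int *out) { // FIFO end, as a thief would
    if (ntasks == 0)
      return false;
    *out = slots[head];
    head = (head + 1) & (kSize - 1);
    --ntasks;
    return true;
  }
};

int main() {
  tiny_deque d;
  for (int v = 1; v <= 3; ++v)
    d.push_tail(v);
  int own, stolen;
  d.pop_tail(&own);    // owner takes the newest task: 3
  d.pop_head(&stolen); // thief takes the oldest task: 1
  std::printf("%d %d\n", own, stolen);
}
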
3042
3043// __kmp_steal_task: remove a task from another thread's deque
3044// Assume that the calling thread has already checked the existence of
3045// task_team thread_data before calling this routine.
3046static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
3047 kmp_task_team_t *task_team,
3048 std::atomic<kmp_int32> *unfinished_threads,
3049 int *thread_finished,
3050 kmp_int32 is_constrained) {
3051 kmp_task_t *task;
3052 kmp_taskdata_t *taskdata;
3053 kmp_taskdata_t *current;
3054 kmp_thread_data_t *victim_td, *threads_data;
3055 kmp_int32 target;
3056 kmp_int32 victim_tid;
3057
3058 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 3058); }
;
3059
3060 threads_data = task_team->tt.tt_threads_data;
3061 KMP_DEBUG_ASSERT(threads_data != NULL)if (!(threads_data != __null)) { __kmp_debug_assert("threads_data != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 3061); }
; // Caller should check this condition
3062
3063 victim_tid = victim_thr->th.th_info.ds.ds_tid;
3064 victim_td = &threads_data[victim_tid];
3065
3066 KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3067 "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3068 gtid, __kmp_gtid_from_thread(victim_thr), task_team,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3069 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3070 victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
;
3071
3072 if (TCR_4(victim_td->td.td_deque_ntasks)(victim_td->td.td_deque_ntasks) == 0) {
3073 KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3074 "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3075 gtid, __kmp_gtid_from_thread(victim_thr), task_team,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3076 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
3077 victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, victim_td->td.td_deque_ntasks, victim_td
->td.td_deque_head, victim_td->td.td_deque_tail); }
;
3078 return NULL__null;
3079 }
3080
3081 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
3082
3083 int ntasks = TCR_4(victim_td->td.td_deque_ntasks)(victim_td->td.td_deque_ntasks);
3084 // Check again after we acquire the lock
3085 if (ntasks == 0) {
3086 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3087 KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3088 "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3089 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3090 victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
;
3091 return NULL__null;
3092 }
3093
3094 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL)if (!(victim_td->td.td_deque != __null)) { __kmp_debug_assert
("victim_td->td.td_deque != __null", "openmp/runtime/src/kmp_tasking.cpp"
, 3094); }
;
3095 current = __kmp_threads[gtid]->th.th_current_task;
3096 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
3097 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3098 // Bump head pointer and wrap.
3099 victim_td->td.td_deque_head =
3100 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td)((victim_td->td).td_deque_size - 1);
3101 } else {
3102 if (!task_team->tt.tt_untied_task_encountered) {
3103 // The TSC does not allow stealing the victim task
3104 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3105 KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3106 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3107 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3108 victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #3): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
;
3109 return NULL__null;
3110 }
3111 int i;
3112 // walk through victim's deque trying to steal any task
3113 target = victim_td->td.td_deque_head;
3114 taskdata = NULL__null;
3115 for (i = 1; i < ntasks; ++i) {
3116 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td)((victim_td->td).td_deque_size - 1);
3117 taskdata = victim_td->td.td_deque[target];
3118 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3119 break; // found victim task
3120 } else {
3121 taskdata = NULL__null;
3122 }
3123 }
3124 if (taskdata == NULL__null) {
3125 // No appropriate candidate to steal found
3126 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3127 KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3128 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3129 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3130 victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #4): T#%d could not steal from "
"T#%d: task_team=%p ntasks=%d head=%u tail=%u\n", gtid, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
;
3131 return NULL__null;
3132 }
3133 int prev = target;
3134 for (i = i + 1; i < ntasks; ++i) {
3135 // shift remaining tasks in the deque left by 1
3136 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td)((victim_td->td).td_deque_size - 1);
3137 victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
3138 prev = target;
3139 }
3140 KMP_DEBUG_ASSERT(if (!(victim_td->td.td_deque_tail == (kmp_uint32)((target +
1) & ((victim_td->td).td_deque_size - 1)))) { __kmp_debug_assert
("victim_td->td.td_deque_tail == (kmp_uint32)((target + 1) & ((victim_td->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 3142); }
3141 victim_td->td.td_deque_tail ==if (!(victim_td->td.td_deque_tail == (kmp_uint32)((target +
1) & ((victim_td->td).td_deque_size - 1)))) { __kmp_debug_assert
("victim_td->td.td_deque_tail == (kmp_uint32)((target + 1) & ((victim_td->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 3142); }
3142 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)))if (!(victim_td->td.td_deque_tail == (kmp_uint32)((target +
1) & ((victim_td->td).td_deque_size - 1)))) { __kmp_debug_assert
("victim_td->td.td_deque_tail == (kmp_uint32)((target + 1) & ((victim_td->td).td_deque_size - 1))"
, "openmp/runtime/src/kmp_tasking.cpp", 3142); }
;
3143 victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
3144 }
3145 if (*thread_finished) {
3146 // We need to un-mark this victim as a finished victim. This must be done
3147 // before releasing the lock, or else other threads (starting with the
3148 // primary thread victim) might be prematurely released from the barrier!!!
3149#if KMP_DEBUG1
3150 kmp_int32 count =
3151#endif
3152 KMP_ATOMIC_INC(unfinished_threads)(unfinished_threads)->fetch_add(1, std::memory_order_acq_rel
)
;
3153 KA_TRACE(if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); }
3154 20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); }
3155 ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); }
3156 gtid, count + 1, task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n"
, gtid, count + 1, task_team); }
;
3157 *thread_finished = FALSE0;
3158 }
3159 TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1)(victim_td->td.td_deque_ntasks) = (ntasks - 1);
3160
3161 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3162
3163 KMP_COUNT_BLOCK(TASK_stolen)((void)0);
3164 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3165 ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3166 "task_team=%p ntasks=%d head=%u tail=%u\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3167 gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
3168 ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
"task_team=%p ntasks=%d head=%u tail=%u\n", gtid, taskdata, __kmp_gtid_from_thread
(victim_thr), task_team, ntasks, victim_td->td.td_deque_head
, victim_td->td.td_deque_tail); }
;
3169
3170 task = KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1);
3171 return task;
3172}
3173
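Editor's note: in the stealing loop below, a random victim is drawn from nthreads - 1 candidates and any value >= the caller's own tid is shifted up by one, which excludes self while keeping every other thread equally likely. A standalone sketch of that trick (pick_victim and next_rand are hypothetical helpers, not runtime functions):

#include <cstdio>

// Tiny deterministic PRNG standing in for __kmp_get_random.
static unsigned next_rand(unsigned *state) {
  *state = *state * 1103515245u + 12345u;
  return (*state >> 16) & 0x7fff;
}

// Pick a victim thread id uniformly from all threads except 'self'
// (requires nthreads >= 2, which is the only case where stealing happens).
static int pick_victim(int self, int nthreads, unsigned *state) {
  int victim = (int)(next_rand(state) % (unsigned)(nthreads - 1));
  if (victim >= self)
    ++victim; // shift past our own tid so it is never selected
  return victim;
}

int main() {
  unsigned state = 42;
  for (int i = 0; i < 10; ++i)
    std::printf("%d ", pick_victim(/*self=*/2, /*nthreads=*/4, &state));
  std::printf("\n"); // prints values in {0, 1, 3}, never 2
}
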
3174// __kmp_execute_tasks_template: Choose and execute tasks until either the
3175// condition is satisfied (return true) or there are none left (return false).
3176//
3177// final_spin is TRUE if this is the spin at the release barrier.
3178// thread_finished indicates whether the thread is finished executing all
3179// the tasks it has on its deque, and is at the release barrier.
3180// spinner is the location on which to spin.
3181// spinner == NULL means only execute a single task and return.
3182// checker is the value to check to terminate the spin.
3183template <class C>
3184static inline int __kmp_execute_tasks_template(
3185 kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
3186 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj,
3187 kmp_int32 is_constrained) {
3188 kmp_task_team_t *task_team = thread->th.th_task_team;
3189 kmp_thread_data_t *threads_data;
3190 kmp_task_t *task;
3191 kmp_info_t *other_thread;
3192 kmp_taskdata_t *current_task = thread->th.th_current_task;
3193 std::atomic<kmp_int32> *unfinished_threads;
3194 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
3195 tid = thread->th.th_info.ds.ds_tid;
3196
3197 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec)if (!(__kmp_tasking_mode != tskm_immediate_exec)) { __kmp_debug_assert
("__kmp_tasking_mode != tskm_immediate_exec", "openmp/runtime/src/kmp_tasking.cpp"
, 3197); }
;
3198 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid])if (!(thread == __kmp_threads[gtid])) { __kmp_debug_assert("thread == __kmp_threads[gtid]"
, "openmp/runtime/src/kmp_tasking.cpp", 3198); }
;
3199
3200 if (task_team == NULL__null || current_task == NULL__null)
3201 return FALSE0;
3202
3203 KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
"*thread_finished=%d\n", gtid, final_spin, *thread_finished)
; }
3204 "*thread_finished=%d\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
"*thread_finished=%d\n", gtid, final_spin, *thread_finished)
; }
3205 gtid, final_spin, *thread_finished))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
"*thread_finished=%d\n", gtid, final_spin, *thread_finished)
; }
;
3206
3207 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP0;
3208 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data)((void *)(task_team->tt.tt_threads_data));
3209
3210 KMP_DEBUG_ASSERT(threads_data != NULL)if (!(threads_data != __null)) { __kmp_debug_assert("threads_data != __null"
, "openmp/runtime/src/kmp_tasking.cpp", 3210); }
;
3211
3212 nthreads = task_team->tt.tt_nproc;
3213 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
3214 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||if (!(nthreads > 1 || task_team->tt.tt_found_proxy_tasks
|| task_team->tt.tt_hidden_helper_task_encountered)) { __kmp_debug_assert
("nthreads > 1 || task_team->tt.tt_found_proxy_tasks || task_team->tt.tt_hidden_helper_task_encountered"
, "openmp/runtime/src/kmp_tasking.cpp", 3215); }
3215 task_team->tt.tt_hidden_helper_task_encountered)if (!(nthreads > 1 || task_team->tt.tt_found_proxy_tasks
|| task_team->tt.tt_hidden_helper_task_encountered)) { __kmp_debug_assert
("nthreads > 1 || task_team->tt.tt_found_proxy_tasks || task_team->tt.tt_hidden_helper_task_encountered"
, "openmp/runtime/src/kmp_tasking.cpp", 3215); }
;
3216 KMP_DEBUG_ASSERT(*unfinished_threads >= 0)if (!(*unfinished_threads >= 0)) { __kmp_debug_assert("*unfinished_threads >= 0"
, "openmp/runtime/src/kmp_tasking.cpp", 3216); }
;
3217
3218 while (1) { // Outer loop keeps trying to find tasks in case a single thread
3219 // is getting tasks from target constructs
3220 while (1) { // Inner loop to find a task and execute it
3221 task = NULL__null;
3222 if (task_team->tt.tt_num_task_pri) { // get priority task first
3223 task = __kmp_get_priority_task(gtid, task_team, is_constrained);
3224 }
3225 if (task == NULL__null && use_own_tasks) { // check own queue next
3226 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
3227 }
3228 if ((task == NULL__null) && (nthreads > 1)) { // Steal a task finally
3229 int asleep = 1;
3230 use_own_tasks = 0;
3231 // Try to steal from the last place I stole from successfully.
3232 if (victim_tid == -2) { // haven't stolen anything yet
3233 victim_tid = threads_data[tid].td.td_deque_last_stolen;
3234 if (victim_tid !=
3235 -1) // if we have a last stolen from victim, get the thread
3236 other_thread = threads_data[victim_tid].td.td_thr;
3237 }
3238 if (victim_tid != -1) { // found last victim
3239 asleep = 0;
3240 } else if (!new_victim) { // no recent steals and we haven't already
3241 // used a new victim; select a random thread
3242 do { // Find a different thread to steal work from.
3243 // Pick a random thread. Initial plan was to cycle through all the
3244 // threads, and only return if we tried to steal from every thread,
3245 // and failed. Arch says that's not such a great idea.
3246 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
3247 if (victim_tid >= tid) {
3248 ++victim_tid; // Adjusts random distribution to exclude self
3249 }
3250 // Found a potential victim
3251 other_thread = threads_data[victim_tid].td.td_thr;
3252 // There is a slight chance that __kmp_enable_tasking() did not wake
3253 // up all threads waiting at the barrier. If victim is sleeping,
3254 // then wake it up. Since we were going to pay the cache miss
3255 // penalty for referencing another thread's kmp_info_t struct
3256 // anyway,
3257 // the check shouldn't cost too much performance at this point. In
3258 // extra barrier mode, tasks do not sleep at the separate tasking
3259 // barrier, so this isn't a problem.
3260 asleep = 0;
3261 if ((__kmp_tasking_mode == tskm_task_teams) &&
3262 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) &&
3263 (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc))((void *)(const_cast<void *>(other_thread->th.th_sleep_loc
)))
!=
3264 NULL__null)) {
3265 asleep = 1;
3266 __kmp_null_resume_wrapper(other_thread);
3267 // A sleeping thread should not have any tasks on its queue.
3268 // There is a slight possibility that it resumes, steals a task
3269 // from another thread, which spawns more tasks, all in the time
3270 // that it takes this thread to check => don't write an assertion
3271 // that the victim's queue is empty. Try stealing from a
3272 // different thread.
3273 }
3274 } while (asleep);
3275 }
3276
3277 if (!asleep) {
3278 // We have a victim to try to steal from
3279 task = __kmp_steal_task(other_thread, gtid, task_team,
3280 unfinished_threads, thread_finished,
3281 is_constrained);
3282 }
3283 if (task != NULL__null) { // set last stolen to victim
3284 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
3285 threads_data[tid].td.td_deque_last_stolen = victim_tid;
3286 // The pre-refactored code did not try more than 1 successful new
3287 // victim, unless the last one generated more local tasks;
3288 // new_victim keeps track of this
3289 new_victim = 1;
3290 }
3291 } else { // No tasks found; unset last_stolen
3292 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1)if ((threads_data[tid].td.td_deque_last_stolen) != (-1)) (threads_data
[tid].td.td_deque_last_stolen) = (-1)
;
3293 victim_tid = -2; // no successful victim found
3294 }
3295 }
3296
3297 if (task == NULL__null)
3298 break; // break out of tasking loop
3299
3300// Found a task; execute it
3301#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
3302 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
3303 if (itt_sync_obj == NULL__null) { // we are at fork barrier where we could not
3304 // get the object reliably
3305 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
3306 }
3307 __kmp_itt_task_starting(itt_sync_obj);
3308 }
3309#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
3310 __kmp_invoke_task(gtid, task, current_task);
3311#if USE_ITT_BUILD1
3312 if (itt_sync_obj != NULL__null)
3313 __kmp_itt_task_finished(itt_sync_obj);
3314#endif /* USE_ITT_BUILD */
3315 // If this thread is only partway through the barrier and the condition is
3316 // met, then return now, so that the barrier gather/release pattern can
3317 // proceed. If this thread is in the last spin loop in the barrier,
3318 // waiting to be released, we know that the termination condition will not
3319 // be satisfied, so don't waste any cycles checking it.
3320 if (flag == NULL__null || (!final_spin && flag->done_check())) {
3321 KA_TRACE(if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3322 15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3323 ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
3324 gtid))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n"
, gtid); }
;
3325 return TRUE(!0);
3326 }
3327 if (thread->th.th_task_team == NULL__null) {
3328 break;
3329 }
3330 KMP_YIELD(__kmp_library == library_throughput){ __kmp_x86_pause(); if ((__kmp_library == library_throughput
) && (((__kmp_use_yield == 1) || (__kmp_use_yield == 2
&& (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc))))))) __kmp_yield(); }
; // Yield before next task
3331 // If execution of a stolen task results in more tasks being placed on our
3332 // run queue, reset use_own_tasks
3333 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks)(threads_data[tid].td.td_deque_ntasks) != 0) {
3334 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d stolen task spawned "
"other tasks, restart\n", gtid); }
3335 "other tasks, restart\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d stolen task spawned "
"other tasks, restart\n", gtid); }
3336 gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_execute_tasks_template: T#%d stolen task spawned "
"other tasks, restart\n", gtid); }
;
3337 use_own_tasks = 1;
3338 new_victim = 0;
3339 }
3340 }
3341
3342 // The task source has been exhausted. If in final spin loop of barrier,
3343 // check if termination condition is satisfied. The work queue may be empty
3344 // but there might be proxy tasks still executing.
3345 if (final_spin &&
3346 KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
3347 // First, decrement the #unfinished threads, if that has not already been
3348 // done. This decrement might be to the spin location, and result in the
3349 // termination condition being satisfied.
3350 if (!*thread_finished) {
3351#if KMP_DEBUG
3352 kmp_int32 count = -1 +
3353#endif
3354 KMP_ATOMIC_DEC(unfinished_threads);
3355 KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
3356 "unfinished_threads to %d task_team=%p\n",
3357 gtid, count, task_team));
3358 *thread_finished = TRUE;
3359 }
3360
3361 // It is now unsafe to reference thread->th.th_team !!!
3362 // Decrementing task_team->tt.tt_unfinished_threads can allow the primary
3363 // thread to pass through the barrier, where it might reset each thread's
3364 // th.th_team field for the next parallel region. If we can steal more
3365 // work, we know that this has not happened yet.
3366 if (flag != NULL && flag->done_check()) {
3367 KA_TRACE(
3368 15,
3369 ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3370 gtid));
3371 return TRUE;
3372 }
3373 }
3374
3375 // If this thread's task team is NULL, primary thread has recognized that
3376 // there are no more tasks; bail out
3377 if (thread->th.th_task_team == NULL) {
3378 KA_TRACE(15,
3379 ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3380 return FALSE;
3381 }
3382
3383 // Check the flag again to see if the wait has already been satisfied, so we
3384 // are not trapped in an infinite loop when an if0 task depends on a hidden
3385 // helper task outside any parallel region. Detached tasks are not impacted
3386 // in this case because the only thread executing this function has to
3387 // execute the proxy task, so it is in another code path that has the same check.
3388 if (flag == NULL || (!final_spin && flag->done_check())) {
3389 KA_TRACE(15,
3390 ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3391 gtid));
3392 return TRUE;
3393 }
3394
3395 // We could be getting tasks from target constructs; if this is the only
3396 // thread, keep trying to execute tasks from own queue
3397 if (nthreads == 1 &&
3398 KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
3399 use_own_tasks = 1;
3400 else {
3401 KA_TRACE(15,
3402 ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
3403 return FALSE;
3404 }
3405 }
3406}
3407
3408 template <bool C, bool S>
3409 int __kmp_execute_tasks_32(
3410 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
3411 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3412 kmp_int32 is_constrained) {
3413 return __kmp_execute_tasks_template(
3414 thread, gtid, flag, final_spin,
3415 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3416 }
3417
3418 template <bool C, bool S>
3419 int __kmp_execute_tasks_64(
3420 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
3421 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3422 kmp_int32 is_constrained) {
3423 return __kmp_execute_tasks_template(
3424 thread, gtid, flag, final_spin,
3425 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3426 }
3427
3428 template <bool C, bool S>
3429 int __kmp_atomic_execute_tasks_64(
3430 kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
3431 int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3432 kmp_int32 is_constrained) {
3433 return __kmp_execute_tasks_template(
3434 thread, gtid, flag, final_spin,
3435 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3436 }
3437
3438 int __kmp_execute_tasks_oncore(
3439 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
3440 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
3441 kmp_int32 is_constrained) {
3442 return __kmp_execute_tasks_template(
3443 thread, gtid, flag, final_spin,
3444 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
3445 }
3446
3447 template int
3448 __kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
3449 kmp_flag_32<false, false> *, int,
3450 int *USE_ITT_BUILD_ARG(void *), kmp_int32);
3451
3452 template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
3453 kmp_flag_64<false, true> *,
3454 int,
3455 int *USE_ITT_BUILD_ARG(void *),
3456 kmp_int32);
3457
3458 template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
3459 kmp_flag_64<true, false> *,
3460 int,
3461 int *USE_ITT_BUILD_ARG(void *),
3462 kmp_int32);
3463
3464 template int __kmp_atomic_execute_tasks_64<false, true>(
3465 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
3466 int *USE_ITT_BUILD_ARG(void *), kmp_int32);
3467
3468 template int __kmp_atomic_execute_tasks_64<true, false>(
3469 kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
3470 int *USE_ITT_BUILD_ARG(void *), kmp_int32);
3471
3472// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
3473// next barrier so they can assist in executing enqueued tasks.
3474// First thread in allocates the task team atomically.
3475static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3476 kmp_info_t *this_thr) {
3477 kmp_thread_data_t *threads_data;
3478 int nthreads, i, is_init_thread;
3479
3480 KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
3481 __kmp_gtid_from_thread(this_thr)));
3482
3483 KMP_DEBUG_ASSERT(task_team != NULL);
3484 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3485
3486 nthreads = task_team->tt.tt_nproc;
3487 KMP_DEBUG_ASSERT(nthreads > 0);
3488 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3489
3490 // Allocate or increase the size of threads_data if necessary
3491 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3492
3493 if (!is_init_thread) {
3494 // Some other thread already set up the array.
3495 KA_TRACE(
3496 20,
3497 ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
3498 __kmp_gtid_from_thread(this_thr)));
3499 return;
3500 }
3501 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3502 KMP_DEBUG_ASSERT(threads_data != NULL);
3503
3504 if (__kmp_tasking_mode == tskm_task_teams &&
3505 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
3506 // Release any threads sleeping at the barrier, so that they can steal
3507 // tasks and execute them. In extra barrier mode, tasks do not sleep
3508 // at the separate tasking barrier, so this isn't a problem.
3509 for (i = 0; i < nthreads; i++) {
3510 void *sleep_loc;
3511 kmp_info_t *thread = threads_data[i].td.td_thr;
3512
3513 if (i == this_thr->th.th_info.ds.ds_tid) {
3514 continue;
3515 }
3516 // Since we haven't locked the thread's suspend mutex at this
3517 // point, there is a small window where a thread might be putting
3518 // itself to sleep, but hasn't set the th_sleep_loc field yet.
3519 // To work around this, __kmp_execute_tasks_template() periodically checks
3520 // to see if other threads are sleeping (using the same random mechanism that
3521 // is used for task stealing) and awakens them if they are.
3522 if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
3523 NULL) {
3524 KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
3525 __kmp_gtid_from_thread(this_thr),
3526 __kmp_gtid_from_thread(thread)));
3527 __kmp_null_resume_wrapper(thread);
3528 } else {
3529 KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
3530 __kmp_gtid_from_thread(this_thr),
3531 __kmp_gtid_from_thread(thread)));
3532 }
3533 }
3534 }
3535
3536 KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
3537 __kmp_gtid_from_thread(this_thr)));
3538}
3539
3540 /* // TODO: Check the comment consistency
3541 * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
3542 * like a shadow of the kmp_team_t data struct, with a different lifetime.
3543 * After a child thread checks into a barrier and calls __kmp_release() from
3544 * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
3545 * longer assume that the kmp_team_t structure is intact (at any moment, the
3546 * primary thread may exit the barrier code and free the team data structure,
3547 * and return the threads to the thread pool).
3548 *
3549 * This does not work with the tasking code, as the thread is still
3550 * expected to participate in the execution of any tasks that may have been
3551 * spawned by a member of the team, and the thread still needs access to
3552 * each thread in the team, so that it can steal work from it.
3553 *
3554 * Enter the existence of the kmp_task_team_t struct. It employs a reference
3555 * counting mechanism, and is allocated by the primary thread before calling
3556 * __kmp_<barrier_kind>_release, and then is released by the last thread to
3557 * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
3558 * of the kmp_task_team_t structs for consecutive barriers can overlap
3559 * (and will, unless the primary thread is the last thread to exit the barrier
3560 * release phase, which is not typical). The existence of such a struct is
3561 * useful outside the context of tasking.
3562 *
3563 * We currently use the existence of the threads array as an indicator that
3564 * tasks were spawned since the last barrier. If the structure is to be
3565 * useful outside the context of tasking, then this will have to change, but
3566 * not setting the field minimizes the performance impact of tasking on
3567 * barriers, when no explicit tasks were spawned (pushed, actually).
3568 */
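The comment above describes a double-buffered task team scheme keyed by a 0/1 parity. The following is a minimal sketch of that parity-switching idea using simplified stand-in types (toy_*), not the runtime's own kmp_* structures; it is illustrative only.

#include <atomic>

struct toy_task_team {
  std::atomic<int> unfinished_threads{0};
  bool active = false;
};

struct toy_team {
  toy_task_team *task_team[2] = {nullptr, nullptr}; // indexed by parity
};

struct toy_thread {
  int task_state = 0;            // toggles 0 <-> 1 at each barrier
  toy_task_team *current = nullptr;
};

// After the barrier release phase, each thread flips its parity and picks up
// the task team that the primary thread prepared for the new interval, while
// stragglers can still drain the previous (other-parity) task team.
inline void toy_task_team_sync(toy_thread &thr, toy_team &team) {
  thr.task_state = 1 - thr.task_state;
  thr.current = team.task_team[thr.task_state];
}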
3569
3570 static kmp_task_team_t *__kmp_free_task_teams =
3571 NULL; // Free list for task_team data structures
3572 // Lock for task team data structures
3573 kmp_bootstrap_lock_t __kmp_task_team_lock =
3574 KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
3575
3576 // __kmp_alloc_task_deque:
3577 // Allocates a task deque for a particular thread, and initializes the necessary
3578 // data structures relating to the deque. This only happens once per thread
3579 // per task team since task teams are recycled. No lock is needed during
3580 // allocation since each thread allocates its own deque.
3581 static void __kmp_alloc_task_deque(kmp_info_t *thread,
3582 kmp_thread_data_t *thread_data) {
3583 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
3584 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
3585
3586 // Initialize last stolen task field to "none"
3587 thread_data->td.td_deque_last_stolen = -1;
3588
3589 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
3590 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
3591 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
3592
3593 KE_TRACE(
3594 10,
3595 ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
3596 __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
3597 // Allocate space for task deque, and zero the deque
3598 // Cannot use __kmp_thread_calloc() because threads not around for
3599 // kmp_reap_task_team( ).
3600 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
3601 INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
3602 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
3603}
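A minimal sketch of the power-of-two ring buffer that the deque allocated above relies on: head and tail indices are wrapped with a bitwise AND instead of a modulo. The names and layout are illustrative stand-ins, not the runtime's API; the real deque also resizes on overflow and takes the per-deque lock when thieves are involved.

#include <cstdint>
#include <vector>

struct toy_deque {
  std::vector<void *> slots;
  uint32_t head = 0, tail = 0, ntasks = 0;

  explicit toy_deque(uint32_t size_pow2) : slots(size_pow2) {}
  uint32_t mask() const { return (uint32_t)slots.size() - 1; }

  bool push_tail(void *task) { // owner thread pushes at the tail
    if (ntasks == slots.size())
      return false;            // full; the real code grows the deque instead
    slots[tail] = task;
    tail = (tail + 1) & mask();
    ++ntasks;
    return true;
  }

  void *pop_head() {           // thieves steal from the head
    if (ntasks == 0)
      return nullptr;
    void *task = slots[head];
    head = (head + 1) & mask();
    --ntasks;
    return task;
  }
};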
3604
3605 // __kmp_free_task_deque:
3606 // Deallocates a task deque for a particular thread. Happens at library
3607 // deallocation, so we don't need to reset all thread data fields.
3608 static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
3609 if (thread_data->td.td_deque != NULL) {
3610 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3611 TCW_4(thread_data->td.td_deque_ntasks, 0);
3612 __kmp_free(thread_data->td.td_deque);
3613 thread_data->td.td_deque = NULL;
3614 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3615 }
3616
3617#ifdef BUILD_TIED_TASK_STACK
3618 // GEH: Figure out what to do here for td_susp_tied_tasks
3619 if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
3620 __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
3621 }
3622#endif // BUILD_TIED_TASK_STACK
3623}
3624
3625// __kmp_realloc_task_threads_data:
3626// Allocates a threads_data array for a task team, either by allocating an
3627// initial array or enlarging an existing array. Only the first thread to get
3628// the lock allocs or enlarges the array and re-initializes the array elements.
3629// That thread returns "TRUE", the rest return "FALSE".
3630// Assumes that the new array size is given by task_team -> tt.tt_nproc.
3631// The current size is given by task_team -> tt.tt_max_threads.
3632static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3633 kmp_task_team_t *task_team) {
3634 kmp_thread_data_t **threads_data_p;
3635 kmp_int32 nthreads, maxthreads;
3636 int is_init_thread = FALSE;
3637
3638 if (TCR_4(task_team->tt.tt_found_tasks)) {
1
Assuming field 'tt_found_tasks' is 0
2
Taking false branch
3639 // Already reallocated and initialized.
3640 return FALSE;
3641 }
3642
3643 threads_data_p = &task_team->tt.tt_threads_data;
3644 nthreads = task_team->tt.tt_nproc;
3645 maxthreads = task_team->tt.tt_max_threads;
3646
3647 // All threads must lock when they encounter the first task of the implicit
3648 // task region to make sure threads_data fields are (re)initialized before
3649 // used.
3650 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3
Calling '__kmp_acquire_bootstrap_lock'
5
Returning from '__kmp_acquire_bootstrap_lock'
3651
3652 if (!TCR_4(task_team->tt.tt_found_tasks)) {
6
Assuming field 'tt_found_tasks' is 0
7
Taking true branch
3653 // first thread to enable tasking
3654 kmp_team_t *team = thread->th.th_team;
3655 int i;
3656
3657 is_init_thread = TRUE;
3658 if (maxthreads < nthreads) {
8
Assuming 'maxthreads' is >= 'nthreads'
9
Taking false branch
3659
3660 if (*threads_data_p != NULL) {
3661 kmp_thread_data_t *old_data = *threads_data_p;
3662 kmp_thread_data_t *new_data = NULL;
3663
3664 KE_TRACE(
3665 10,
3666 ("__kmp_realloc_task_threads_data: T#%d reallocating "
3667 "threads data for task_team %p, new_size = %d, old_size = %d\n",
3668 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
3669 // Reallocate threads_data to have more elements than current array
3670 // Cannot use __kmp_thread_realloc() because threads not around for
3671 // kmp_reap_task_team( ). Note all new array entries are initialized
3672 // to zero by __kmp_allocate().
3673 new_data = (kmp_thread_data_t *)__kmp_allocate(
3674 nthreads * sizeof(kmp_thread_data_t));
3675 // copy old data to new data
3676 KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
3677 (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));
3678
3679#ifdef BUILD_TIED_TASK_STACK
3680 // GEH: Figure out if this is the right thing to do
3681 for (i = maxthreads; i < nthreads; i++) {
3682 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3683 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3684 }
3685#endif // BUILD_TIED_TASK_STACK
3686 // Install the new data and free the old data
3687 (*threads_data_p) = new_data;
3688 __kmp_free(old_data);
3689 } else {
3690 KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
3691 "threads data for task_team %p, size = %d\n",
3692 __kmp_gtid_from_thread(thread), task_team, nthreads));
3693 // Make the initial allocate for threads_data array, and zero entries
3694 // Cannot use __kmp_thread_calloc() because threads not around for
3695 // kmp_reap_task_team( ).
3696 *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
3697 nthreads * sizeof(kmp_thread_data_t));
3698#ifdef BUILD_TIED_TASK_STACK
3699 // GEH: Figure out if this is the right thing to do
3700 for (i = 0; i < nthreads; i++) {
3701 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3702 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3703 }
3704#endif // BUILD_TIED_TASK_STACK
3705 }
3706 task_team->tt.tt_max_threads = nthreads;
3707 } else {
3708 // If array has (more than) enough elements, go ahead and use it
3709 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
10
Assuming the condition is false
11
Taking true branch
3710 }
3711
3712 // initialize threads_data pointers back to thread_info structures
3713 for (i = 0; i < nthreads; i++) {
12
Assuming 'i' is < 'nthreads'
13
Loop condition is true. Entering loop body
3714 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
14
'thread_data' initialized to a null pointer value
3715 thread_data->td.td_thr = team->t.t_threads[i];
15
Dereference of null pointer
3716
3717 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3718 // The last stolen field survives across teams / barrier, and the number
3719 // of threads may have changed. It's possible (likely?) that a new
3720 // parallel region will exhibit the same behavior as previous region.
3721 thread_data->td.td_deque_last_stolen = -1;
3722 }
3723 }
3724
3725 KMP_MB();
3726 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3727 }
3728
3729 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3730 return is_init_thread;
3731}
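The analyzer path shown above (steps 1-15) assumes tt_found_tasks is 0, maxthreads >= nthreads, and *threads_data_p still NULL, so the loop starting at line 3713 indexes a null base and line 3715 dereferences it. Below is a hedged sketch of the guard shape that would break that path, written with toy types rather than the runtime's structures; it is not a proposed upstream patch, only an illustration of where a check or allocation would have to happen before the array is indexed.

#include <cstdlib>

struct toy_thread_data { void *td_thr = nullptr; };

struct toy_task_team_data {
  toy_thread_data *threads_data = nullptr;
  int max_threads = 0;
};

inline void toy_realloc_threads_data(toy_task_team_data &tt, int nthreads,
                                     void *const *team_threads) {
  // (Re)allocate before any element is dereferenced, even when the recorded
  // capacity already covers nthreads; the real code would also copy the old
  // entries instead of discarding them.
  if (tt.threads_data == nullptr || tt.max_threads < nthreads) {
    free(tt.threads_data);
    tt.threads_data = static_cast<toy_thread_data *>(
        calloc((size_t)nthreads, sizeof(toy_thread_data)));
    tt.max_threads = nthreads;
  }
  for (int i = 0; i < nthreads; ++i)
    tt.threads_data[i].td_thr = team_threads[i]; // safe: never a null base
}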
3732
3733// __kmp_free_task_threads_data:
3734// Deallocates a threads_data array for a task team, including any attached
3735// tasking deques. Only occurs at library shutdown.
3736static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3737 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3738 if (task_team->tt.tt_threads_data != NULL) {
3739 int i;
3740 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3741 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3742 }
3743 __kmp_free(task_team->tt.tt_threads_data);
3744 task_team->tt.tt_threads_data = NULL;
3745 }
3746 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3747}
3748
3749// __kmp_free_task_pri_list:
3750// Deallocates tasking deques used for priority tasks.
3751// Only occurs at library shutdown.
3752static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
3753 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3754 if (task_team->tt.tt_task_pri_list != NULL) {
3755 kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
3756 while (list != NULL) {
3757 kmp_task_pri_t *next = list->next;
3758 __kmp_free_task_deque(&list->td);
3759 __kmp_free(list);
3760 list = next;
3761 }
3762 task_team->tt.tt_task_pri_list = NULL;
3763 }
3764 __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3765}
3766
3767// __kmp_allocate_task_team:
3768// Allocates a task team associated with a specific team, taking it from
3769// the global task team free list if possible. Also initializes data
3770// structures.
3771static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3772 kmp_team_t *team) {
3773 kmp_task_team_t *task_team = NULL;
3774 int nthreads;
3775
3776 KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
3777 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3778
3779 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3780 // Take a task team from the task team pool
3781 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3782 if (__kmp_free_task_teams != NULL) {
3783 task_team = __kmp_free_task_teams;
3784 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3785 task_team->tt.tt_next = NULL;
3786 }
3787 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3788 }
3789
3790 if (task_team == NULL) {
3791 KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
3792 "task team for team %p\n",
3793 __kmp_gtid_from_thread(thread), team));
3794 // Allocate a new task team if one is not available. Cannot use
3795 // __kmp_thread_malloc because threads not around for kmp_reap_task_team.
3796 task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
3797 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3798 __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3799#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
3800 // suppress race conditions detection on synchronization flags in debug mode
3801 // this helps to analyze library internals eliminating false positives
3802 __itt_suppress_mark_range(
3803 __itt_suppress_range, __itt_suppress_threading_errors,
3804 &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
3805 __itt_suppress_mark_range(__itt_suppress_range,
3806 __itt_suppress_threading_errors,
3807 CCAST(kmp_uint32 *, &task_team->tt.tt_active),
3808 sizeof(task_team->tt.tt_active));
3809#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
3810 // Note: __kmp_allocate zeroes returned memory, otherwise we would need:
3811 // task_team->tt.tt_threads_data = NULL;
3812 // task_team->tt.tt_max_threads = 0;
3813 // task_team->tt.tt_next = NULL;
3814 }
3815
3816 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3817 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3818 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3819 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3820
3821 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3822 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3823 TCW_4(task_team->tt.tt_active, TRUE);
3824
3825 KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
3826 "unfinished_threads init'd to %d\n",
3827 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
3828 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
3829 return task_team;
3830}
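A minimal sketch of the allocate-or-reuse pattern used above: peek at the free list, re-check under the lock before popping, and fall back to a fresh allocation. Toy types and std primitives stand in for the runtime's TCR_PTR/bootstrap-lock machinery; this is illustrative, not the runtime's implementation.

#include <atomic>
#include <mutex>

struct toy_reusable_team { toy_reusable_team *next = nullptr; };

static std::atomic<toy_reusable_team *> g_free_list{nullptr};
static std::mutex g_free_list_lock;

inline toy_reusable_team *toy_allocate_task_team() {
  toy_reusable_team *tt = nullptr;
  if (g_free_list.load(std::memory_order_acquire) != nullptr) { // unlocked peek
    std::lock_guard<std::mutex> guard(g_free_list_lock);
    tt = g_free_list.load(std::memory_order_relaxed);
    if (tt != nullptr) {               // re-check under the lock before popping
      g_free_list.store(tt->next, std::memory_order_relaxed);
      tt->next = nullptr;
    }
  }
  if (tt == nullptr)
    tt = new toy_reusable_team();      // nothing to reuse; allocate fresh
  return tt;
}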
3831
3832// __kmp_free_task_team:
3833// Frees the task team associated with a specific thread, and adds it
3834// to the global task team free list.
3835void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
3836 KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
3837 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
3838
3839 // Put task team back on free list
3840 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3841
3842 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
3843 task_team->tt.tt_next = __kmp_free_task_teams;
3844 TCW_PTR(__kmp_free_task_teams, task_team);
3845
3846 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3847}
3848
3849// __kmp_reap_task_teams:
3850// Free all the task teams on the task team free list.
3851// Should only be done during library shutdown.
3852// Cannot do anything that needs a thread structure or gtid since they are
3853// already gone.
3854void __kmp_reap_task_teams(void) {
3855 kmp_task_team_t *task_team;
3856
3857 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3858 // Free all task_teams on the free list
3859 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3860 while ((task_team = __kmp_free_task_teams) != NULL) {
3861 __kmp_free_task_teams = task_team->tt.tt_next;
3862 task_team->tt.tt_next = NULL;
3863
3864 // Free threads_data if necessary
3865 if (task_team->tt.tt_threads_data != NULL) {
3866 __kmp_free_task_threads_data(task_team);
3867 }
3868 if (task_team->tt.tt_task_pri_list != NULL) {
3869 __kmp_free_task_pri_list(task_team);
3870 }
3871 __kmp_free(task_team);
3872 }
3873 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3874 }
3875}
3876
3877// __kmp_wait_to_unref_task_teams:
3878// Some threads could still be in the fork barrier release code, possibly
3879// trying to steal tasks. Wait for each thread to unreference its task team.
3880void __kmp_wait_to_unref_task_teams(void) {
3881 kmp_info_t *thread;
3882 kmp_uint32 spins;
3883 kmp_uint64 time;
3884 int done;
3885
3886 KMP_INIT_YIELD(spins);
3887 KMP_INIT_BACKOFF(time);
3888
3889 for (;;) {
3890 done = TRUE;
3891
3892 // TODO: GEH - this may be wrong because some sync would be necessary
3893 // in case threads are added to the pool during the traversal. Need to
3894 // verify that lock for thread pool is held when calling this routine.
3895 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3896 thread = thread->th.th_next_pool) {
3897#if KMP_OS_WINDOWS
3898 DWORD exit_val;
3899#endif
3900 if (TCR_PTR(thread->th.th_task_team) == NULL) {
3901 KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
3902 __kmp_gtid_from_thread(thread)));
3903 continue;
3904 }
3905#if KMP_OS_WINDOWS
3906 // TODO: GEH - add this check for Linux* OS / OS X* as well?
3907 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3908 thread->th.th_task_team = NULL;
3909 continue;
3910 }
3911#endif
3912
3913 done = FALSE; // Because th_task_team pointer is not NULL for this thread
3914
3915 KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
3916 "unreference task_team\n",
3917 __kmp_gtid_from_thread(thread)));
3918
3919 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3920 void *sleep_loc;
3921 // If the thread is sleeping, awaken it.
3922 if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
3923 NULL) {
3924 KA_TRACE(
3925 10,
3926 ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
3927 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
3928 __kmp_null_resume_wrapper(thread);
3929 }
3930 }
3931 }
3932 if (done) {
3933 break;
3934 }
3935
3936 // If oversubscribed or have waited a bit, yield.
3937 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
3938 }
3939}
3940
3941void __kmp_shift_task_state_stack(kmp_info_t *this_thr, kmp_uint8 value) {
3942 // Shift values from th_task_state_top+1 to task_state_stack_sz
3943 if (this_thr->th.th_task_state_top + 1 >=
3944 this_thr->th.th_task_state_stack_sz) { // increase size
3945 kmp_uint32 new_size = 2 * this_thr->th.th_task_state_stack_sz;
3946 kmp_uint8 *old_stack, *new_stack;
3947 kmp_uint32 i;
3948 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
3949 for (i = 0; i <= this_thr->th.th_task_state_top; ++i) {
3950 new_stack[i] = this_thr->th.th_task_state_memo_stack[i];
3951 }
3952 // If we need to reallocate do the shift at the same time.
3953 for (; i < this_thr->th.th_task_state_stack_sz; ++i) {
3954 new_stack[i + 1] = this_thr->th.th_task_state_memo_stack[i];
3955 }
3956 for (i = this_thr->th.th_task_state_stack_sz; i < new_size;
3957 ++i) { // zero-init rest of stack
3958 new_stack[i] = 0;
3959 }
3960 old_stack = this_thr->th.th_task_state_memo_stack;
3961 this_thr->th.th_task_state_memo_stack = new_stack;
3962 this_thr->th.th_task_state_stack_sz = new_size;
3963 __kmp_free(old_stack);
3964 } else {
3965 kmp_uint8 *end;
3966 kmp_uint32 i;
3967
3968 end = &this_thr->th
3969 .th_task_state_memo_stack[this_thr->th.th_task_state_stack_sz];
3970
3971 for (i = this_thr->th.th_task_state_stack_sz - 1;
3972 i > this_thr->th.th_task_state_top; i--, end--)
3973 end[0] = end[-1];
3974 }
3975 this_thr->th.th_task_state_memo_stack[this_thr->th.th_task_state_top + 1] =
3976 value;
3977}
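A toy sketch of the grow-or-shift operation performed by __kmp_shift_task_state_stack above, using a std::vector stand-in for the memo stack; illustrative only, it does not mirror the runtime's allocation rules or field names.

#include <cstdint>
#include <vector>

struct toy_state_stack {
  std::vector<uint8_t> memo;
  uint32_t top = 0;

  explicit toy_state_stack(uint32_t sz) : memo(sz, 0) {}

  void insert_above_top(uint8_t value) {
    if (top + 1 >= memo.size()) {
      // Grow to twice the size; entries above top land one slot higher.
      std::vector<uint8_t> bigger(2 * memo.size(), 0);
      for (uint32_t i = 0; i <= top; ++i)
        bigger[i] = memo[i];
      for (uint32_t i = top + 1; i < memo.size(); ++i)
        bigger[i + 1] = memo[i];
      memo.swap(bigger);
    } else {
      // Enough room: shift entries above top up by one slot, highest first.
      for (uint32_t i = (uint32_t)memo.size() - 1; i > top + 1; --i)
        memo[i] = memo[i - 1];
    }
    memo[top + 1] = value;
  }
};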
3978
3979// __kmp_task_team_setup: Create a task_team for the current team, but use
3980// an already created, unused one if it already exists.
3981void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
3982 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3983
3984 // If this task_team hasn't been created yet, allocate it. It will be used in
3985 // the region after the next.
3986 // If it exists, it is the current task team and shouldn't be touched yet as
3987 // it may still be in use.
3988 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3989 (always || team->t.t_nproc > 1)) {
3990 team->t.t_task_team[this_thr->th.th_task_state] =
3991 __kmp_allocate_task_team(this_thr, team);
3992 KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
3993 " for team %d at parity=%d\n",
3994 __kmp_gtid_from_thread(this_thr),
3995 team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
3996 this_thr->th.th_task_state));
3997 }
3998 if (this_thr->th.th_task_state == 1 && always && team->t.t_nproc == 1) {
3999 // fix task state stack to adjust for proxy and helper tasks
4000 KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d needs to shift stack"
4001 " for team %d at parity=%d\n",
4002 __kmp_gtid_from_thread(this_thr), team->t.t_id,
4003 this_thr->th.th_task_state));
4004 __kmp_shift_task_state_stack(this_thr, this_thr->th.th_task_state);
4005 }
4006
4007 // After threads exit the release, they will call sync, and then point to this
4008 // other task_team; make sure it is allocated and properly initialized. As
4009 // threads spin in the barrier release phase, they will continue to use the
4010 // previous task_team struct(above), until they receive the signal to stop
4011 // checking for tasks (they can't safely reference the kmp_team_t struct,
4012 // which could be reallocated by the primary thread). No task teams are formed
4013 // for serialized teams.
4014 if (team->t.t_nproc > 1) {
4015 int other_team = 1 - this_thr->th.th_task_state;
4016 KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
4017 if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
4018 team->t.t_task_team[other_team] =
4019 __kmp_allocate_task_team(this_thr, team);
4020 KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
4021 "task_team %p for team %d at parity=%d\n",
4022 __kmp_gtid_from_thread(this_thr),
4023 team->t.t_task_team[other_team], team->t.t_id, other_team));
4024 } else { // Leave the old task team struct in place for the upcoming region;
4025 // adjust as needed
4026 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
4027 if (!task_team->tt.tt_active ||
4028 team->t.t_nproc != task_team->tt.tt_nproc) {
4029 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
4030 TCW_4(task_team->tt.tt_found_tasks, FALSE);
4031 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
4032 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
4033 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
4034 team->t.t_nproc);
4035 TCW_4(task_team->tt.tt_active, TRUE);
4036 }
4037 // if team size has changed, the first thread to enable tasking will
4038 // realloc threads_data if necessary
4039 KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
4040 "%p for team %d at parity=%d\n",
4041 __kmp_gtid_from_thread(this_thr),
4042 team->t.t_task_team[other_team], team->t.t_id, other_team));
4043 }
4044 }
4045
4046 // For regular thread, task enabling should be called when the task is going
4047 // to be pushed to a dequeue. However, for the hidden helper thread, we need
4048 // it ahead of time so that some operations can be performed without race
4049 // condition.
4050 if (this_thr == __kmp_hidden_helper_main_thread) {
4051 for (int i = 0; i < 2; ++i) {
4052 kmp_task_team_t *task_team = team->t.t_task_team[i];
4053 if (KMP_TASKING_ENABLED(task_team)) {
4054 continue;
4055 }
4056 __kmp_enable_tasking(task_team, this_thr);
4057 for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
4058 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
4059 if (thread_data->td.td_deque == NULL) {
4060 __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
4061 }
4062 }
4063 }
4064 }
4065}
4066
4067// __kmp_task_team_sync: Propagation of task team data from team to threads
4068// which happens just after the release phase of a team barrier. This may be
4069// called by any thread, but only for teams with # threads > 1.
4070void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
4071 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4072
4073 // Toggle the th_task_state field, to switch which task_team this thread
4074 // refers to
4075 this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
4076
4077 // It is now safe to propagate the task team pointer from the team struct to
4078 // the current thread.
4079 TCW_PTR(this_thr->th.th_task_team,
4080 team->t.t_task_team[this_thr->th.th_task_state]);
4081 KA_TRACE(20,
4082 ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
4083 "%p from Team #%d (parity=%d)\n",
4084 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
4085 team->t.t_id, this_thr->th.th_task_state));
4086}
4087
4088// __kmp_task_team_wait: Primary thread waits for outstanding tasks after the
4089// barrier gather phase. Only called by primary thread if #threads in team > 1
4090// or if proxy tasks were created.
4091//
4092// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
4093// by passing in 0 optionally as the last argument. When wait is zero, primary
4094// thread does not wait for unfinished_threads to reach 0.
4095 void __kmp_task_team_wait(
4096 kmp_info_t *this_thr,
4097 kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
4098 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
4099
4100 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4101 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
4102
4103 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
4104 if (wait) {
4105 KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
4106 "(for unfinished_threads to reach 0) on task_team = %p\n",
4107 __kmp_gtid_from_thread(this_thr), task_team));
4108 // Worker threads may have dropped through to release phase, but could
4109 // still be executing tasks. Wait here for tasks to complete. To avoid
4110 // memory contention, only primary thread checks termination condition.
4111 kmp_flag_32<false, false> flag(
4112 RCAST(std::atomic<kmp_uint32> *,
4113 &task_team->tt.tt_unfinished_threads),
4114 0U);
4115 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
4116 }
4117 // Deactivate the old task team, so that the worker threads will stop
4118 // referencing it while spinning.
4119 KA_TRACE(
4120 20,
4121 ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
4122 "setting active to false, setting local and team's pointer to NULL\n",
4123 __kmp_gtid_from_thread(this_thr), task_team));
4124 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
4125 task_team->tt.tt_found_proxy_tasks == TRUE ||
4126 task_team->tt.tt_hidden_helper_task_encountered == TRUE);
4127 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
4128 TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
4129 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
4130 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
4131 KMP_MB();
4132
4133 TCW_PTR(this_thr->th.th_task_team, NULL);
4134 }
4135}
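
// [Editor's sketch -- not part of kmp_tasking.cpp. A minimal standalone
// analogue of the wait performed above: one designated waiter polls an atomic
// counter until it reaches zero, mirroring the comment that only the primary
// thread checks the termination condition to limit memory contention. Plain
// std::atomic stands in for kmp_flag_32; the function name is made up.]
#include <atomic>
#include <thread>

static void wait_for_unfinished_threads(std::atomic<unsigned> &unfinished) {
  while (unfinished.load(std::memory_order_acquire) != 0)
    std::this_thread::yield(); // the real runtime executes queued tasks here
}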
4136
4137// __kmp_tasking_barrier:
4138// This routine is called only when __kmp_tasking_mode == tskm_extra_barrier.
4139// Internal function to execute all tasks prior to a regular barrier or a join
4140// barrier. It is a full barrier itself, which unfortunately turns regular
4141// barriers into double barriers and join barriers into 1 1/2 barriers.
4142void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
4143  std::atomic<kmp_uint32> *spin = RCAST(
4144      std::atomic<kmp_uint32> *,
4145      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
4146  int flag = FALSE;
4147  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
4148
4149#if USE_ITT_BUILD
4150  KMP_FSYNC_SPIN_INIT(spin, NULL);
4151#endif /* USE_ITT_BUILD */
4152  kmp_flag_32<false, false> spin_flag(spin, 0U);
4153  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
4154                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
4155#if USE_ITT_BUILD
4156    // TODO: What about itt_sync_obj??
4157    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
4158#endif /* USE_ITT_BUILD */
4159
4160    if (TCR_4(__kmp_global.g.g_done)) {
4161      if (__kmp_global.g.g_abort)
4162        __kmp_abort_thread();
4163      break;
4164    }
4165    KMP_YIELD(TRUE);
4166  }
4167#if USE_ITT_BUILD
4168  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
4169#endif /* USE_ITT_BUILD */
4170}
4171
4172// __kmp_give_task puts a task into a given thread queue if:
4173// - the queue for that thread was created
4174// - there's space in that queue
4175// Because of this, __kmp_push_task needs to check if there's space after
4176// getting the lock
4177static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
4178 kmp_int32 pass) {
4179  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4180  kmp_task_team_t *task_team = taskdata->td_task_team;
4181
4182  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
4183                taskdata, tid));
4184
4185  // If task_team is NULL something went really bad...
4186  KMP_DEBUG_ASSERT(task_team != NULL);
4187
4188  bool result = false;
4189  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
4190
4191  if (thread_data->td.td_deque == NULL) {
4192    // There's no queue in this thread, go find another one
4193    // We're guaranteed that at least one thread has a queue
4194    KA_TRACE(30,
4195             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
4196              tid, taskdata));
4197 return result;
4198 }
4199
4200  if (TCR_4(thread_data->td.td_deque_ntasks) >=
4201      TASK_DEQUE_SIZE(thread_data->td)) {
4202    KA_TRACE(
4203        30,
4204        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
4205         taskdata, tid));
4206
4207    // if this deque is bigger than the pass ratio give a chance to another
4208    // thread
4209    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4210      return result;
4211
4212    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4213    if (TCR_4(thread_data->td.td_deque_ntasks) >=
4214        TASK_DEQUE_SIZE(thread_data->td)) {
4215 // expand deque to push the task which is not allowed to execute
4216 __kmp_realloc_task_deque(thread, thread_data);
4217 }
4218
4219 } else {
4220
4221 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4222
4223    if (TCR_4(thread_data->td.td_deque_ntasks) >=
4224        TASK_DEQUE_SIZE(thread_data->td)) {
4225      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
4226                    "thread %d.\n",
4227                    taskdata, tid));
4228
4229      // if this deque is bigger than the pass ratio give a chance to another
4230      // thread
4231      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4232 goto release_and_exit;
4233
4234 __kmp_realloc_task_deque(thread, thread_data);
4235 }
4236 }
4237
4238 // lock is held here, and there is space in the deque
4239
4240 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
4241 // Wrap index.
4242 thread_data->td.td_deque_tail =
4243      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
4244  TCW_4(thread_data->td.td_deque_ntasks,
4245        TCR_4(thread_data->td.td_deque_ntasks) + 1);
4246
4247  result = true;
4248  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
4249                taskdata, tid));
4250
4251release_and_exit:
4252 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
4253
4254 return result;
4255}
4256
4257#define PROXY_TASK_FLAG 0x40000000
4258/* The finish of the proxy tasks is divided in two pieces:
4259 - the top half is the one that can be done from a thread outside the team
4260 - the bottom half must be run from a thread within the team
4261
4262 In order to run the bottom half the task gets queued back into one of the
4263 threads of the team. Once the td_incomplete_child_task counter of the parent
4264 is decremented the threads can leave the barriers. So, the bottom half needs
4265 to be queued before the counter is decremented. The top half is therefore
4266 divided in two parts:
4267 - things that can be run before queuing the bottom half
4268 - things that must be run after queuing the bottom half
4269
4270 This creates a second race as the bottom half can free the task before the
4271 second top half is executed. To avoid this we use the
4272 td_incomplete_child_task of the proxy task to synchronize the top and bottom
4273 half. */
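
// [Editor's sketch -- not part of kmp_tasking.cpp. The synchronization the
// comment above describes, reduced to the one flag bit used below: the first
// top half sets an "imaginary child" bit before the bottom half is queued,
// the second top half clears it, and the bottom half spins on it so it cannot
// free the task in between. Names are illustrative only.]
#include <atomic>

constexpr int kProxyBit = 0x40000000;

inline void first_top_half(std::atomic<int> &incomplete_children) {
  incomplete_children.fetch_or(kProxyBit, std::memory_order_acq_rel);
  // safe to queue the bottom half from here on
}
inline void second_top_half(std::atomic<int> &incomplete_children) {
  incomplete_children.fetch_and(~kProxyBit, std::memory_order_acq_rel);
}
inline void bottom_half(std::atomic<int> &incomplete_children) {
  while (incomplete_children.load(std::memory_order_acquire) & kProxyBit)
    ; // only after this may the task be released/freed
}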
4274static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4275  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
4276  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4277  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
4278  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
4279
4280  taskdata->td_flags.complete = 1; // mark the task as completed
4281
4282  if (taskdata->td_taskgroup)
4283    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
4284
4285  // Create an imaginary children for this task so the bottom half cannot
4286  // release the task before we have completed the second top half
4287  KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
4288}
4289
4290static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4291#if KMP_DEBUG
4292  kmp_int32 children = 0;
4293  // Predecrement simulated by "- 1" calculation
4294  children = -1 +
4295#endif
4296             KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
4297  KMP_DEBUG_ASSERT(children >= 0);
4298
4299  // Remove the imaginary children
4300  KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
4301}
4302
4303static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
4304  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4305  kmp_info_t *thread = __kmp_threads[gtid];
4306
4307  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4308  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
4309                   1); // top half must run before bottom half
4310
4311  // We need to wait to make sure the top half is finished
4312  // Spinning here should be ok as this should happen quickly
4313  while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
4314          PROXY_TASK_FLAG) > 0)
4315 ;
4316
4317 __kmp_release_deps(gtid, taskdata);
4318 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
4319}
4320
4321/*!
4322@ingroup TASKING
4323@param gtid Global Thread ID of encountering thread
4324@param ptask Task which execution is completed
4325
4326Execute the completion of a proxy task from a thread that is part of the
4327team. Run first and bottom halves directly.
4328*/
4329void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
4330  KMP_DEBUG_ASSERT(ptask != NULL);
4331  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4332  KA_TRACE(
4333      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
4334           gtid, taskdata));
4335  __kmp_assert_valid_gtid(gtid);
4336  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4337
4338 __kmp_first_top_half_finish_proxy(taskdata);
4339 __kmp_second_top_half_finish_proxy(taskdata);
4340 __kmp_bottom_half_finish_proxy(gtid, ptask);
4341
4342  KA_TRACE(10,
4343           ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
4344            gtid, taskdata));
4345}
4346
4347void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
4348  KMP_DEBUG_ASSERT(ptask != NULL);
4349  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4350
4351 // Enqueue task to complete bottom half completion from a thread within the
4352 // corresponding team
4353 kmp_team_t *team = taskdata->td_team;
4354 kmp_int32 nthreads = team->t.t_nproc;
4355 kmp_info_t *thread;
4356
4357 // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
4358 // but we cannot use __kmp_get_random here
4359 kmp_int32 start_k = start % nthreads;
4360 kmp_int32 pass = 1;
4361 kmp_int32 k = start_k;
4362
4363 do {
4364 // For now we're just linearly trying to find a thread
4365 thread = team->t.t_threads[k];
4366 k = (k + 1) % nthreads;
4367
4368 // we did a full pass through all the threads
4369 if (k == start_k)
4370 pass = pass << 1;
4371
4372 } while (!__kmp_give_task(thread, k, ptask, pass));
4373
4374  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
4375    // awake at least one thread to execute given task
4376    for (int i = 0; i < nthreads; ++i) {
4377      thread = team->t.t_threads[i];
4378      if (thread->th.th_sleep_loc != NULL) {
4379 __kmp_null_resume_wrapper(thread);
4380 break;
4381 }
4382 }
4383 }
4384}
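
// [Editor's note -- illustrative only, not runtime code. The do/while loop
// above sweeps the team round-robin and doubles `pass` after each full sweep;
// __kmp_give_task (further up) will only grow a deque whose current size is
// still below pass * INITIAL_TASK_DEQUE_SIZE, i.e. 256 entries on the first
// sweep, 512 on the second, and so on. A helper making that threshold
// explicit, assuming INITIAL_TASK_DEQUE_SIZE == (1 << 8):]
static inline unsigned give_task_grow_limit(int pass) {
  return (unsigned)pass * (1u << 8); // pass 1 -> 256, pass 2 -> 512, ...
}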
4385
4386/*!
4387@ingroup TASKING
4388@param ptask Task which execution is completed
4389
4390Execute the completion of a proxy task from a thread that could not belong to
4391the team.
4392*/
4393void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
4394  KMP_DEBUG_ASSERT(ptask != NULL);
4395  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4396
4397  KA_TRACE(
4398      10,
4399      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
4400       taskdata));
4401
4402  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4403
4404 __kmp_first_top_half_finish_proxy(taskdata);
4405
4406 __kmpc_give_task(ptask);
4407
4408 __kmp_second_top_half_finish_proxy(taskdata);
4409
4410  KA_TRACE(
4411      10,
4412      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
4413       taskdata));
4414}
4415
4416kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
4417 kmp_task_t *task) {
4418  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
4419 if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
4420 td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
4421 td->td_allow_completion_event.ed.task = task;
4422 __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
4423 }
4424 return &td->td_allow_completion_event;
4425}
4426
4427void __kmp_fulfill_event(kmp_event_t *event) {
4428 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4429 kmp_task_t *ptask = event->ed.task;
4430    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4431    bool detached = false;
4432    int gtid = __kmp_get_gtid();
4433
4434 // The associated task might have completed or could be completing at this
4435 // point.
4436 // We need to take the lock to avoid races
4437 __kmp_acquire_tas_lock(&event->lock, gtid);
4438    if (taskdata->td_flags.proxy == TASK_PROXY) {
4439      detached = true;
4440    } else {
4441#if OMPT_SUPPORT
4442      // The OMPT event must occur under mutual exclusion,
4443      // otherwise the tool might access ptask after free
4444      if (UNLIKELY(ompt_enabled.enabled))
4445        __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
4446#endif
4447 }
4448 event->type = KMP_EVENT_UNINITIALIZED;
4449 __kmp_release_tas_lock(&event->lock, gtid);
4450
4451 if (detached) {
4452#if OMPT_SUPPORT
4453      // We free ptask afterwards and know the task is finished,
4454      // so locking is not necessary
4455      if (UNLIKELY(ompt_enabled.enabled))
4456        __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
4457#endif
4458      // If the task detached complete the proxy task
4459      if (gtid >= 0) {
4460        kmp_team_t *team = taskdata->td_team;
4461        kmp_info_t *thread = __kmp_get_thread();
4462 if (thread->th.th_team == team) {
4463 __kmpc_proxy_task_completed(gtid, ptask);
4464 return;
4465 }
4466 }
4467
4468 // fallback
4469 __kmpc_proxy_task_completed_ooo(ptask);
4470 }
4471 }
4472}
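
// [Editor's sketch -- user-level view, not part of kmp_tasking.cpp. The
// allow-completion event above is what backs the OpenMP 5.0 detach clause:
// fulfilling the event later (often from a non-OpenMP thread) reaches
// __kmp_fulfill_event, and a detached proxy task is then completed via
// __kmpc_proxy_task_completed or __kmpc_proxy_task_completed_ooo above.
// start_async is a hypothetical callback that eventually calls
// omp_fulfill_event(ev).]
#include <omp.h>

void detach_example(void (*start_async)(omp_event_handle_t)) {
  omp_event_handle_t ev;
#pragma omp task detach(ev)
  { start_async(ev); } // task body may return before the async work finishes
#pragma omp taskwait   // released only once omp_fulfill_event(ev) is called
}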
4473
4474// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
4475// for taskloop
4476//
4477// thread: allocating thread
4478// task_src: pointer to source task to be duplicated
4479// returns: a pointer to the allocated kmp_task_t structure (task).
4480kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
4481 kmp_task_t *task;
4482 kmp_taskdata_t *taskdata;
4483  kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
4484  kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task
4485  size_t shareds_offset;
4486  size_t task_size;
4487
4488  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
4489                task_src));
4490  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4491                   TASK_FULL); // it should not be proxy task
4492  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4493 task_size = taskdata_src->td_size_alloc;
4494
4495 // Allocate a kmp_taskdata_t block and a kmp_task_t block.
4496  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
4497                task_size));
4498#if USE_FAST_MEMORY
4499  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
4500#else
4501  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
4502#endif /* USE_FAST_MEMORY */
4503  KMP_MEMCPY(taskdata, taskdata_src, task_size);
4504
4505  task = KMP_TASKDATA_TO_TASK(taskdata);
4506
4507  // Initialize new task (only specific fields not affected by memcpy)
4508  taskdata->td_task_id = KMP_GEN_TASK_ID();
4509  if (task->shareds != NULL) { // need setup shareds pointer
4510    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
4511    task->shareds = &((char *)taskdata)[shareds_offset];
4512    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
4513                     0);
4514 }
4515 taskdata->td_alloc_thread = thread;
4516 taskdata->td_parent = parent_task;
4517 // task inherits the taskgroup from the parent task
4518 taskdata->td_taskgroup = parent_task->td_taskgroup;
4519 // tied task needs to initialize the td_last_tied at creation,
4520 // untied one does this when it is scheduled for execution
4521  if (taskdata->td_flags.tiedness == TASK_TIED)
4522 taskdata->td_last_tied = taskdata;
4523
4524 // Only need to keep track of child task counts if team parallel and tasking
4525 // not serialized
4526 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4527    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4528    if (parent_task->td_taskgroup)
4529      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4530    // Only need to keep track of allocated child tasks for explicit tasks since
4531    // implicit not deallocated
4532    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4533      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4534  }
4535
4536  KA_TRACE(20,
4537           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
4538            thread, taskdata, taskdata->td_parent));
4539#if OMPT_SUPPORT
4540  if (UNLIKELY(ompt_enabled.enabled))
4541 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
4542#endif
4543 return task;
4544}
4545
4546// Routine optionally generated by the compiler for setting the lastprivate flag
4547// and calling needed constructors for private/firstprivate objects
4548// (used to form taskloop tasks from pattern task)
4549// Parameters: dest task, src task, lastprivate flag.
4550typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
4551
4552KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
4553
4554// class to encapsulate manipulating loop bounds in a taskloop task.
4555// this abstracts away the Intel vs GOMP taskloop interface for setting/getting
4556// the loop bound variables.
4557class kmp_taskloop_bounds_t {
4558 kmp_task_t *task;
4559 const kmp_taskdata_t *taskdata;
4560 size_t lower_offset;
4561 size_t upper_offset;
4562
4563public:
4564 kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
4565      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
4566 lower_offset((char *)lb - (char *)task),
4567 upper_offset((char *)ub - (char *)task) {
4568    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
4569    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
4570  }
4571  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
4572      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
4573 lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
4574 size_t get_lower_offset() const { return lower_offset; }
4575 size_t get_upper_offset() const { return upper_offset; }
4576 kmp_uint64 get_lb() const {
4577 kmp_int64 retval;
4578#if defined(KMP_GOMP_COMPAT)
4579 // Intel task just returns the lower bound normally
4580 if (!taskdata->td_flags.native) {
4581 retval = *(kmp_int64 *)((char *)task + lower_offset);
4582 } else {
4583 // GOMP task has to take into account the sizeof(long)
4584 if (taskdata->td_size_loop_bounds == 4) {
4585        kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4586        retval = (kmp_int64)*lb;
4587      } else {
4588        kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4589 retval = (kmp_int64)*lb;
4590 }
4591 }
4592#else
4593 (void)taskdata;
4594 retval = *(kmp_int64 *)((char *)task + lower_offset);
4595#endif // defined(KMP_GOMP_COMPAT)
4596 return retval;
4597 }
4598 kmp_uint64 get_ub() const {
4599 kmp_int64 retval;
4600#if defined(KMP_GOMP_COMPAT)
4601 // Intel task just returns the upper bound normally
4602 if (!taskdata->td_flags.native) {
4603 retval = *(kmp_int64 *)((char *)task + upper_offset);
4604 } else {
4605 // GOMP task has to take into account the sizeof(long)
4606 if (taskdata->td_size_loop_bounds == 4) {
4607        kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4608        retval = (kmp_int64)*ub;
4609      } else {
4610        kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4611 retval = (kmp_int64)*ub;
4612 }
4613 }
4614#else
4615 retval = *(kmp_int64 *)((char *)task + upper_offset);
4616#endif // defined(KMP_GOMP_COMPAT)
4617 return retval;
4618 }
4619 void set_lb(kmp_uint64 lb) {
4620#if defined(KMP_GOMP_COMPAT)
4621 // Intel task just sets the lower bound normally
4622 if (!taskdata->td_flags.native) {
4623 *(kmp_uint64 *)((char *)task + lower_offset) = lb;
4624 } else {
4625 // GOMP task has to take into account the sizeof(long)
4626 if (taskdata->td_size_loop_bounds == 4) {
4627        kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4628        *lower = (kmp_uint32)lb;
4629      } else {
4630        kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4631 *lower = (kmp_uint64)lb;
4632 }
4633 }
4634#else
4635 *(kmp_uint64 *)((char *)task + lower_offset) = lb;
4636#endif // defined(KMP_GOMP_COMPAT)
4637 }
4638 void set_ub(kmp_uint64 ub) {
4639#if defined(KMP_GOMP_COMPAT)
4640 // Intel task just sets the upper bound normally
4641 if (!taskdata->td_flags.native) {
4642 *(kmp_uint64 *)((char *)task + upper_offset) = ub;
4643 } else {
4644 // GOMP task has to take into account the sizeof(long)
4645 if (taskdata->td_size_loop_bounds == 4) {
4646        kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4647        *upper = (kmp_uint32)ub;
4648      } else {
4649        kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4650 *upper = (kmp_uint64)ub;
4651 }
4652 }
4653#else
4654 *(kmp_uint64 *)((char *)task + upper_offset) = ub;
4655#endif // defined(KMP_GOMP_COMPAT)
4656 }
4657};
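
// [Editor's note -- illustrative use of the class above, not new runtime
// code. The accessors hide whether the bounds live at the lb/ub offsets
// inside the kmp_task_t (Intel entry points) or as the first two longs of
// task->shareds (GOMP compatibility, where sizeof(long) may be 4 or 8).]
static inline void shift_taskloop_bounds(kmp_task_t *t, kmp_uint64 *lb,
                                         kmp_uint64 *ub, kmp_uint64 delta) {
  kmp_taskloop_bounds_t bounds(t, lb, ub);
  bounds.set_lb(bounds.get_lb() + delta); // writes the right place either way
  bounds.set_ub(bounds.get_ub() + delta);
}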
4658
4659// __kmp_taskloop_linear: Start tasks of the taskloop linearly
4660//
4661// loc Source location information
4662// gtid Global thread ID
4663// task Pattern task, exposes the loop iteration range
4664// lb Pointer to loop lower bound in task structure
4665// ub Pointer to loop upper bound in task structure
4666// st Loop stride
4667// ub_glob Global upper bound (used for lastprivate check)
4668// num_tasks Number of tasks to execute
4669// grainsize Number of loop iterations per task
4670// extras Number of chunks with grainsize+1 iterations
4671// last_chunk Reduction of grainsize for last task
4672// tc Iterations count
4673// task_dup Tasks duplication routine
4674// codeptr_ra Return address for OMPT events
4675void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
4676 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4677 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4678 kmp_uint64 grainsize, kmp_uint64 extras,
4679 kmp_int64 last_chunk, kmp_uint64 tc,
4680#if OMPT_SUPPORT
4681 void *codeptr_ra,
4682#endif
4683 void *task_dup) {
4684  KMP_COUNT_BLOCK(OMP_TASKLOOP);
4685  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
4686 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4687 // compiler provides global bounds here
4688 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4689 kmp_uint64 lower = task_bounds.get_lb();
4690 kmp_uint64 upper = task_bounds.get_ub();
4691 kmp_uint64 i;
4692 kmp_info_t *thread = __kmp_threads[gtid];
4693 kmp_taskdata_t *current_task = thread->th.th_current_task;
4694 kmp_task_t *next_task;
4695 kmp_int32 lastpriv = 0;
4696
4697  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4698                   (last_chunk < 0 ? last_chunk : extras));
4699  KMP_DEBUG_ASSERT(num_tasks > extras);
4700  KMP_DEBUG_ASSERT(num_tasks > 0);
4701  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
4702                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
4703                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
4704                ub_glob, st, task_dup));
4705
4706 // Launch num_tasks tasks, assign grainsize iterations each task
4707 for (i = 0; i < num_tasks; ++i) {
4708 kmp_uint64 chunk_minus_1;
4709 if (extras == 0) {
4710 chunk_minus_1 = grainsize - 1;
4711 } else {
4712 chunk_minus_1 = grainsize;
4713 --extras; // first extras iterations get bigger chunk (grainsize+1)
4714 }
4715 upper = lower + st * chunk_minus_1;
4716 if (upper > *ub) {
4717 upper = *ub;
4718 }
4719 if (i == num_tasks - 1) {
4720 // schedule the last task, set lastprivate flag if needed
4721 if (st == 1) { // most common case
4722      KMP_DEBUG_ASSERT(upper == *ub);
4723      if (upper == ub_glob)
4724        lastpriv = 1;
4725    } else if (st > 0) { // positive loop stride
4726      KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4727      if ((kmp_uint64)st > ub_glob - upper)
4728        lastpriv = 1;
4729    } else { // negative loop stride
4730      KMP_DEBUG_ASSERT(upper + st < *ub);
4731 if (upper - ub_glob < (kmp_uint64)(-st))
4732 lastpriv = 1;
4733 }
4734 }
4735 next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
4736    kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
4737 kmp_taskloop_bounds_t next_task_bounds =
4738 kmp_taskloop_bounds_t(next_task, task_bounds);
4739
4740 // adjust task-specific bounds
4741 next_task_bounds.set_lb(lower);
4742 if (next_taskdata->td_flags.native) {
4743 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4744 } else {
4745 next_task_bounds.set_ub(upper);
4746 }
4747    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates,
4748 // etc.
4749 ptask_dup(next_task, task, lastpriv);
4750    KA_TRACE(40,
4751             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
4752              "upper %lld stride %lld, (offsets %p %p)\n",
4753              gtid, i, next_task, lower, upper, st,
4754              next_task_bounds.get_lower_offset(),
4755              next_task_bounds.get_upper_offset()));
4756#if OMPT_SUPPORT
4757    __kmp_omp_taskloop_task(NULL, gtid, next_task,
4758                            codeptr_ra); // schedule new task
4759#if OMPT_OPTIONAL
4760    if (ompt_enabled.ompt_callback_dispatch) {
4761      OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
4762                              lower, upper, st);
4763 }
4764#endif // OMPT_OPTIONAL
4765#else
4766 __kmp_omp_task(gtid, next_task, true); // schedule new task
4767#endif
4768 lower = upper + st; // adjust lower bound for the next iteration
4769 }
4770 // free the pattern task and exit
4771 __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
4772 // do not execute the pattern task, just do internal bookkeeping
4773 __kmp_task_finish<false>(gtid, task, current_task);
4774}
4775
4776// Structure to keep taskloop parameters for auxiliary task
4777// kept in the shareds of the task structure.
4778typedef struct __taskloop_params {
4779 kmp_task_t *task;
4780 kmp_uint64 *lb;
4781 kmp_uint64 *ub;
4782 void *task_dup;
4783 kmp_int64 st;
4784 kmp_uint64 ub_glob;
4785 kmp_uint64 num_tasks;
4786 kmp_uint64 grainsize;
4787 kmp_uint64 extras;
4788 kmp_int64 last_chunk;
4789 kmp_uint64 tc;
4790 kmp_uint64 num_t_min;
4791#if OMPT_SUPPORT
4792 void *codeptr_ra;
4793#endif
4794} __taskloop_params_t;
4795
4796void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
4797 kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
4798 kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
4799 kmp_uint64,
4800#if OMPT_SUPPORT
4801 void *,
4802#endif
4803 void *);
4804
4805// Execute part of the taskloop submitted as a task.
4806int __kmp_taskloop_task(int gtid, void *ptask) {
4807 __taskloop_params_t *p =
4808 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4809 kmp_task_t *task = p->task;
4810 kmp_uint64 *lb = p->lb;
4811 kmp_uint64 *ub = p->ub;
4812 void *task_dup = p->task_dup;
4813 // p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4814 kmp_int64 st = p->st;
4815 kmp_uint64 ub_glob = p->ub_glob;
4816 kmp_uint64 num_tasks = p->num_tasks;
4817 kmp_uint64 grainsize = p->grainsize;
4818 kmp_uint64 extras = p->extras;
4819 kmp_int64 last_chunk = p->last_chunk;
4820 kmp_uint64 tc = p->tc;
4821 kmp_uint64 num_t_min = p->num_t_min;
4822#if OMPT_SUPPORT
4823 void *codeptr_ra = p->codeptr_ra;
4824#endif
4825#if KMP_DEBUG
4826  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4827  KMP_DEBUG_ASSERT(task != NULL);
4828  KA_TRACE(20,
4829           ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
4830            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4831            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4832            st, task_dup));
4833#endif
4834  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
4835 if (num_tasks > num_t_min)
4836    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4837                         grainsize, extras, last_chunk, tc, num_t_min,
4838#if OMPT_SUPPORT
4839 codeptr_ra,
4840#endif
4841 task_dup);
4842 else
4843    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4844                          grainsize, extras, last_chunk, tc,
4845#if OMPT_SUPPORT
4846 codeptr_ra,
4847#endif
4848 task_dup);
4849
4850  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
4851 return 0;
4852}
4853
4854// Schedule part of the taskloop as a task,
4855// execute the rest of the taskloop.
4856//
4857// loc Source location information
4858// gtid Global thread ID
4859// task Pattern task, exposes the loop iteration range
4860// lb Pointer to loop lower bound in task structure
4861// ub Pointer to loop upper bound in task structure
4862// st Loop stride
4863// ub_glob Global upper bound (used for lastprivate check)
4864// num_tasks Number of tasks to execute
4865// grainsize Number of loop iterations per task
4866// extras Number of chunks with grainsize+1 iterations
4867// last_chunk Reduction of grainsize for last task
4868// tc Iterations count
4869// num_t_min Threshold to launch tasks recursively
4870// task_dup Tasks duplication routine
4871// codeptr_ra Return address for OMPT events
4872void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
4873 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4874 kmp_uint64 ub_glob, kmp_uint64 num_tasks,
4875 kmp_uint64 grainsize, kmp_uint64 extras,
4876 kmp_int64 last_chunk, kmp_uint64 tc,
4877 kmp_uint64 num_t_min,
4878#if OMPT_SUPPORT
4879 void *codeptr_ra,
4880#endif
4881 void *task_dup) {
4882  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4883  KMP_DEBUG_ASSERT(task != NULL);
4884  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
4885  KA_TRACE(20,
4886           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
4887            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4888            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4889            st, task_dup));
4890 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4891 kmp_uint64 lower = *lb;
4892 kmp_info_t *thread = __kmp_threads[gtid];
4893 // kmp_taskdata_t *current_task = thread->th.th_current_task;
4894 kmp_task_t *next_task;
4895 size_t lower_offset =
4896 (char *)lb - (char *)task; // remember offset of lb in the task structure
4897 size_t upper_offset =
4898 (char *)ub - (char *)task; // remember offset of ub in the task structure
4899
4900  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4901                   (last_chunk < 0 ? last_chunk : extras));
4902  KMP_DEBUG_ASSERT(num_tasks > extras);
4903  KMP_DEBUG_ASSERT(num_tasks > 0);
4904
4905 // split the loop in two halves
4906 kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
4907 kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
4908 kmp_uint64 gr_size0 = grainsize;
4909 kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
4910 kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
4911 if (last_chunk < 0) {
4912 ext0 = ext1 = 0;
4913 last_chunk1 = last_chunk;
4914 tc0 = grainsize * n_tsk0;
4915 tc1 = tc - tc0;
4916 } else if (n_tsk0 <= extras) {
4917 gr_size0++; // integrate extras into grainsize
4918 ext0 = 0; // no extra iters in 1st half
4919 ext1 = extras - n_tsk0; // remaining extras
4920 tc0 = gr_size0 * n_tsk0;
4921 tc1 = tc - tc0;
4922 } else { // n_tsk0 > extras
4923 ext1 = 0; // no extra iters in 2nd half
4924 ext0 = extras;
4925 tc1 = grainsize * n_tsk1;
4926 tc0 = tc - tc1;
4927 }
4928 ub0 = lower + st * (tc0 - 1);
4929 lb1 = ub0 + st;
4930
4931 // create pattern task for 2nd half of the loop
4932 next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
4933 // adjust lower bound (upper bound is not changed) for the 2nd half
4934 *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
4935  if (ptask_dup != NULL) // construct firstprivates, etc.
4936 ptask_dup(next_task, task, 0);
4937 *ub = ub0; // adjust upper bound for the 1st half
4938
4939 // create auxiliary task for 2nd half of the loop
4940 // make sure new task has same parent task as the pattern task
4941 kmp_taskdata_t *current_task = thread->th.th_current_task;
4942 thread->th.th_current_task = taskdata->td_parent;
4943 kmp_task_t *new_task =
4944 __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
4945 sizeof(__taskloop_params_t), &__kmp_taskloop_task);
4946 // restore current task
4947 thread->th.th_current_task = current_task;
4948 __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
4949 p->task = next_task;
4950 p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
4951 p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
4952 p->task_dup = task_dup;
4953 p->st = st;
4954 p->ub_glob = ub_glob;
4955 p->num_tasks = n_tsk1;
4956 p->grainsize = grainsize;
4957 p->extras = ext1;
4958 p->last_chunk = last_chunk1;
4959 p->tc = tc1;
4960 p->num_t_min = num_t_min;
4961#if OMPT_SUPPORT
4962 p->codeptr_ra = codeptr_ra;
4963#endif
4964
4965#if OMPT_SUPPORT
4966  // schedule new task with correct return address for OMPT events
4967  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
4968#else
4969 __kmp_omp_task(gtid, new_task, true); // schedule new task
4970#endif
4971
4972 // execute the 1st half of current subrange
4973 if (n_tsk0 > num_t_min)
4974 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
4975 ext0, last_chunk0, tc0, num_t_min,
4976#if OMPT_SUPPORT
4977 codeptr_ra,
4978#endif
4979 task_dup);
4980 else
4981 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
4982 gr_size0, ext0, last_chunk0, tc0,
4983#if OMPT_SUPPORT
4984 codeptr_ra,
4985#endif
4986 task_dup);
4987
4988  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
4989}
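
// [Editor's worked example -- not in the source. With num_tasks = 7,
// grainsize = 4, extras = 3 (so tc = 7 * 4 + 3 = 31) and last_chunk = 0, the
// split above gives n_tsk0 = 3, n_tsk1 = 4; because n_tsk0 <= extras, the
// first half takes gr_size0 = 5, ext0 = 0, tc0 = 15, and the scheduled half
// keeps grainsize = 4 with ext1 = 0, tc1 = 16.]
static_assert(3 * 5 + 4 * 4 == 31, "both halves together cover the trip count");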
4990
4991static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
4992 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4993 int nogroup, int sched, kmp_uint64 grainsize,
4994 int modifier, void *task_dup) {
4995  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4996  KMP_DEBUG_ASSERT(task != NULL);
4997  if (nogroup == 0) {
4998#if OMPT_SUPPORT && OMPT_OPTIONAL
4999    OMPT_STORE_RETURN_ADDRESS(gtid);
5000#endif
5001 __kmpc_taskgroup(loc, gtid);
5002 }
5003
5004 // =========================================================================
5005 // calculate loop parameters
5006 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
5007 kmp_uint64 tc;
5008 // compiler provides global bounds here
5009 kmp_uint64 lower = task_bounds.get_lb();
5010 kmp_uint64 upper = task_bounds.get_ub();
5011 kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
5012 kmp_uint64 num_tasks = 0, extras = 0;
5013 kmp_int64 last_chunk =
5014 0; // reduce grainsize of last task by last_chunk in strict mode
5015 kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
5016 kmp_info_t *thread = __kmp_threads[gtid];
5017 kmp_taskdata_t *current_task = thread->th.th_current_task;
5018
5019  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
5020                "grain %llu(%d, %d), dup %p\n",
5021                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
5022                task_dup));
5023
5024 // compute trip count
5025 if (st == 1) { // most common case
5026 tc = upper - lower + 1;
5027 } else if (st < 0) {
5028 tc = (lower - upper) / (-st) + 1;
5029 } else { // st > 0
5030 tc = (upper - lower) / st + 1;
5031 }
5032 if (tc == 0) {
5033 KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop(exit): T#%d zero-trip loop\n"
, gtid); }
;
5034 // free the pattern task and exit
5035 __kmp_task_start(gtid, task, current_task);
5036 // do not execute anything for zero-trip loop
5037 __kmp_task_finish<false>(gtid, task, current_task);
5038 return;
5039 }
5040
5041#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
5042 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null);
5043 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
5044 if (ompt_enabled.ompt_callback_work) {
5045 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
5046 ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
5047 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
5048 }
5049#endif
5050
5051 if (num_tasks_min == 0)
5052 // TODO: can we choose better default heuristic?
5053 num_tasks_min =
5054 KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE)((thread->th.th_team_nproc * 10) < ((1 << 8)) ? (
thread->th.th_team_nproc * 10) : ((1 << 8)))
;
5055
5056 // compute num_tasks/grainsize based on the input provided
5057 switch (sched) {
5058 case 0: // no schedule clause specified, we can choose the default
5059 // let's try to schedule (team_size*10) tasks
5060 grainsize = thread->th.th_team_nproc * 10;
5061 KMP_FALLTHROUGH()[[fallthrough]];
5062 case 2: // num_tasks provided
5063 if (grainsize > tc) {
5064 num_tasks = tc; // too big num_tasks requested, adjust values
5065 grainsize = 1;
5066 extras = 0;
5067 } else {
5068 num_tasks = grainsize;
5069 grainsize = tc / num_tasks;
5070 extras = tc % num_tasks;
5071 }
5072 break;
5073 case 1: // grainsize provided
5074 if (grainsize > tc) {
5075 num_tasks = 1;
5076 grainsize = tc; // too big grainsize requested, adjust values
5077 extras = 0;
5078 } else {
5079 if (modifier) {
5080 num_tasks = (tc + grainsize - 1) / grainsize;
5081 last_chunk = tc - (num_tasks * grainsize);
5082 extras = 0;
5083 } else {
5084 num_tasks = tc / grainsize;
5085 // adjust grainsize for balanced distribution of iterations
5086 grainsize = tc / num_tasks;
5087 extras = tc % num_tasks;
5088 }
5089 }
5090 break;
5091 default:
5092 KMP_ASSERT2(0, "unknown scheduling of taskloop")if (!(0)) { __kmp_debug_assert(("unknown scheduling of taskloop"
), "openmp/runtime/src/kmp_tasking.cpp", 5092); }
;
5093 }
5094
5095 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
: extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 5096); }
5096 (last_chunk < 0 ? last_chunk : extras))if (!(tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk
: extras))) { __kmp_debug_assert("tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras)"
, "openmp/runtime/src/kmp_tasking.cpp", 5096); }
;
5097 KMP_DEBUG_ASSERT(num_tasks > extras)if (!(num_tasks > extras)) { __kmp_debug_assert("num_tasks > extras"
, "openmp/runtime/src/kmp_tasking.cpp", 5097); }
;
5098 KMP_DEBUG_ASSERT(num_tasks > 0)if (!(num_tasks > 0)) { __kmp_debug_assert("num_tasks > 0"
, "openmp/runtime/src/kmp_tasking.cpp", 5098); }
;
5099 // =========================================================================
5100
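As a worked example of the parameter calculation above (illustrative numbers, not taken from this report), with lb = 0, ub = 999 and st = 1 the trip count is tc = 999 - 0 + 1 = 1000, and a grainsize clause of 300 is split as follows:

// no 'strict' modifier: num_tasks = 1000/300 = 3, grainsize = 1000/3 = 333,
//                       extras = 1000%3 = 1        ->  3*333 + 1   == 1000
// with 'strict':        num_tasks = (1000+299)/300 = 4, grainsize stays 300,
//                       last_chunk = 1000 - 4*300 = -200, extras = 0
//                                                    ->  4*300 - 200 == 1000
// Both cases satisfy the KMP_DEBUG_ASSERT checks at lines 5095-5098.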
5101 // check the if-clause value first
5102 // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native)
5103 if (if_val == 0) { // if(0) specified, mark task as serial
5104 taskdata->td_flags.task_serial = 1;
5105 taskdata->td_flags.tiedness = TASK_TIED1; // AC: serial task cannot be untied
5106 // always start serial tasks linearly
5107 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
5108 grainsize, extras, last_chunk, tc,
5109#if OMPT_SUPPORT1
5110 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0),
5111#endif
5112 task_dup);
5113 // !taskdata->td_flags.native => currently force linear spawning of tasks
5114 // for GOMP_taskloop
5115 } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
5116 KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5117 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5118 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5119 last_chunk))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
;
5120 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
5121 grainsize, extras, last_chunk, tc, num_tasks_min,
5122#if OMPT_SUPPORT1
5123 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0),
5124#endif
5125 task_dup);
5126 } else {
5127 KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5128 "(%lld), grain %llu, extras %llu, last_chunk %lld\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5129 gtid, tc, num_tasks, num_tasks_min, grainsize, extras,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
5130 last_chunk))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
"(%lld), grain %llu, extras %llu, last_chunk %lld\n", gtid, tc
, num_tasks, num_tasks_min, grainsize, extras, last_chunk); }
;
5131 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
5132 grainsize, extras, last_chunk, tc,
5133#if OMPT_SUPPORT1
5134 OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0),
5135#endif
5136 task_dup);
5137 }
5138
5139#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
5140 if (ompt_enabled.ompt_callback_work) {
5141 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback(
5142 ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
5143 &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0)__builtin_return_address(0));
5144 }
5145#endif
5146
5147 if (nogroup == 0) {
5148#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
5149 OMPT_STORE_RETURN_ADDRESS(gtid)OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address
(0)};
;
5150#endif
5151 __kmpc_end_taskgroup(loc, gtid);
5152 }
5153 KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_taskloop(exit): T#%d\n"
, gtid); }
;
5154}
5155
5156/*!
5157@ingroup TASKING
5158@param loc Source location information
5159@param gtid Global thread ID
5160@param task Task structure
5161@param if_val Value of the if clause
5162@param lb Pointer to loop lower bound in task structure
5163@param ub Pointer to loop upper bound in task structure
5164@param st Loop stride
5165@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
5166@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
5167@param grainsize Schedule value if specified
5168@param task_dup Tasks duplication routine
5169
5170Execute the taskloop construct.
5171*/
5172void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
5173 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
5174 int sched, kmp_uint64 grainsize, void *task_dup) {
5175 __kmp_assert_valid_gtid(gtid);
5176 KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop(enter): T#%d\n"
, gtid); }
;
5177 __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
5178 0, task_dup);
5179 KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop(exit): T#%d\n"
, gtid); }
;
5180}
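For orientation, the comment sketch below shows what a call through this entry point could look like for a loop of 100 iterations with a grainsize(10) clause. It is a hypothetical illustration, not compiler output; `t`, `lb`, `ub` and `dup` stand for the pattern task, its embedded bounds and the duplication routine that the front end is assumed to have set up beforehand.

// __kmpc_taskloop(&loc, gtid, t, /*if_val=*/1, lb, ub, /*st=*/1,
//                 /*nogroup=*/0, /*sched=*/1 /* grainsize clause */,
//                 /*grainsize=*/10, /*task_dup=*/dup);
// With tc = 100 and grainsize = 10 this yields num_tasks = 10, grainsize = 10,
// extras = 0 in the scheduling code above.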
5181
5182/*!
5183@ingroup TASKING
5184@param loc Source location information
5185@param gtid Global thread ID
5186@param task Task structure
5187@param if_val Value of the if clause
5188@param lb Pointer to loop lower bound in task structure
5189@param ub Pointer to loop upper bound in task structure
5190@param st Loop stride
5191@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
5192@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
5193@param grainsize Schedule value if specified
5194@param modifier Modifier 'strict' for sched, 1 if present, 0 otherwise
5195@param task_dup Tasks duplication routine
5196
5197Execute the taskloop construct.
5198*/
5199void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
5200 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
5201 int nogroup, int sched, kmp_uint64 grainsize,
5202 int modifier, void *task_dup) {
5203 __kmp_assert_valid_gtid(gtid);
5204 KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop_5(enter): T#%d\n"
, gtid); }
;
5205 __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
5206 modifier, task_dup);
5207 KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmpc_taskloop_5(exit): T#%d\n"
, gtid); }
;
5208}
5209
5210/*!
5211@ingroup TASKING
5212@param gtid Global Thread ID of current thread
5213@return Returns a pointer to the thread's current task async handle. If no task
5214is present or gtid is invalid, returns NULL.
5215
5216Acquires a pointer to the target async handle from the current task.
5217*/
5218void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
5219 if (gtid == KMP_GTID_DNE(-2))
5220 return NULL__null;
5221
5222 kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5223 kmp_taskdata_t *taskdata = thread->th.th_current_task;
5224
5225 if (!taskdata)
5226 return NULL__null;
5227
5228 return &taskdata->td_target_data.async_handle;
5229}
5230
5231/*!
5232@ingroup TASKING
5233@param gtid Global Thread ID of current thread
5234@return Returns TRUE if the current task being executed by the given thread has
5235a task team allocated to it. Otherwise, returns FALSE.
5236
5237Checks if the current thread has a task team.
5238*/
5239bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
5240 if (gtid == KMP_GTID_DNE(-2))
5241 return FALSE0;
5242
5243 kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
5244 kmp_taskdata_t *taskdata = thread->th.th_current_task;
5245
5246 if (!taskdata)
5247 return FALSE0;
5248
5249 return taskdata->td_task_team != NULL__null;
5250}

/build/source/openmp/runtime/src/kmp_lock.h

1/*
2 * kmp_lock.h -- lock header file
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_LOCK_H
14#define KMP_LOCK_H
15
16#include <limits.h> // CHAR_BIT
17#include <stddef.h> // offsetof
18
19#include "kmp_debug.h"
20#include "kmp_os.h"
21
22#ifdef __cplusplus201703L
23#include <atomic>
24
25extern "C" {
26#endif // __cplusplus
27
28// ----------------------------------------------------------------------------
29// Have to copy these definitions from kmp.h because kmp.h cannot be included
30// due to circular dependencies. Will undef these at end of file.
31
32#define KMP_PAD(type, sz)(sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) \
33 (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
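KMP_PAD pads sizeof(type) up to a multiple of sz (leaving exact multiples unchanged); worked arithmetic, not present in the source:

// sizeof(type) == 40, sz == 64: 40 + (64 - (39 % 64) - 1) = 40 + 24 = 64
// sizeof(type) == 64, sz == 64: 64 + (64 - (63 % 64) - 1) = 64 +  0 = 64
// sizeof(type) == 65, sz == 64: 65 + (64 - (64 % 64) - 1) = 65 + 63 = 128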
34#define KMP_GTID_DNE(-2) (-2)
35
36// Forward declaration of ident and ident_t
37
38struct ident;
39typedef struct ident ident_t;
40
41// End of copied code.
42// ----------------------------------------------------------------------------
43
44// We need to know the size of the area we can assume that the compiler(s)
45// allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
46// compiler always allocates a pointer-sized area, as does visual studio.
47//
48// gcc however, only allocates 4 bytes for regular locks, even on 64-bit
49// intel archs. It allocates at least 8 bytes for nested lock (more on
50// recent versions), but we are bounded by the pointer-sized chunks that
51// the Intel compiler allocates.
52
53#if KMP_OS_LINUX1 && defined(KMP_GOMP_COMPAT)
54#define OMP_LOCK_T_SIZEsizeof(int) sizeof(int)
55#define OMP_NEST_LOCK_T_SIZEsizeof(void *) sizeof(void *)
56#else
57#define OMP_LOCK_T_SIZEsizeof(int) sizeof(void *)
58#define OMP_NEST_LOCK_T_SIZEsizeof(void *) sizeof(void *)
59#endif
60
61// The Intel compiler allocates a 32-byte chunk for a critical section.
62// Both gcc and visual studio only allocate enough space for a pointer.
63// Sometimes we know that the space was allocated by the Intel compiler.
64#define OMP_CRITICAL_SIZEsizeof(void *) sizeof(void *)
65#define INTEL_CRITICAL_SIZE32 32
66
67// lock flags
68typedef kmp_uint32 kmp_lock_flags_t;
69
70#define kmp_lf_critical_section1 1
71
72// When a lock table is used, the indices are of kmp_lock_index_t
73typedef kmp_uint32 kmp_lock_index_t;
74
75// When memory allocated for locks are on the lock pool (free list),
76// it is treated as structs of this type.
77struct kmp_lock_pool {
78 union kmp_user_lock *next;
79 kmp_lock_index_t index;
80};
81
82typedef struct kmp_lock_pool kmp_lock_pool_t;
83
84extern void __kmp_validate_locks(void);
85
86// ----------------------------------------------------------------------------
87// There are 5 lock implementations:
88// 1. Test and set locks.
89// 2. futex locks (Linux* OS on x86 and
90// Intel(R) Many Integrated Core Architecture)
91// 3. Ticket (Lamport bakery) locks.
92// 4. Queuing locks (with separate spin fields).
93// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks
94//
95// and 3 lock purposes:
96// 1. Bootstrap locks -- Used for a few locks available at library
97// startup-shutdown time.
98// These do not require non-negative global thread ID's.
99// 2. Internal RTL locks -- Used everywhere else in the RTL
100// 3. User locks (includes critical sections)
101// ----------------------------------------------------------------------------
102
103// ============================================================================
104// Lock implementations.
105//
106// Test and set locks.
107//
108// Non-nested test and set locks differ from the other lock kinds (except
109// futex) in that we use the memory allocated by the compiler for the lock,
110// rather than a pointer to it.
111//
112// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
113// bytes, so we have to use a lock table for nested locks, and avoid accessing
114// the depth_locked field for non-nested locks.
115//
116// Information normally available to the tools, such as lock location, lock
117// usage (normal lock vs. critical section), etc., is not available with test and
118// set locks.
119// ----------------------------------------------------------------------------
120
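A minimal, self-contained sketch of the test-and-set idea these fields encode (simplified: 0 is used as the free value here, whereas the runtime uses KMP_LOCK_FREE(tas), and the real __kmp_acquire_tas_lock adds bounded spinning, yielding and ITT notification):

#include <atomic>

struct tas_lock_sketch {
  std::atomic<int> poll{0}; // 0 = free, otherwise (gtid + 1) of the owner
};

static void tas_acquire(tas_lock_sketch &l, int gtid) {
  int expected = 0;
  // spin until poll moves from "free" to "owned by this gtid"
  while (!l.poll.compare_exchange_weak(expected, gtid + 1,
                                       std::memory_order_acquire,
                                       std::memory_order_relaxed)) {
    expected = 0; // a failed CAS stores the current owner here; reset and retry
  }
}

static void tas_release(tas_lock_sketch &l) {
  l.poll.store(0, std::memory_order_release); // hand the lock back
}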
121struct kmp_base_tas_lock {
122 // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
123 std::atomic<kmp_int32> poll;
124 kmp_int32 depth_locked; // depth locked, for nested locks only
125};
126
127typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
128
129union kmp_tas_lock {
130 kmp_base_tas_lock_t lk;
131 kmp_lock_pool_t pool; // make certain struct is large enough
132 double lk_align; // use worst case alignment; no cache line padding
133};
134
135typedef union kmp_tas_lock kmp_tas_lock_t;
136
137// Static initializer for test and set lock variables. Usage:
138// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
139#define KMP_TAS_LOCK_INITIALIZER(lock){ { (locktag_tas), 0 } } \
140 { \
141 { KMP_LOCK_FREE(tas)(locktag_tas), 0 } \
142 }
143
144extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
145extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
146extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
147extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck);
148extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck);
149
150extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
151extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
152extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
153extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck);
154extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck);
155
156#define KMP_LOCK_RELEASED1 1
157#define KMP_LOCK_STILL_HELD0 0
158#define KMP_LOCK_ACQUIRED_FIRST1 1
159#define KMP_LOCK_ACQUIRED_NEXT0 0
160#ifndef KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
161#define KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0)) \
162 (KMP_OS_LINUX1 && \
163 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640))
164#endif
165#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
166
167// ----------------------------------------------------------------------------
168// futex locks. futex locks are only available on Linux* OS.
169//
170// Like non-nested test and set lock, non-nested futex locks use the memory
171// allocated by the compiler for the lock, rather than a pointer to it.
172//
173// Information normally available to the tools, such as lock location, lock
174// usage (normal lock vs. critical section), etc., is not available with futex
175// locks. With non-nested futex locks, the lock owner is not even available.
176// ----------------------------------------------------------------------------
177
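For comparison, the classic futex-based mutex pattern looks roughly as follows. This is a generic sketch, not the runtime's code: the runtime's poll field stores 2*(gtid+1) rather than the 0/1/2 state machine used here, and it layers yielding and consistency checks on top.

#include <atomic>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

// 0 = free, 1 = locked, 2 = locked with waiters
static void futex_call(std::atomic<int> *uaddr, int op, int val) {
  // assumes std::atomic<int> is layout-compatible with int (true on this target)
  syscall(SYS_futex, reinterpret_cast<int *>(uaddr), op, val, nullptr, nullptr, 0);
}

// CAS helper that returns the value observed before the attempt
static int cmpxchg(std::atomic<int> &a, int expected, int desired) {
  a.compare_exchange_strong(expected, desired);
  return expected; // on failure this holds the observed value
}

static void futex_lock(std::atomic<int> &f) {
  int c = cmpxchg(f, 0, 1);                // uncontended fast path: 0 -> 1
  if (c != 0) {
    do {
      if (c == 2 || cmpxchg(f, 1, 2) != 0) // mark the lock as contended
        futex_call(&f, FUTEX_WAIT, 2);     // sleep while the word equals 2
    } while ((c = cmpxchg(f, 0, 2)) != 0); // retake it in the contended state
  }
}

static void futex_unlock(std::atomic<int> &f) {
  if (f.exchange(0) == 2)                  // someone may be asleep
    futex_call(&f, FUTEX_WAKE, 1);         // wake exactly one waiter
}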
178struct kmp_base_futex_lock {
179 volatile kmp_int32 poll; // KMP_LOCK_FREE(futex) => unlocked
180 // 2*(gtid+1) of owning thread, 0 if unlocked
181 // locked: (gtid+1) of owning thread
182 kmp_int32 depth_locked; // depth locked, for nested locks only
183};
184
185typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
186
187union kmp_futex_lock {
188 kmp_base_futex_lock_t lk;
189 kmp_lock_pool_t pool; // make certain struct is large enough
190 double lk_align; // use worst case alignment
191 // no cache line padding
192};
193
194typedef union kmp_futex_lock kmp_futex_lock_t;
195
196// Static initializer for futex lock variables. Usage:
197// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
198#define KMP_FUTEX_LOCK_INITIALIZER(lock){ { (locktag_futex), 0 } } \
199 { \
200 { KMP_LOCK_FREE(futex)(locktag_futex), 0 } \
201 }
202
203extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
204extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
205extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
206extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck);
207extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck);
208
209extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck,
210 kmp_int32 gtid);
211extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
212extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck,
213 kmp_int32 gtid);
214extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck);
215extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck);
216
217#endif // KMP_USE_FUTEX
218
219// ----------------------------------------------------------------------------
220// Ticket locks.
221
222#ifdef __cplusplus201703L
223
224#ifdef _MSC_VER
225// MSVC won't allow use of std::atomic<> in a union since it has non-trivial
226// copy constructor.
227
228struct kmp_base_ticket_lock {
229 // `initialized' must be the first entry in the lock data structure!
230 std::atomic_bool initialized;
231 volatile union kmp_ticket_lock *self; // points to the lock union
232 ident_t const *location; // Source code location of omp_init_lock().
233 std::atomic_uint
234 next_ticket; // ticket number to give to next thread which acquires
235 std::atomic_uint now_serving; // ticket number for thread which holds the lock
236 std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked
237 std::atomic_int depth_locked; // depth locked, for nested locks only
238 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
239};
240#else
241struct kmp_base_ticket_lock {
242 // `initialized' must be the first entry in the lock data structure!
243 std::atomic<bool> initialized;
244 volatile union kmp_ticket_lock *self; // points to the lock union
245 ident_t const *location; // Source code location of omp_init_lock().
246 std::atomic<unsigned>
247 next_ticket; // ticket number to give to next thread which acquires
248 std::atomic<unsigned>
249 now_serving; // ticket number for thread which holds the lock
250 std::atomic<int> owner_id; // (gtid+1) of owning thread, 0 if unlocked
251 std::atomic<int> depth_locked; // depth locked, for nested locks only
252 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
253};
254#endif
255
256#else // __cplusplus
257
258struct kmp_base_ticket_lock;
259
260#endif // !__cplusplus
261
262typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
263
264union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_ticket_lock {
265 kmp_base_ticket_lock_t
266 lk; // This field must be first to allow static initializing.
267 kmp_lock_pool_t pool;
268 double lk_align; // use worst case alignment
269 char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)(sizeof(kmp_base_ticket_lock_t) + (64 - ((sizeof(kmp_base_ticket_lock_t
) - 1) % (64)) - 1))
];
270};
271
272typedef union kmp_ticket_lock kmp_ticket_lock_t;
273
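The next_ticket/now_serving pair above implements the classic ticket (bakery) protocol; a minimal self-contained sketch (the real __kmp_acquire_ticket_lock also maintains owner_id, depth_locked and back-off):

#include <atomic>

struct ticket_lock_sketch {
  std::atomic<unsigned> next_ticket{0}; // handed to the next arriving thread
  std::atomic<unsigned> now_serving{0}; // ticket currently allowed to run
};

static void ticket_acquire(ticket_lock_sketch &l) {
  unsigned my_ticket = l.next_ticket.fetch_add(1, std::memory_order_relaxed);
  while (l.now_serving.load(std::memory_order_acquire) != my_ticket) {
    // spin (the runtime would back off / yield here)
  }
}

static void ticket_release(ticket_lock_sketch &l) {
  l.now_serving.fetch_add(1, std::memory_order_release);
}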
274// Static initializer for simple ticket lock variables. Usage:
275// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
276// Note the macro argument. It is important to make var properly initialized.
277#define KMP_TICKET_LOCK_INITIALIZER(lock){ { true, &(lock), __null, 0U, 0U, 0, -1 } } \
278 { \
279 { true, &(lock), NULL__null, 0U, 0U, 0, -1 } \
280 }
281
282extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
283extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
284extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck,
285 kmp_int32 gtid);
286extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
287extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck);
288extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck);
289
290extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck,
291 kmp_int32 gtid);
292extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck,
293 kmp_int32 gtid);
294extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck,
295 kmp_int32 gtid);
296extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck);
297extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck);
298
299// ----------------------------------------------------------------------------
300// Queuing locks.
301
302#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
303
304struct kmp_adaptive_lock_info;
305
306typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
307
308#if KMP_DEBUG_ADAPTIVE_LOCKS0
309
310struct kmp_adaptive_lock_statistics {
311 /* So we can get stats from locks that haven't been destroyed. */
312 kmp_adaptive_lock_info_t *next;
313 kmp_adaptive_lock_info_t *prev;
314
315 /* Other statistics */
316 kmp_uint32 successfulSpeculations;
317 kmp_uint32 hardFailedSpeculations;
318 kmp_uint32 softFailedSpeculations;
319 kmp_uint32 nonSpeculativeAcquires;
320 kmp_uint32 nonSpeculativeAcquireAttempts;
321 kmp_uint32 lemmingYields;
322};
323
324typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
325
326extern void __kmp_print_speculative_stats();
327extern void __kmp_init_speculative_stats();
328
329#endif // KMP_DEBUG_ADAPTIVE_LOCKS
330
331struct kmp_adaptive_lock_info {
332 /* Values used for adaptivity.
333 Although these are accessed from multiple threads we don't access them
334 atomically, because if we miss updates it probably doesn't matter much. (It
335 just affects our decision about whether to try speculation on the lock). */
336 kmp_uint32 volatile badness;
337 kmp_uint32 volatile acquire_attempts;
338 /* Parameters of the lock. */
339 kmp_uint32 max_badness;
340 kmp_uint32 max_soft_retries;
341
342#if KMP_DEBUG_ADAPTIVE_LOCKS0
343 kmp_adaptive_lock_statistics_t volatile stats;
344#endif
345};
346
347#endif // KMP_USE_ADAPTIVE_LOCKS
348
349struct kmp_base_queuing_lock {
350
351 // `initialized' must be the first entry in the lock data structure!
352 volatile union kmp_queuing_lock
353 *initialized; // Points to the lock union if in initialized state.
354
355 ident_t const *location; // Source code location of omp_init_lock().
356
357 KMP_ALIGN(8)__attribute__((aligned(8))) // tail_id must be 8-byte aligned!
358
359 volatile kmp_int32
360 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
361 // Must be no padding here since head/tail used in 8-byte CAS
362 volatile kmp_int32
363 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
364 // Decl order assumes little endian
365 // bakery-style lock
366 volatile kmp_uint32
367 next_ticket; // ticket number to give to next thread which acquires
368 volatile kmp_uint32
369 now_serving; // ticket number for thread which holds the lock
370 volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
371 kmp_int32 depth_locked; // depth locked, for nested locks only
372
373 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
374};
375
376typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
377
378KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0)static_assert(__builtin_offsetof(kmp_base_queuing_lock_t, tail_id
) % 8 == 0, "Build condition error")
;
379
380union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_queuing_lock {
381 kmp_base_queuing_lock_t
382 lk; // This field must be first to allow static initializing.
383 kmp_lock_pool_t pool;
384 double lk_align; // use worst case alignment
385 char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)(sizeof(kmp_base_queuing_lock_t) + (64 - ((sizeof(kmp_base_queuing_lock_t
) - 1) % (64)) - 1))
];
386};
387
388typedef union kmp_queuing_lock kmp_queuing_lock_t;
389
390extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
391extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
392extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
393extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck);
394extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck);
395
396extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck,
397 kmp_int32 gtid);
398extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck,
399 kmp_int32 gtid);
400extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck,
401 kmp_int32 gtid);
402extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck);
403extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck);
404
405#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
406
407// ----------------------------------------------------------------------------
408// Adaptive locks.
409struct kmp_base_adaptive_lock {
410 kmp_base_queuing_lock qlk;
411 KMP_ALIGN(CACHE_LINE)__attribute__((aligned(64)))
412 kmp_adaptive_lock_info_t
413 adaptive; // Information for the speculative adaptive lock
414};
415
416typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
417
418union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_adaptive_lock {
419 kmp_base_adaptive_lock_t lk;
420 kmp_lock_pool_t pool;
421 double lk_align;
422 char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)(sizeof(kmp_base_adaptive_lock_t) + (64 - ((sizeof(kmp_base_adaptive_lock_t
) - 1) % (64)) - 1))
];
423};
424typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
425
426#define GET_QLK_PTR(l)((kmp_queuing_lock_t *)&(l)->lk.qlk) ((kmp_queuing_lock_t *)&(l)->lk.qlk)
427
428#endif // KMP_USE_ADAPTIVE_LOCKS
429
430// ----------------------------------------------------------------------------
431// DRDPA ticket locks.
432struct kmp_base_drdpa_lock {
433 // All of the fields on the first cache line are only written when
434 // initializing or reconfiguring the lock. These are relatively rare
435 // operations, so data from the first cache line will usually stay resident in
436 // the cache of each thread trying to acquire the lock.
437 //
438 // initialized must be the first entry in the lock data structure!
439 KMP_ALIGN_CACHE__attribute__((aligned(64)))
440
441 volatile union kmp_drdpa_lock
442 *initialized; // points to the lock union if in initialized state
443 ident_t const *location; // Source code location of omp_init_lock().
444 std::atomic<std::atomic<kmp_uint64> *> polls;
445 std::atomic<kmp_uint64> mask; // is 2**num_polls-1 for mod op
446 kmp_uint64 cleanup_ticket; // thread with cleanup ticket
447 std::atomic<kmp_uint64> *old_polls; // will deallocate old_polls
448 kmp_uint32 num_polls; // must be power of 2
449
450 // next_ticket needs to exist in a separate cache line, as it is
451 // invalidated every time a thread takes a new ticket.
452 KMP_ALIGN_CACHE__attribute__((aligned(64)))
453
454 std::atomic<kmp_uint64> next_ticket;
455
456 // now_serving is used to store our ticket value while we hold the lock. It
457 // has a slightly different meaning in the DRDPA ticket locks (where it is
458 // written by the acquiring thread) than it does in the simple ticket locks
459 // (where it is written by the releasing thread).
460 //
461 // Since now_serving is only read and written in the critical section,
462 // it is non-volatile, but it needs to exist on a separate cache line,
463 // as it is invalidated at every lock acquire.
464 //
465 // Likewise, the vars used for nested locks (owner_id and depth_locked) are
466 // only written by the thread owning the lock, so they are put in this cache
467 // line. owner_id is read by other threads, so it must be declared volatile.
468 KMP_ALIGN_CACHE__attribute__((aligned(64)))
469 kmp_uint64 now_serving; // doesn't have to be volatile
470 volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
471 kmp_int32 depth_locked; // depth locked
472 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
473};
474
475typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
476
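Reading the field comments together, the algorithm is roughly the following (a paraphrase of what the fields suggest, not a restatement of kmp_lock.cpp):

// An acquiring thread takes t = next_ticket++ and spins on polls[t & mask]
// until that slot reaches its ticket, then records t in now_serving; the
// release publishes t + 1 into polls[(t + 1) & mask], so only the next ticket
// holder's polling slot is touched, which is what distributes the polling.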
477union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_drdpa_lock {
478 kmp_base_drdpa_lock_t
479 lk; // This field must be first to allow static initializing. */
480 kmp_lock_pool_t pool;
481 double lk_align; // use worst case alignment
482 char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)(sizeof(kmp_base_drdpa_lock_t) + (64 - ((sizeof(kmp_base_drdpa_lock_t
) - 1) % (64)) - 1))
];
483};
484
485typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
486
487extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
488extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
489extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
490extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck);
491extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck);
492
493extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
494 kmp_int32 gtid);
495extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
496extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
497 kmp_int32 gtid);
498extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
499extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
500
501// ============================================================================
502// Lock purposes.
503// ============================================================================
504
505// Bootstrap locks.
506//
507// Bootstrap locks -- very few locks used at library initialization time.
508// Bootstrap locks are currently implemented as ticket locks.
509// They could also be implemented as test and set lock, but cannot be
510// implemented with other lock kinds as they require gtids which are not
511// available at initialization time.
512
513typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
514
515#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock){ { true, &((lock)), __null, 0U, 0U, 0, -1 } } KMP_TICKET_LOCK_INITIALIZER((lock)){ { true, &((lock)), __null, 0U, 0U, 0, -1 } }
516#define KMP_BOOTSTRAP_LOCK_INIT(lock)kmp_bootstrap_lock_t lock = { { true, &(lock), __null, 0U
, 0U, 0, -1 } }
\
517 kmp_bootstrap_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock){ { true, &(lock), __null, 0U, 0U, 0, -1 } }
518
519static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
520 return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE(-2));
    4: Value assigned to field 'tt_found_tasks', which participates in a condition later
521}
522
523static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
524 return __kmp_test_ticket_lock(lck, KMP_GTID_DNE(-2));
525}
526
527static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
528 __kmp_release_ticket_lock(lck, KMP_GTID_DNE(-2));
529}
530
531static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
532 __kmp_init_ticket_lock(lck);
533}
534
535static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
536 __kmp_destroy_ticket_lock(lck);
537}
538
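A hypothetical use of these wrappers inside the runtime (where kmp_lock.h is included) follows the documented initializer pattern; the lock and function names below are made up for illustration:

static kmp_bootstrap_lock_t example_init_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(example_init_lock);

static void example_one_time_setup(void) {
  __kmp_acquire_bootstrap_lock(&example_init_lock); // no gtid needed this early
  // ... library-startup work that must not race ...
  __kmp_release_bootstrap_lock(&example_init_lock);
}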
539// Internal RTL locks.
540//
541// Internal RTL locks are also implemented as ticket locks, for now.
542//
543// FIXME - We should go through and figure out which lock kind works best for
544// each internal lock, and use the type declaration and function calls for
545// that explicit lock kind (and get rid of this section).
546
547typedef kmp_ticket_lock_t kmp_lock_t;
548
549#define KMP_LOCK_INIT(lock)kmp_lock_t lock = { { true, &(lock), __null, 0U, 0U, 0, -
1 } }
kmp_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock){ { true, &(lock), __null, 0U, 0U, 0, -1 } }
550
551static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) {
552 return __kmp_acquire_ticket_lock(lck, gtid);
553}
554
555static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) {
556 return __kmp_test_ticket_lock(lck, gtid);
557}
558
559static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) {
560 __kmp_release_ticket_lock(lck, gtid);
561}
562
563static inline void __kmp_init_lock(kmp_lock_t *lck) {
564 __kmp_init_ticket_lock(lck);
565}
566
567static inline void __kmp_destroy_lock(kmp_lock_t *lck) {
568 __kmp_destroy_ticket_lock(lck);
569}
570
571// User locks.
572//
573// Do not allocate objects of type union kmp_user_lock!!! This will waste space
574// unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of
575// __kmp_user_lock_kind and allocate objects of the type of the appropriate
576// union member, and cast their addresses to kmp_user_lock_p.
577
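A sketch of the allocation pattern the comment above prescribes (illustrative only: std::malloc stands in for the runtime's own allocator, and the real path is __kmp_user_lock_allocate declared later in this header):

#include <cstdlib>

static kmp_user_lock_p allocate_lock_of_current_kind() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:     return (kmp_user_lock_p)std::malloc(sizeof(kmp_tas_lock_t));
  case lk_ticket:  return (kmp_user_lock_p)std::malloc(sizeof(kmp_ticket_lock_t));
  case lk_queuing: return (kmp_user_lock_p)std::malloc(sizeof(kmp_queuing_lock_t));
  case lk_drdpa:   return (kmp_user_lock_p)std::malloc(sizeof(kmp_drdpa_lock_t));
  default:         return nullptr; // futex/rtm/adaptive kinds omitted from this sketch
  }
}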
578enum kmp_lock_kind {
579 lk_default = 0,
580 lk_tas,
581#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
582 lk_futex,
583#endif
584#if KMP_USE_DYNAMIC_LOCK1 && KMP_USE_TSX(0 || 1) && !0
585 lk_hle,
586 lk_rtm_queuing,
587 lk_rtm_spin,
588#endif
589 lk_ticket,
590 lk_queuing,
591 lk_drdpa,
592#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
593 lk_adaptive
594#endif // KMP_USE_ADAPTIVE_LOCKS
595};
596
597typedef enum kmp_lock_kind kmp_lock_kind_t;
598
599extern kmp_lock_kind_t __kmp_user_lock_kind;
600
601union kmp_user_lock {
602 kmp_tas_lock_t tas;
603#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
604 kmp_futex_lock_t futex;
605#endif
606 kmp_ticket_lock_t ticket;
607 kmp_queuing_lock_t queuing;
608 kmp_drdpa_lock_t drdpa;
609#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
610 kmp_adaptive_lock_t adaptive;
611#endif // KMP_USE_ADAPTIVE_LOCKS
612 kmp_lock_pool_t pool;
613};
614
615typedef union kmp_user_lock *kmp_user_lock_p;
616
617#if !KMP_USE_DYNAMIC_LOCK1
618
619extern size_t __kmp_base_user_lock_size;
620extern size_t __kmp_user_lock_size;
621
622extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck);
623
624static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) {
625 KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL)if (!(__kmp_get_user_lock_owner_ != __null)) { __kmp_debug_assert
("__kmp_get_user_lock_owner_ != __null", "openmp/runtime/src/kmp_lock.h"
, 625); }
;
626 return (*__kmp_get_user_lock_owner_)(lck);
627}
628
629extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
630 kmp_int32 gtid);
631
632#if KMP_OS_LINUX1 && \
633 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
634
635#define __kmp_acquire_user_lock_with_checks(lck, gtid) \
636 if (__kmp_user_lock_kind == lk_tas) { \
637 if (__kmp_env_consistency_check) { \
638 char const *const func = "omp_set_lock"; \
639 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZEsizeof(int)) && \
640 lck->tas.lk.depth_locked != -1) { \
641 KMP_FATAL(LockNestableUsedAsSimple, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockNestableUsedAsSimple
, func), __kmp_msg_null)
; \
642 } \
643 if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) { \
644 KMP_FATAL(LockIsAlreadyOwned, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockIsAlreadyOwned,
func), __kmp_msg_null)
; \
645 } \
646 } \
647 if (lck->tas.lk.poll != 0 || \
648 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
649 kmp_uint32 spins; \
650 kmp_uint64 time; \
651 KMP_FSYNC_PREPARE(lck)(!__kmp_itt_fsync_prepare_ptr__3_0) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0
((void *)(lck))
; \
652 KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; \
653 KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; }; \
654 do { \
655 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
= (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
__kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
== 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
== 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } } }
; \
656 } while ( \
657 lck->tas.lk.poll != 0 || \
658 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
659 } \
660 KMP_FSYNC_ACQUIRED(lck)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(lck))
; \
661 } else { \
662 KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_acquire_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 662); }
; \
663 (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); \
664 }
665
666#else
667static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck,
668 kmp_int32 gtid) {
669 KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_acquire_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 669); }
;
670 return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
671}
672#endif
673
674extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
675 kmp_int32 gtid);
676
677#if KMP_OS_LINUX1 && \
678 (KMP_ARCH_X860 || KMP_ARCH_X86_641 || KMP_ARCH_ARM || KMP_ARCH_AARCH640)
679
680#include "kmp_i18n.h" /* AC: KMP_FATAL definition */
681extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
682static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
683 kmp_int32 gtid) {
684 if (__kmp_user_lock_kind == lk_tas) {
685 if (__kmp_env_consistency_check) {
686 char const *const func = "omp_test_lock";
687 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZEsizeof(int)) &&
688 lck->tas.lk.depth_locked != -1) {
689 KMP_FATAL(LockNestableUsedAsSimple, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockNestableUsedAsSimple
, func), __kmp_msg_null)
;
690 }
691 }
692 return ((lck->tas.lk.poll == 0) &&
693 __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
694 } else {
695 KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL)if (!(__kmp_test_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 695); }
;
696 return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
697 }
698}
699#else
700static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
701 kmp_int32 gtid) {
702 KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL)if (!(__kmp_test_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 702); }
;
703 return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
704}
705#endif
706
707extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
708 kmp_int32 gtid);
709
710static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck,
711 kmp_int32 gtid) {
712 KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL)if (!(__kmp_release_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_release_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 712); }
;
713 (*__kmp_release_user_lock_with_checks_)(lck, gtid);
714}
715
716extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck);
717
718static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) {
719 KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL)if (!(__kmp_init_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_init_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 719); }
;
720 (*__kmp_init_user_lock_with_checks_)(lck);
721}
722
723// We need a non-checking version of destroy lock for when the RTL is
724// doing the cleanup as it can't always tell if the lock is nested or not.
725extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck);
726
727static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) {
728 KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL)if (!(__kmp_destroy_user_lock_ != __null)) { __kmp_debug_assert
("__kmp_destroy_user_lock_ != __null", "openmp/runtime/src/kmp_lock.h"
, 728); }
;
729 (*__kmp_destroy_user_lock_)(lck);
730}
731
732extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck);
733
734static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) {
735 KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL)if (!(__kmp_destroy_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_destroy_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 735); }
;
736 (*__kmp_destroy_user_lock_with_checks_)(lck);
737}
738
739extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
740 kmp_int32 gtid);
741
742#if KMP_OS_LINUX1 && (KMP_ARCH_X860 || KMP_ARCH_X86_641)
743
744#define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth) \
745 if (__kmp_user_lock_kind == lk_tas) { \
746 if (__kmp_env_consistency_check) { \
747 char const *const func = "omp_set_nest_lock"; \
748 if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZEsizeof(void *)) && \
749 lck->tas.lk.depth_locked == -1) { \
750 KMP_FATAL(LockSimpleUsedAsNestable, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockSimpleUsedAsNestable
, func), __kmp_msg_null)
; \
751 } \
752 } \
753 if (lck->tas.lk.poll - 1 == gtid) { \
754 lck->tas.lk.depth_locked += 1; \
755 *depth = KMP_LOCK_ACQUIRED_NEXT0; \
756 } else { \
757 if ((lck->tas.lk.poll != 0) || \
758 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
759 kmp_uint32 spins; \
760 kmp_uint64 time; \
761 KMP_FSYNC_PREPARE(lck)(!__kmp_itt_fsync_prepare_ptr__3_0) ? (void)0 : __kmp_itt_fsync_prepare_ptr__3_0
((void *)(lck))
; \
762 KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; }; \
763 KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; }; \
764 do { \
765 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time){ if (__kmp_tpause_enabled) { if (((__kmp_nth) > (__kmp_avail_proc
? __kmp_avail_proc : __kmp_xproc))) { __kmp_tpause(0, (time)
); } else { __kmp_tpause(__kmp_tpause_hint, (time)); } (time)
= (time << 1 | 1) & ((kmp_uint64)0xFFFF); } else {
__kmp_x86_pause(); if ((((__kmp_use_yield == 1 || __kmp_use_yield
== 2) && (((__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc
: __kmp_xproc)))))) { __kmp_yield(); } else if (__kmp_use_yield
== 1) { (spins) -= 2; if (!(spins)) { __kmp_yield(); (spins)
= __kmp_yield_next; } } } }
; \
766 } while ( \
767 (lck->tas.lk.poll != 0) || \
768 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
769 } \
770 lck->tas.lk.depth_locked = 1; \
771 *depth = KMP_LOCK_ACQUIRED_FIRST1; \
772 } \
773 KMP_FSYNC_ACQUIRED(lck)(!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)(lck))
; \
774 } else { \
775 KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_nested_user_lock_with_checks_ != __null))
{ __kmp_debug_assert("__kmp_acquire_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 775); }
; \
776 *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); \
777 }
778
779#else
780static inline void
781__kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid,
782 int *depth) {
783 KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL)if (!(__kmp_acquire_nested_user_lock_with_checks_ != __null))
{ __kmp_debug_assert("__kmp_acquire_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 783); }
;
784 *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid);
785}
786#endif
787
788extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
789 kmp_int32 gtid);
790
791#if KMP_OS_LINUX1 && (KMP_ARCH_X860 || KMP_ARCH_X86_641)
792static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
793 kmp_int32 gtid) {
794 if (__kmp_user_lock_kind == lk_tas) {
795 int retval;
796 if (__kmp_env_consistency_check) {
797 char const *const func = "omp_test_nest_lock";
798 if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZEsizeof(void *)) &&
799 lck->tas.lk.depth_locked == -1) {
800 KMP_FATAL(LockSimpleUsedAsNestable, func)__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_LockSimpleUsedAsNestable
, func), __kmp_msg_null)
;
801 }
802 }
803 KMP_DEBUG_ASSERT(gtid >= 0)if (!(gtid >= 0)) { __kmp_debug_assert("gtid >= 0", "openmp/runtime/src/kmp_lock.h"
, 803); }
;
804 if (lck->tas.lk.poll - 1 ==
805 gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
806 return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
807 }
808 retval = ((lck->tas.lk.poll == 0) &&
809 __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
810 if (retval) {
811 KMP_MB();
812 lck->tas.lk.depth_locked = 1;
813 }
814 return retval;
815 } else {
816 KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL)if (!(__kmp_test_nested_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_nested_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 816); }
;
817 return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
818 }
819}
820#else
821static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
822 kmp_int32 gtid) {
823 KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL)if (!(__kmp_test_nested_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_test_nested_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 823); }
;
824 return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
825}
826#endif
827
828extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
829 kmp_int32 gtid);
830
831static inline int
832__kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck,
833 kmp_int32 gtid) {
834 KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL)if (!(__kmp_release_nested_user_lock_with_checks_ != __null))
{ __kmp_debug_assert("__kmp_release_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 834); }
;
835 return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid);
836}
837
838extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
839
840static inline void
841__kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) {
842 KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL)if (!(__kmp_init_nested_user_lock_with_checks_ != __null)) { __kmp_debug_assert
("__kmp_init_nested_user_lock_with_checks_ != __null", "openmp/runtime/src/kmp_lock.h"
, 842); }
;
843 (*__kmp_init_nested_user_lock_with_checks_)(lck);
844}
845
846extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
847
848static inline void
849__kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
850 KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL)if (!(__kmp_destroy_nested_user_lock_with_checks_ != __null))
{ __kmp_debug_assert("__kmp_destroy_nested_user_lock_with_checks_ != __null"
, "openmp/runtime/src/kmp_lock.h", 850); }
;
851 (*__kmp_destroy_nested_user_lock_with_checks_)(lck);
852}
853
854// user lock functions which do not necessarily exist for all lock kinds.
855//
856// The "set" functions usually have wrapper routines that check for a NULL set
857// function pointer and call it if non-NULL.
858//
859// In some cases, it makes sense to have a "get" wrapper function check for a
860// NULL get function pointer and return NULL / invalid value / error code if
861// the function pointer is NULL.
862//
863// In other cases, the calling code really should differentiate between an
864// unimplemented function and one that is implemented but returning NULL /
865// invalid value. If this is the case, no get function wrapper exists.
866
867extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);
868
869// no set function; fields set during local allocation
870
871extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);
872
873static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) {
874 if (__kmp_get_user_lock_location_ != NULL__null) {
875 return (*__kmp_get_user_lock_location_)(lck);
876 } else {
877 return NULL__null;
878 }
879}
880
881extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
882 const ident_t *loc);
883
884static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck,
885 const ident_t *loc) {
886 if (__kmp_set_user_lock_location_ != NULL__null) {
887 (*__kmp_set_user_lock_location_)(lck, loc);
888 }
889}
890
891extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck);
892
893extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
894 kmp_lock_flags_t flags);
895
896static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
897 kmp_lock_flags_t flags) {
898 if (__kmp_set_user_lock_flags_ != NULL__null) {
899 (*__kmp_set_user_lock_flags_)(lck, flags);
900 }
901}
902
903// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
904extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);
905
906// Macros for binding user lock functions.
907#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) \
908 { \
909 __kmp_acquire##nest##user_lock_with_checks_ = (int (*)( \
910 kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix; \
911 __kmp_release##nest##user_lock_with_checks_ = (int (*)( \
912 kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix; \
913 __kmp_test##nest##user_lock_with_checks_ = (int (*)( \
914 kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix; \
915 __kmp_init##nest##user_lock_with_checks_ = \
916 (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix; \
917 __kmp_destroy##nest##user_lock_with_checks_ = \
918 (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix; \
919 }
920
921#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
922#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) \
923 KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
924#define KMP_BIND_NESTED_USER_LOCK(kind) \
925 KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
926#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) \
927 KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
928
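For example, KMP_BIND_USER_LOCK(ticket) instantiates the template above with nest = "_", kind = "ticket", suffix = "lock", which expands to assignments such as:

//   __kmp_acquire_user_lock_with_checks_ =
//       (int (*)(kmp_user_lock_p, kmp_int32))__kmp_acquire_ticket_lock;
// and likewise for the release, test, init and destroy function pointers.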
929// User lock table & lock allocation
930/* On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of
931 memory for the lock variable, which is not enough to store a pointer, so we have
932 to use lock indexes instead of pointers and maintain a lock table to map indexes to
933 pointers.
934
935
936 Note: The first element of the table is not a pointer to lock! It is a
937 pointer to previously allocated table (or NULL if it is the first table).
938
939 Usage:
940
941 if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
942 Lock table is fully utilized. User locks are indexes, so table is used on
943 user lock operation.
944 Note: it may be the case (lin_32) that we don't need to use a lock
945 table for regular locks, but do need the table for nested locks.
946 }
947 else {
948 Lock table initialized but not actually used.
949 }
950*/
951
952struct kmp_lock_table {
953 kmp_lock_index_t used; // Number of used elements
954 kmp_lock_index_t allocated; // Number of allocated elements
955 kmp_user_lock_p *table; // Lock table.
956};
957
958typedef struct kmp_lock_table kmp_lock_table_t;
959
960extern kmp_lock_table_t __kmp_user_lock_table;
961extern kmp_user_lock_p __kmp_lock_pool;
962
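A sketch of the index-to-pointer mapping just described (illustrative; real lookups go through __kmp_lookup_user_lock, declared below, which also validates its input):

static inline kmp_user_lock_p lock_from_index(kmp_lock_index_t idx) {
  // slot 0 links to the previously allocated table and is not a lock
  KMP_DEBUG_ASSERT(idx != 0 && idx < __kmp_user_lock_table.used);
  return __kmp_user_lock_table.table[idx];
}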
963struct kmp_block_of_locks {
964 struct kmp_block_of_locks *next_block;
965 void *locks;
966};
967
968typedef struct kmp_block_of_locks kmp_block_of_locks_t;
969
970extern kmp_block_of_locks_t *__kmp_lock_blocks;
971extern int __kmp_num_locks_in_block;
972
973extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock,
974 kmp_int32 gtid,
975 kmp_lock_flags_t flags);
976extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
977 kmp_user_lock_p lck);
978extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock,
979 char const *func);
980extern void __kmp_cleanup_user_locks();
981
982#define KMP_CHECK_USER_LOCK_INIT() \
983 { \
984 if (!TCR_4(__kmp_init_user_locks)(__kmp_init_user_locks)) { \
985 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); \
986 if (!TCR_4(__kmp_init_user_locks)(__kmp_init_user_locks)) { \
987 TCW_4(__kmp_init_user_locks, TRUE)(__kmp_init_user_locks) = ((!0)); \
988 } \
989 __kmp_release_bootstrap_lock(&__kmp_initz_lock); \
990 } \
991 }
992
993#endif // KMP_USE_DYNAMIC_LOCK
994
995#undef KMP_PAD
996#undef KMP_GTID_DNE(-2)
997
998#if KMP_USE_DYNAMIC_LOCK1
999// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without
1000// breaking the current compatibility. Essential functionality of this new code
1001// is dynamic dispatch, but it also implements (or enables implementation of)
1002// hinted user locks and critical sections, which are part of OMP 4.5.
1003//
1004// The lock type can be decided at creation time (i.e., lock initialization),
1005// and each subsequent lock function call on the created lock object requires
1006// type extraction and a call through a jump table using the extracted type.
1007// This type information is stored in two different ways depending on the size
1008// of the lock object, and we differentiate lock types by this size requirement:
1009// direct and indirect locks.
1010//
1011// Direct locks:
1012// A direct lock object fits into the space created by the compiler for an
1013// omp_lock_t object, and TAS/Futex locks fall into this category. We use the
1014// low byte of the lock object as storage for the lock type, and appropriate
1015// bit operations are required to access the data meaningful to the lock
1016// algorithms. Also, to differentiate a direct lock from an indirect lock, 1 is
1017// written to the LSB of the lock object. The newly introduced "hle" lock is
1018// also a direct lock.
1019//
1020// Indirect locks:
1021// An indirect lock object requires more space than the compiler-generated
1022// space, and it is allocated from the heap. Depending on the size of the
1023// compiler-generated space for the lock (i.e., the size of omp_lock_t), this
1024// omp_lock_t object stores either the address of the heap-allocated indirect
1025// lock (void * fits in the object) or an index to the indirect lock table entry
1026// that holds the address. Ticket/Queuing/DRDPA/Adaptive locks fall into this
1027// category, and the newly introduced "rtm" lock is also an indirect lock, which
1028// was implemented on top of the Queuing lock. When the omp_lock_t object holds
1029// an index (not a lock address), 0 is written to the LSB to differentiate the
1030// lock from a direct lock, and the remaining part is the actual index into the
1031// indirect lock table.
1032
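As a concrete illustration of the tagging just described (a standalone sketch with made-up values, not runtime code): a direct lock's word carries its type in the low bits with the LSB set to 1, while an indirect lock's word carries a table index shifted left by one, so the LSB is 0.

// Standalone sketch of the LSB tagging scheme described above.
#include <cassert>
#include <cstdint>

int main() {
  // Direct lock: a sequence/tag value stored as (seq << 1) | 1, so LSB == 1.
  std::uint32_t direct_seq = 1; // illustrative value, e.g. a TAS-style lock
  std::uint32_t direct_word = (direct_seq << 1) | 1;
  assert((direct_word & 1) == 1); // recognized as a direct lock

  // Indirect lock: a table index stored as (index << 1), so LSB == 0.
  std::uint32_t table_index = 42; // illustrative index
  std::uint32_t indirect_word = table_index << 1;
  assert((indirect_word & 1) == 0);            // recognized as an indirect lock
  assert((indirect_word >> 1) == table_index); // index recovered by shifting
  return 0;
}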
1033#include <stdint.h> // for uintptr_t
1034
1035// Shortcuts
1036#define KMP_USE_INLINED_TAS \
1037 (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
1038#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
1039
1040// List of lock definitions; all nested locks are indirect locks.
1041// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
1042// All nested locks are indirect lock types.
1043#if KMP_USE_TSX
1044#if KMP_USE_FUTEX
1045#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a)
1046#define KMP_FOREACH_I_LOCK(m, a) \
1047 m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
1048 m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
1049 m(nested_queuing, a) m(nested_drdpa, a)
1050#else
1051#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) m(rtm_spin, a)
1052#define KMP_FOREACH_I_LOCK(m, a) \
1053 m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
1054 m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a) \
1055 m(nested_drdpa, a)
1056#endif // KMP_USE_FUTEX
1057#define KMP_LAST_D_LOCK lockseq_rtm_spin
1058#else
1059#if KMP_USE_FUTEX
1060#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
1061#define KMP_FOREACH_I_LOCK(m, a) \
1062 m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a) \
1063 m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a)
1064#define KMP_LAST_D_LOCK lockseq_futex
1065#else
1066#define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
1067#define KMP_FOREACH_I_LOCK(m, a) \
1068 m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a) \
1069 m(nested_queuing, a) m(nested_drdpa, a)
1070#define KMP_LAST_D_LOCK lockseq_tas
1071#endif // KMP_USE_FUTEX
1072#endif // KMP_USE_TSX
1073
1074// Information used in dynamic dispatch
1075#define KMP_LOCK_SHIFT \
1076 8 // number of low bits to be used as tag for direct locks
1077#define KMP_FIRST_D_LOCK lockseq_tas
1078#define KMP_FIRST_I_LOCK lockseq_ticket
1079#define KMP_LAST_I_LOCK lockseq_nested_drdpa
1080#define KMP_NUM_I_LOCKS \
1081 (locktag_nested_drdpa + 1) // number of indirect lock types
1082
1083// Base type for dynamic locks.
1084typedef kmp_uint32 kmp_dyna_lock_t;
1085
1086// Lock sequence that enumerates all lock kinds. Always make this enumeration
1087// consistent with kmp_lockseq_t in the include directory.
1088typedef enum {
1089 lockseq_indirect = 0,
1090#define expand_seq(l, a) lockseq_##l,
1091 KMP_FOREACH_D_LOCK(expand_seq, 0) KMP_FOREACH_I_LOCK(expand_seq, 0)
1092#undef expand_seq
1093} kmp_dyna_lockseq_t;
1094
1095// Enumerates indirect lock tags.
1096typedef enum {
1097#define expand_tag(l, a) locktag_##l,
1098 KMP_FOREACH_I_LOCK(expand_tag, 0)
1099#undef expand_tag
1100} kmp_indirect_locktag_t;
1101
1102// Utility macros that extract information from lock sequences.
1103#define KMP_IS_D_LOCK(seq) \
1104 ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
1105#define KMP_IS_I_LOCK(seq) \
1106 ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
1107#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCK)
1108#define KMP_GET_D_TAG(seq) ((seq) << 1 | 1)
1109
1110// Enumerates direct lock tags starting from indirect tag.
1111typedef enum {
1112#define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
1113 KMP_FOREACH_D_LOCK(expand_tag, 0)
1114#undef expand_tag
1115} kmp_direct_locktag_t;
1116
1117// Indirect lock type
1118typedef struct {
1119 kmp_user_lock_p lock;
1120 kmp_indirect_locktag_t type;
1121} kmp_indirect_lock_t;
1122
1123// Function tables for direct locks. Set/unset/test differentiate functions
1124// with/without consistency checking.
1125extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
1126extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
1127extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
1128extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
1129extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
1130
1131// Function tables for indirect locks. Set/unset/test differentiate functions
1132// with/without consistency checking.
1133extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
1134extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
1135extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
1136extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
1137extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
1138
1139// Extracts direct lock tag from a user lock pointer
1140#define KMP_EXTRACT_D_TAG(l) \
1141 (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT) - 1) & \
1142 -(*((kmp_dyna_lock_t *)(l)) & 1))
1143
1144// Extracts indirect lock index from a user lock pointer
1145#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
1146
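The -(*(l) & 1) factor in KMP_EXTRACT_D_TAG deserves a note: for a direct lock (LSB 1) it produces an all-ones mask, so the low-byte tag passes through, while for an indirect lock (LSB 0) it produces 0, so the macro yields 0. A standalone sketch of the same arithmetic on a plain integer (illustrative values only):

// Standalone sketch of the KMP_EXTRACT_D_TAG arithmetic.
#include <cassert>
#include <cstdint>

static std::uint32_t extract_d_tag(std::uint32_t word) {
  // Keep the low 8 bits only when the LSB marks the word as a direct lock;
  // (0u - (word & 1)) is 0xFFFFFFFF for direct words and 0 for indirect words.
  return word & ((1u << 8) - 1) & (0u - (word & 1));
}

int main() {
  std::uint32_t direct_word = (5u << 1) | 1; // direct lock, tag 0x0b
  std::uint32_t indirect_word = 42u << 1;    // indirect lock, LSB 0
  assert(extract_d_tag(direct_word) == 0x0b);
  assert(extract_d_tag(indirect_word) == 0); // indirect locks yield tag 0
  return 0;
}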
1147// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
1148// *) and op (operation type).
1149#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
1150
1151// Returns function pointer to the indirect lock function with l
1152// (kmp_indirect_lock_t *) and op (operation type).
1153#define KMP_I_LOCK_FUNC(l, op) \
1154 __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
1155
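The dispatch pattern these jump tables enable can be shown with a toy, self-contained table (all names invented; the real tables above are indexed by the extracted direct tag or by the indirect lock's type field):

// Toy jump-table dispatch in the spirit of KMP_D_LOCK_FUNC / KMP_I_LOCK_FUNC.
#include <cstdint>
#include <cstdio>

using toy_lock_t = std::uint32_t;

static int toy_set_tas(toy_lock_t *, std::int32_t gtid) {
  std::printf("tas acquire by gtid %d\n", gtid);
  return 0;
}
static int toy_set_ticket(toy_lock_t *, std::int32_t gtid) {
  std::printf("ticket acquire by gtid %d\n", gtid);
  return 0;
}

// One function pointer per lock type, indexed by a tag decoded from the lock.
static int (*toy_set[])(toy_lock_t *, std::int32_t) = {toy_set_tas,
                                                       toy_set_ticket};

int main() {
  toy_lock_t lock = 0;
  std::uint32_t tag = 1; // pretend the lock word decodes to the "ticket" slot
  toy_set[tag](&lock, /*gtid=*/0);
  return 0;
}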
1156// Initializes a direct lock with the given lock pointer and lock sequence.
1157#define KMP_INIT_D_LOCK(l, seq) \
1158 __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
1159
1160// Initializes an indirect lock with the given lock pointer and lock sequence.
1161#define KMP_INIT_I_LOCK(l, seq) \
1162 __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
1163
1164// Returns "free" lock value for the given lock type.
1165#define KMP_LOCK_FREE(type) (locktag_##type)
1166
1167// Returns "busy" lock value for the given lock type.
1168#define KMP_LOCK_BUSY(v, type) ((v) << KMP_LOCK_SHIFT | locktag_##type)
1169
1170// Returns lock value after removing (shifting) lock tag.
1171#define KMP_LOCK_STRIP(v) ((v) >> KMP_LOCK_SHIFT)
1172
1173// Initializes global states and data structures for managing dynamic user
1174// locks.
1175extern void __kmp_init_dynamic_user_locks();
1176
1177// Allocates and returns an indirect lock with the given indirect lock tag.
1178extern kmp_indirect_lock_t *
1179__kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
1180
1181// Cleans up global states and data structures for managing dynamic user locks.
1182extern void __kmp_cleanup_indirect_user_locks();
1183
1184// Default user lock sequence when not using hinted locks.
1185extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
1186
1187// Jump table for "set lock location", available only for indirect locks.
1188extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
1189 const ident_t *);
1190#define KMP_SET_I_LOCK_LOCATION(lck, loc) \
1191 { \
1192 if (__kmp_indirect_set_location[(lck)->type] != NULL) \
1193 __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
1194 }
1195
1196// Jump table for "set lock flags", available only for indirect locks.
1197extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
1198 kmp_lock_flags_t);
1199#define KMP_SET_I_LOCK_FLAGS(lck, flag) \
1200 { \
1201 if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
1202 __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
1203 }
1204
1205// Jump table for "get lock location", available only for indirect locks.
1206extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
1207 kmp_user_lock_p);
1208#define KMP_GET_I_LOCK_LOCATION(lck) \
1209 (__kmp_indirect_get_location[(lck)->type] != NULL \
1210 ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
1211 : NULL)
1212
1213// Jump table for "get lock flags", available only for indirect locks.
1214extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
1215 kmp_user_lock_p);
1216#define KMP_GET_I_LOCK_FLAGS(lck) \
1217 (__kmp_indirect_get_flags[(lck)->type] != NULL \
1218 ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
1219 : NULL)
1220
1221// number of kmp_indirect_lock_t objects to be allocated together
1222#define KMP_I_LOCK_CHUNK 1024
1223// Keep at a power of 2 since it is used in multiplication & division
1224KMP_BUILD_ASSERT(KMP_I_LOCK_CHUNK % 2 == 0);
1225// number of row entries in the initial lock table
1226#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8
1227
1228// Lock table for indirect locks.
1229typedef struct kmp_indirect_lock_table {
1230 kmp_indirect_lock_t **table; // blocks of indirect locks allocated
1231 kmp_uint32 nrow_ptrs; // number of *table pointer entries in table
1232 kmp_lock_index_t next; // index to the next lock to be allocated
1233 struct kmp_indirect_lock_table *next_table;
1234} kmp_indirect_lock_table_t;
1235
1236extern kmp_indirect_lock_table_t __kmp_i_lock_table;
1237
1238// Returns the indirect lock associated with the given index.
1239// Returns nullptr if no lock at given index
1240static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
1241 kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
1242 while (lock_table) {
1243 kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
1244 if (idx < max_locks) {
1245 kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
1246 kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
1247 if (!lock_table->table[row] || idx >= lock_table->next)
1248 break;
1249 return &lock_table->table[row][col];
1250 }
1251 idx -= max_locks;
1252 lock_table = lock_table->next_table;
1253 }
1254 return nullptr;
1255}
1256
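A quick worked example of the lookup arithmetic above, assuming the default chunk size of 1024 and a hypothetical index: index 2500 lands in row 2500 / 1024 = 2 and column 2500 % 1024 = 452, provided the current table has at least three row pointers; otherwise the loop subtracts the table's capacity and retries against next_table.

// Worked example of the row/column split used by __kmp_get_i_lock
// (illustrative index; 1024 is the KMP_I_LOCK_CHUNK default).
#include <cassert>

int main() {
  const unsigned chunk = 1024;
  unsigned idx = 2500;        // hypothetical lock index
  assert(idx / chunk == 2);   // row within the current table
  assert(idx % chunk == 452); // column within that row
  return 0;
}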
1257// Number of locks in a lock block, which is fixed to "1" now.
1258// TODO: No lock block implementation now. If we do support, we need to manage
1259// lock block data structure for each indirect lock type.
1260extern int __kmp_num_locks_in_block;
1261
1262// Fast lock table lookup without consistency checking
1263#define KMP_LOOKUP_I_LOCK(l) \
1264 ((OMP_LOCK_T_SIZE < sizeof(void *)) \
1265 ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l)) \
1266 : *((kmp_indirect_lock_t **)(l)))
1267
1268// Used once in kmp_error.cpp
1269extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
1270
1271#else // KMP_USE_DYNAMIC_LOCK
1272
1273#define KMP_LOCK_BUSY(v, type) (v)
1274#define KMP_LOCK_FREE(type) 0
1275#define KMP_LOCK_STRIP(v) (v)
1276
1277#endif // KMP_USE_DYNAMIC_LOCK
1278
1279// Data structure for using backoff within spin locks.
1280typedef struct {
1281 kmp_uint32 step; // current step
1282 kmp_uint32 max_backoff; // upper bound of outer delay loop
1283 kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent)
1284} kmp_backoff_t;
1285
1286// Runtime's default backoff parameters
1287extern kmp_backoff_t __kmp_spin_backoff_params;
1288
1289// Backoff function
1290extern void __kmp_spin_backoff(kmp_backoff_t *);
1291
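How fields like these are typically consumed can be sketched as a truncated exponential backoff. This is an assumption about the intended use of step/max_backoff/min_tick, not the actual body of __kmp_spin_backoff:

// Hedged sketch of a truncated exponential backoff over kmp_backoff_t-style
// fields; toy_* names are invented and the delay loop is only illustrative.
#include <cstdint>

struct toy_backoff_t {
  std::uint32_t step;        // current delay, in units of min_tick
  std::uint32_t max_backoff; // upper bound on the delay
  std::uint32_t min_tick;    // smallest delay granularity
};

static void toy_pause(std::uint32_t ticks) {
  for (volatile std::uint32_t i = 0; i < ticks; ++i) {
    // busy-wait; a real runtime would use a pause/yield hint here
  }
}

static void toy_spin_backoff(toy_backoff_t *b) {
  toy_pause(b->step * b->min_tick); // wait out the current interval
  b->step <<= 1;                    // double the delay for the next miss...
  if (b->step > b->max_backoff)     // ...but never exceed the configured cap
    b->step = b->max_backoff;
}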
1292#ifdef __cplusplus
1293} // extern "C"
1294#endif // __cplusplus
1295
1296#endif /* KMP_LOCK_H */