Bug Summary

File: /build/source/openmp/runtime/src/kmp_barrier.cpp
Warning: line 2223, column 7: Called function pointer is null (null dereference)
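
The diagnostic means that, on at least one path the analyzer explored, a call is made through a function pointer that can be null at the call site. As a minimal, hypothetical sketch of that pattern (illustration only; gather/gather_checked are made-up names and this is not the code flagged at line 2223), the barrier gather routines listed below take an optional reduce callback and guard it with `if (reduce)` before invoking it:

// Hypothetical sketch of the diagnosed pattern, not the flagged code.
static void gather(void (*reduce)(void *, void *), void *a, void *b) {
  (*reduce)(a, b); // null dereference if reduce is null on this path
}

static void gather_checked(void (*reduce)(void *, void *), void *a, void *b) {
  if (reduce) // guard the optional callback before calling through it
    (*reduce)(a, b);
}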

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name kmp_barrier.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I projects/openmp/runtime/src -I /build/source/openmp/runtime/src -I include -I /build/source/llvm/include -I /build/source/openmp/runtime/src/i18n -I /build/source/openmp/runtime/src/include -I /build/source/openmp/runtime/src/thirdparty/ittnotify -D _FORTIFY_SOURCE=2 -D NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1683717183 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -Wno-extra -Wno-pedantic -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-frame-address -Wno-strict-aliasing -Wno-stringop-truncation -Wno-switch -Wno-uninitialized -Wno-cast-qual -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-05-10-133810-16478-1 -x c++ 
/build/source/openmp/runtime/src/kmp_barrier.cpp

1/*
2 * kmp_barrier.cpp
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp_wait_release.h"
14#include "kmp_barrier.h"
15#include "kmp_itt.h"
16#include "kmp_os.h"
17#include "kmp_stats.h"
18#include "ompt-specific.h"
19// for distributed barrier
20#include "kmp_affinity.h"
21
22 #if KMP_MIC
23 #include <immintrin.h>
24 #define USE_NGO_STORES 1
25 #endif // KMP_MIC
26
27 #if KMP_MIC && USE_NGO_STORES
28 // ICV copying
29 #define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
30 #define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
31 #define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
32 #define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
33 #else
34 #define ngo_load(src) ((void)0)
35 #define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
36 #define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
37 #define ngo_sync() ((void)0)
38 #endif /* KMP_MIC && USE_NGO_STORES */
39
40void __kmp_print_structure(void); // Forward declaration
41
42// ---------------------------- Barrier Algorithms ----------------------------
43// Distributed barrier
44
45// Compute how many threads to have polling each cache-line.
46// We want to limit the number of writes to IDEAL_GO_RESOLUTION.
47void distributedBarrier::computeVarsForN(size_t n) {
48 int nsockets = 1;
49 if (__kmp_topology) {
50 int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
51 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
52 int ncores_per_socket =
53 __kmp_topology->calculate_ratio(core_level, socket_level);
54 nsockets = __kmp_topology->get_count(socket_level);
55
56 if (nsockets <= 0)
57 nsockets = 1;
58 if (ncores_per_socket <= 0)
59 ncores_per_socket = 1;
60
61 threads_per_go = ncores_per_socket >> 1;
62 if (!fix_threads_per_go) {
63 // Minimize num_gos
64 if (threads_per_go > 4) {
65 if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
66 threads_per_go = threads_per_go >> 1;
67 }
68 if (threads_per_go > 4 && nsockets == 1)
69 threads_per_go = threads_per_go >> 1;
70 }
71 }
72 if (threads_per_go == 0)
73 threads_per_go = 1;
74 fix_threads_per_go = true;
75 num_gos = n / threads_per_go;
76 if (n % threads_per_go)
77 num_gos++;
78 if (nsockets == 1 || num_gos == 1)
79 num_groups = 1;
80 else {
81 num_groups = num_gos / nsockets;
82 if (num_gos % nsockets)
83 num_groups++;
84 }
85 if (num_groups <= 0)
86 num_groups = 1;
87 gos_per_group = num_gos / num_groups;
88 if (num_gos % num_groups)
89 gos_per_group++;
90 threads_per_group = threads_per_go * gos_per_group;
91 } else {
92 num_gos = n / threads_per_go;
93 if (n % threads_per_go)
94 num_gos++;
95 if (num_gos == 1)
96 num_groups = 1;
97 else {
98 num_groups = num_gos / 2;
99 if (num_gos % 2)
100 num_groups++;
101 }
102 gos_per_group = num_gos / num_groups;
103 if (num_gos % num_groups)
104 gos_per_group++;
105 threads_per_group = threads_per_go * gos_per_group;
106 }
107}
108
109void distributedBarrier::computeGo(size_t n) {
110 // Minimize num_gos
111 for (num_gos = 1;; num_gos++)
112 if (IDEAL_CONTENTION * num_gos >= n)
113 break;
114 threads_per_go = n / num_gos;
115 if (n % num_gos)
116 threads_per_go++;
117 while (num_gos > MAX_GOS) {
118 threads_per_go++;
119 num_gos = n / threads_per_go;
120 if (n % threads_per_go)
121 num_gos++;
122 }
123 computeVarsForN(n);
124}
125
126// This function is to resize the barrier arrays when the new number of threads
127// exceeds max_threads, which is the current size of all the arrays
128void distributedBarrier::resize(size_t nthr) {
129 KMP_DEBUG_ASSERT(nthr > max_threads);
130
131 // expand to requested size * 2
132 max_threads = nthr * 2;
133
134 // allocate arrays to new max threads
135 for (int i = 0; i < MAX_ITERS; ++i) {
136 if (flags[i])
137 flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
138 max_threads * sizeof(flags_s));
139 else
140 flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
141 }
142
143 if (go)
144 go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
145 else
146 go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));
147
148 if (iter)
149 iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
150 else
151 iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));
152
153 if (sleep)
154 sleep =
155 (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
156 else
157 sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
158 }
159
160// This function is to set all the go flags that threads might be waiting
161// on, and when blocktime is not infinite, it should be followed by a wake-up
162// call to each thread
163kmp_uint64 distributedBarrier::go_release() {
164 kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
165 for (size_t j = 0; j < num_gos; j++) {
166 go[j].go.store(next_go);
167 }
168 return next_go;
169}
170
171void distributedBarrier::go_reset() {
172 for (size_t j = 0; j < max_threads; ++j) {
173 for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
174 flags[i][j].stillNeed = 1;
175 }
176 go[j].go.store(0);
177 iter[j].iter = 0;
178 }
179}
180
181// This function inits/re-inits the distributed barrier for a particular number
182// of threads. If a resize of arrays is needed, it calls the resize function.
183void distributedBarrier::init(size_t nthr) {
184 size_t old_max = max_threads;
185 if (nthr > max_threads) { // need more space in arrays
186 resize(nthr);
187 }
188
189 for (size_t i = 0; i < max_threads; i++) {
190 for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
191 flags[j][i].stillNeed = 1;
192 }
193 go[i].go.store(0);
194 iter[i].iter = 0;
195 if (i >= old_max)
196 sleep[i].sleep = false;
197 }
198
199 // Recalculate num_gos, etc. based on new nthr
200 computeVarsForN(nthr);
201
202 num_threads = nthr;
203
204 if (team_icvs == NULL)
205 team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
206}
207
208// This function is used only when KMP_BLOCKTIME is not infinite.
209// static
210void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
211 size_t start, size_t stop, size_t inc,
212 size_t tid) {
213 KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
214 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
215 return;
216
217 kmp_info_t **other_threads = team->t.t_threads;
218 for (size_t thr = start; thr < stop; thr += inc) {
219 KMP_DEBUG_ASSERT(other_threads[thr]);
220 int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
221 // Wake up worker regardless of if it appears to be sleeping or not
222 __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
223 }
224 }
225
226static void __kmp_dist_barrier_gather(
227 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
228 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
229 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
230 kmp_team_t *team;
231 distributedBarrier *b;
232 kmp_info_t **other_threads;
233 kmp_uint64 my_current_iter, my_next_iter;
234 kmp_uint32 nproc;
235 bool group_leader;
236
237 team = this_thr->th.th_team;
238 nproc = this_thr->th.th_team_nproc;
239 other_threads = team->t.t_threads;
240 b = team->t.b;
241 my_current_iter = b->iter[tid].iter;
242 my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
243 group_leader = ((tid % b->threads_per_group) == 0);
244
245 KA_TRACE(20,
246 ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
247 gtid, team->t.t_id, tid, bt));
248
249 #if USE_ITT_BUILD && USE_ITT_NOTIFY
250 // Barrier imbalance - save arrive time to the thread
251 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
252 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
253 __itt_get_timestamp();
254 }
255#endif
256
257 if (group_leader) {
258 // Start from the thread after the group leader
259 size_t group_start = tid + 1;
260 size_t group_end = tid + b->threads_per_group;
261 size_t threads_pending = 0;
262
263 if (group_end > nproc)
264 group_end = nproc;
265 do { // wait for threads in my group
266 threads_pending = 0;
267 // Check all the flags every time to avoid branch misspredict
268 for (size_t thr = group_start; thr < group_end; thr++) {
269 // Each thread uses a different cache line
270 threads_pending += b->flags[my_current_iter][thr].stillNeed;
271 }
272 // Execute tasks here
273 if (__kmp_tasking_mode != tskm_immediate_exec) {
274 kmp_task_team_t *task_team = this_thr->th.th_task_team;
275 if (task_team != NULL) {
276 if (TCR_SYNC_4(task_team->tt.tt_active)) {
277 if (KMP_TASKING_ENABLED(task_team)) {
278 int tasks_completed = FALSE;
279 __kmp_atomic_execute_tasks_64(
280 this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
281 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
282 } else
283 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
284 }
285 } else {
286 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
287 } // if
288 }
289 if (TCR_4(__kmp_global.g.g_done)) {
290 if (__kmp_global.g.g_abort)
291 __kmp_abort_thread();
292 break;
293 } else if (__kmp_tasking_mode != tskm_immediate_exec &&
294 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
295 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
296 }
297 } while (threads_pending > 0);
298
299 if (reduce) { // Perform reduction if needed
300 OMPT_REDUCTION_DECL(this_thr, gtid);
301 OMPT_REDUCTION_BEGIN;
302 // Group leader reduces all threads in group
303 for (size_t thr = group_start; thr < group_end; thr++) {
304 (*reduce)(this_thr->th.th_local.reduce_data,
305 other_threads[thr]->th.th_local.reduce_data);
306 }
307 OMPT_REDUCTION_END;
308 }
309
310 // Set flag for next iteration
311 b->flags[my_next_iter][tid].stillNeed = 1;
312 // Each thread uses a different cache line; resets stillNeed to 0 to
313 // indicate it has reached the barrier
314 b->flags[my_current_iter][tid].stillNeed = 0;
315
316 do { // wait for all group leaders
317 threads_pending = 0;
318 for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
319 threads_pending += b->flags[my_current_iter][thr].stillNeed;
320 }
321 // Execute tasks here
322 if (__kmp_tasking_mode != tskm_immediate_exec) {
323 kmp_task_team_t *task_team = this_thr->th.th_task_team;
324 if (task_team != NULL) {
325 if (TCR_SYNC_4(task_team->tt.tt_active)) {
326 if (KMP_TASKING_ENABLED(task_team)) {
327 int tasks_completed = FALSE;
328 __kmp_atomic_execute_tasks_64(
329 this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
330 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
331 } else
332 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
333 }
334 } else {
335 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
336 } // if
337 }
338 if (TCR_4(__kmp_global.g.g_done)) {
339 if (__kmp_global.g.g_abort)
340 __kmp_abort_thread();
341 break;
342 } else if (__kmp_tasking_mode != tskm_immediate_exec &&
343 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
344 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
345 }
346 } while (threads_pending > 0);
347
348 if (reduce) { // Perform reduction if needed
349 if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
350 OMPT_REDUCTION_DECL(this_thr, gtid);
351 OMPT_REDUCTION_BEGIN;
352 for (size_t thr = b->threads_per_group; thr < nproc;
353 thr += b->threads_per_group) {
354 (*reduce)(this_thr->th.th_local.reduce_data,
355 other_threads[thr]->th.th_local.reduce_data);
356 }
357 OMPT_REDUCTION_END;
358 }
359 }
360 } else {
361 // Set flag for next iteration
362 b->flags[my_next_iter][tid].stillNeed = 1;
363 // Each thread uses a different cache line; resets stillNeed to 0 to
364 // indicate it has reached the barrier
365 b->flags[my_current_iter][tid].stillNeed = 0;
366 }
367
368 KMP_MFENCE();
369
370 KA_TRACE(20,
371 ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
372 gtid, team->t.t_id, tid, bt));
373}
374
375static void __kmp_dist_barrier_release(
376 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
377 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
378 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
379 kmp_team_t *team;
380 distributedBarrier *b;
381 kmp_bstate_t *thr_bar;
382 kmp_uint64 my_current_iter, next_go;
383 size_t my_go_index;
384 bool group_leader;
385
386 KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
387 gtid, tid, bt));
388
389 thr_bar = &this_thr->th.th_bar[bt].bb;
390
391 if (!KMP_MASTER_TID(tid)) {
392 // workers and non-master group leaders need to check their presence in team
393 do {
394 if (this_thr->th.th_used_in_team.load() != 1 &&
395 this_thr->th.th_used_in_team.load() != 3) {
396 // Thread is not in use in a team. Wait on location in tid's thread
397 // struct. The 0 value tells anyone looking that this thread is spinning
398 // or sleeping until this location becomes 3 again; 3 is the transition
399 // state to get to 1 which is waiting on go and being in the team
400 kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
401 if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
402 0) ||
403 this_thr->th.th_used_in_team.load() == 0) {
404 my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
405 }
406 #if USE_ITT_BUILD && USE_ITT_NOTIFY
407 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
408 // In fork barrier where we could not get the object reliably
409 itt_sync_obj =
410 __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
411 // Cancel wait on previous parallel region...
412 __kmp_itt_task_starting(itt_sync_obj);
413
414 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
415 return;
416
417 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
418 if (itt_sync_obj != NULL)
419 // Call prepare as early as possible for "new" barrier
420 __kmp_itt_task_finished(itt_sync_obj);
421 } else
422#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
423 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
424 return;
425 }
426 if (this_thr->th.th_used_in_team.load() != 1 &&
427 this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
428 continue;
429 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
430 return;
431
432 // At this point, the thread thinks it is in use in a team, or in
433 // transition to be used in a team, but it might have reached this barrier
434 // before it was marked unused by the team. Unused threads are awoken and
435 // shifted to wait on local thread struct elsewhere. It also might reach
436 // this point by being picked up for use by a different team. Either way,
437 // we need to update the tid.
438 tid = __kmp_tid_from_gtid(gtid);
439 team = this_thr->th.th_team;
440 KMP_DEBUG_ASSERT(tid >= 0);
441 KMP_DEBUG_ASSERT(team);
442 b = team->t.b;
443 my_current_iter = b->iter[tid].iter;
444 next_go = my_current_iter + distributedBarrier::MAX_ITERS;
445 my_go_index = tid / b->threads_per_go;
446 if (this_thr->th.th_used_in_team.load() == 3) {
447 KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
448 }
449 // Check if go flag is set
450 if (b->go[my_go_index].go.load() != next_go) {
451 // Wait on go flag on team
452 kmp_atomic_flag_64<false, true> my_flag(
453 &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
454 my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
455 KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
456 b->iter[tid].iter == 0);
457 KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
458 }
459
460 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
461 return;
462 // At this point, the thread's go location was set. This means the primary
463 // thread is safely in the barrier, and so this thread's data is
464 // up-to-date, but we should check again that this thread is really in
465 // use in the team, as it could have been woken up for the purpose of
466 // changing team size, or reaping threads at shutdown.
467 if (this_thr->th.th_used_in_team.load() == 1)
468 break;
469 } while (1);
470
471 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
472 return;
473
474 group_leader = ((tid % b->threads_per_group) == 0);
475 if (group_leader) {
476 // Tell all the threads in my group they can go!
477 for (size_t go_idx = my_go_index + 1;
478 go_idx < my_go_index + b->gos_per_group; go_idx++) {
479 b->go[go_idx].go.store(next_go);
480 }
481 // Fence added so that workers can see changes to go. sfence inadequate.
482 KMP_MFENCE();
483 }
484
485 #if KMP_BARRIER_ICV_PUSH
486 if (propagate_icvs) { // copy ICVs to final dest
487 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
488 tid, FALSE);
489 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
490 (kmp_internal_control_t *)team->t.b->team_icvs);
491 copy_icvs(&thr_bar->th_fixed_icvs,
492 &team->t.t_implicit_task_taskdata[tid].td_icvs);
493 }
494#endif
495 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
496 // This thread is now awake and participating in the barrier;
497 // wake up the other threads in the group
498 size_t nproc = this_thr->th.th_team_nproc;
499 size_t group_end = tid + b->threads_per_group;
500 if (nproc < group_end)
501 group_end = nproc;
502 __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
503 }
504 } else { // Primary thread
505 team = this_thr->th.th_team;
506 b = team->t.b;
507 my_current_iter = b->iter[tid].iter;
508 next_go = my_current_iter + distributedBarrier::MAX_ITERS;
509 #if KMP_BARRIER_ICV_PUSH
510 if (propagate_icvs) {
511 // primary thread has ICVs in final destination; copy
512 copy_icvs(&thr_bar->th_fixed_icvs,
513 &team->t.t_implicit_task_taskdata[tid].td_icvs);
514 }
515#endif
516 // Tell all the group leaders they can go!
517 for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
518 b->go[go_idx].go.store(next_go);
519 }
520
521 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
522 // Wake-up the group leaders
523 size_t nproc = this_thr->th.th_team_nproc;
524 __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
525 b->threads_per_group, tid);
526 }
527
528 // Tell all the threads in my group they can go!
529 for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
530 b->go[go_idx].go.store(next_go);
531 }
532
533 // Fence added so that workers can see changes to go. sfence inadequate.
534 KMP_MFENCE();
535
536 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
537 // Wake-up the other threads in my group
538 size_t nproc = this_thr->th.th_team_nproc;
539 size_t group_end = tid + b->threads_per_group;
540 if (nproc < group_end)
541 group_end = nproc;
542 __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
543 }
544 }
545 // Update to next iteration
546 KMP_ASSERT(my_current_iter == b->iter[tid].iter);
547 b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;
548
549 KA_TRACE(
550 20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
551 gtid, team->t.t_id, tid, bt));
552}
553
554// Linear Barrier
555template <bool cancellable = false>
556static bool __kmp_linear_barrier_gather_template(
557 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
558 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
559 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
560 kmp_team_t *team = this_thr->th.th_team;
561 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
562 kmp_info_t **other_threads = team->t.t_threads;
563
564 KA_TRACE(
565 20,
566 ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
567 gtid, team->t.t_id, tid, bt));
568 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
569
570 #if USE_ITT_BUILD && USE_ITT_NOTIFY
571 // Barrier imbalance - save arrive time to the thread
572 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
573 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
574 __itt_get_timestamp();
575 }
576#endif
577 // We now perform a linear reduction to signal that all of the threads have
578 // arrived.
579 if (!KMP_MASTER_TID(tid)) {
580 KA_TRACE(20,
581 ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
582 "arrived(%p): %llu => %llu\n",
583 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
584 team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
585 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
586 // Mark arrival to primary thread
587 /* After performing this write, a worker thread may not assume that the team
588 is valid any more - it could be deallocated by the primary thread at any
589 time. */
590 kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
591 flag.release();
592 } else {
593 kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
594 int nproc = this_thr->th.th_team_nproc;
595 int i;
596 // Don't have to worry about sleep bit here or atomic since team setting
597 kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
598
599 // Collect all the worker team member threads.
600 for (i = 1; i < nproc; ++i) {
601#if KMP_CACHE_MANAGE
602 // Prefetch next thread's arrived count
603 if (i + 1 < nproc)
604 KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
605#endif /* KMP_CACHE_MANAGE */
606 KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
607 "arrived(%p) == %llu\n",
608 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
609 team->t.t_id, i,
610 &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
611
612 // Wait for worker thread to arrive
613 if (cancellable) {
614 kmp_flag_64<true, false> flag(
615 &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
616 if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
617 return true;
618 } else {
619 kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
620 new_state);
621 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
622 }
623 #if USE_ITT_BUILD && USE_ITT_NOTIFY
624 // Barrier imbalance - write min of the thread time and the other thread
625 // time to the thread.
626 if (__kmp_forkjoin_frames_mode == 2) {
627 this_thr->th.th_bar_min_time = KMP_MIN(
628 this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
629 }
630#endif
631 if (reduce) {
632 KA_TRACE(100,
633 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
634 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
635 team->t.t_id, i));
636 OMPT_REDUCTION_DECL(this_thr, gtid);
637 OMPT_REDUCTION_BEGIN;
638 (*reduce)(this_thr->th.th_local.reduce_data,
639 other_threads[i]->th.th_local.reduce_data);
640 OMPT_REDUCTION_END;
641 }
642 }
643 // Don't have to worry about sleep bit here or atomic since team setting
644 team_bar->b_arrived = new_state;
645 KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
646 "arrived(%p) = %llu\n",
647 gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
648 new_state));
649 }
650 KA_TRACE(
651 20,
652 ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
653 gtid, team->t.t_id, tid, bt));
654 return false;
655}
656
657template <bool cancellable = false>
658static bool __kmp_linear_barrier_release_template(
659 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
660 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
661 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
662 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
663 kmp_team_t *team;
664
665 if (KMP_MASTER_TID(tid)) {
666 unsigned int i;
667 kmp_uint32 nproc = this_thr->th.th_team_nproc;
668 kmp_info_t **other_threads;
669
670 team = __kmp_threads[gtid]->th.th_team;
671 KMP_DEBUG_ASSERT(team != NULL);
672 other_threads = team->t.t_threads;
673
674 KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "
675 "barrier type %d\n",
676 gtid, team->t.t_id, tid, bt));
677
678 if (nproc > 1) {
679 #if KMP_BARRIER_ICV_PUSH
680 {
681 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
682 if (propagate_icvs) {
683 ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
684 for (i = 1; i < nproc; ++i) {
685 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
686 team, i, FALSE);
687 ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
688 &team->t.t_implicit_task_taskdata[0].td_icvs);
689 }
690 ngo_sync();
691 }
692 }
693#endif // KMP_BARRIER_ICV_PUSH
694
695 // Now, release all of the worker threads
696 for (i = 1; i < nproc; ++i) {
697#if KMP_CACHE_MANAGE
698 // Prefetch next thread's go flag
699 if (i + 1 < nproc)
700 KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
701#endif /* KMP_CACHE_MANAGE */
702 KA_TRACE(
703 20,
704 ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
705 "go(%p): %u => %u\n",
706 gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
707 team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
708 other_threads[i]->th.th_bar[bt].bb.b_go,
709 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
710 kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
711 other_threads[i]);
712 flag.release();
713 }
714 }
715 } else { // Wait for the PRIMARY thread to release us
716 KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
717 gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
718 if (cancellable) {
719 kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
720 if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
721 return true;
722 } else {
723 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
724 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
725 }
726 #if USE_ITT_BUILD && USE_ITT_NOTIFY
727 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
728 // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
729 // disabled)
730 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
731 // Cancel wait on previous parallel region...
732 __kmp_itt_task_starting(itt_sync_obj);
733
734 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
735 return false;
736
737 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
738 if (itt_sync_obj != NULL)
739 // Call prepare as early as possible for "new" barrier
740 __kmp_itt_task_finished(itt_sync_obj);
741 } else
742#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
743 // Early exit for reaping threads releasing forkjoin barrier
744 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
745 return false;
746// The worker thread may now assume that the team is valid.
747 #ifdef KMP_DEBUG
748 tid = __kmp_tid_from_gtid(gtid);
749 team = __kmp_threads[gtid]->th.th_team;
750#endif
751 KMP_DEBUG_ASSERT(team != NULL);
752 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
753 KA_TRACE(20,
754 ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
755 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
756 KMP_MB(); // Flush all pending memory write invalidates.
757 }
758 KA_TRACE(
759 20,
760 ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
761 gtid, team->t.t_id, tid, bt));
762 return false;
763}
764
765static void __kmp_linear_barrier_gather(
766 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
767 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
768 __kmp_linear_barrier_gather_template<false>(
769 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
770 }
771
772 static bool __kmp_linear_barrier_gather_cancellable(
773 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
774 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
775 return __kmp_linear_barrier_gather_template<true>(
776 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
777 }
778
779 static void __kmp_linear_barrier_release(
780 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
781 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
782 __kmp_linear_barrier_release_template<false>(
783 bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
784 }
785
786 static bool __kmp_linear_barrier_release_cancellable(
787 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
788 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
789 return __kmp_linear_barrier_release_template<true>(
790 bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
791}
792
793// Tree barrier
794static void __kmp_tree_barrier_gather(
795 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
796 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
797 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
798 kmp_team_t *team = this_thr->th.th_team;
799 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
800 kmp_info_t **other_threads = team->t.t_threads;
801 kmp_uint32 nproc = this_thr->th.th_team_nproc;
802 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
803 kmp_uint32 branch_factor = 1 << branch_bits;
804 kmp_uint32 child;
805 kmp_uint32 child_tid;
806 kmp_uint64 new_state = 0;
807
808 KA_TRACE(
809 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
810 gtid, team->t.t_id, tid, bt));
811 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
812
813 #if USE_ITT_BUILD && USE_ITT_NOTIFY
814 // Barrier imbalance - save arrive time to the thread
815 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
816 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
817 __itt_get_timestamp();
818 }
819#endif
820 // Perform tree gather to wait until all threads have arrived; reduce any
821 // required data as we go
822 child_tid = (tid << branch_bits) + 1;
823 if (child_tid < nproc) {
824 // Parent threads wait for all their children to arrive
825 new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
826 child = 1;
827 do {
828 kmp_info_t *child_thr = other_threads[child_tid];
829 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
830#if KMP_CACHE_MANAGE
831 // Prefetch next thread's arrived count
832 if (child + 1 <= branch_factor && child_tid + 1 < nproc)
833 KMP_CACHE_PREFETCH(
834 &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
835#endif /* KMP_CACHE_MANAGE */
836 KA_TRACE(20,
837 ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
838 "arrived(%p) == %llu\n",
839 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
840 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
841 // Wait for child to arrive
842 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
843 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
844 #if USE_ITT_BUILD && USE_ITT_NOTIFY
845 // Barrier imbalance - write min of the thread time and a child time to
846 // the thread.
847 if (__kmp_forkjoin_frames_mode == 2) {
848 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
849 child_thr->th.th_bar_min_time);
850 }
851#endif
852 if (reduce) {
853 KA_TRACE(100,
854 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
855 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
856 team->t.t_id, child_tid));
857 OMPT_REDUCTION_DECL(this_thr, gtid);
858 OMPT_REDUCTION_BEGIN;
859 (*reduce)(this_thr->th.th_local.reduce_data,
860 child_thr->th.th_local.reduce_data);
861 OMPT_REDUCTION_END;
862 }
863 child++;
864 child_tid++;
865 } while (child <= branch_factor && child_tid < nproc);
866 }
867
868 if (!KMP_MASTER_TID(tid)(0 == (tid))) { // Worker threads
869 kmp_int32 parent_tid = (tid - 1) >> branch_bits;
870
871 KA_TRACE(20,
872 ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
873 "arrived(%p): %llu => %llu\n",
874 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
875 team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
876 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
877
878 // Mark arrival to parent thread
879 /* After performing this write, a worker thread may not assume that the team
880 is valid any more - it could be deallocated by the primary thread at any
881 time. */
882 kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
883 flag.release();
884 } else {
885 // Need to update the team arrived pointer if we are the primary thread
886 if (nproc > 1) // New value was already computed above
887 team->t.t_bar[bt].b_arrived = new_state;
888 else
889 team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP(1 << 2);
890 KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
891 "arrived(%p) = %llu\n",
892 gtid, team->t.t_id, tid, team->t.t_id,
893 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
894 }
895 KA_TRACE(20,
896 ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
897 gtid, team->t.t_id, tid, bt));
898}
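The gather above is driven by two index formulas: a parent waits on children starting at child_tid = (tid << branch_bits) + 1, and a worker signals the parent at (tid - 1) >> branch_bits. The standalone sketch below is not part of the analyzed file; the team size and branch_bits values are made up for illustration, and it only prints the tree shape those formulas produce.

// Standalone sketch (illustrative, not runtime code): prints the parent/child
// relationships that __kmp_tree_barrier_gather walks for an assumed team.
#include <cstdio>

int main() {
  const unsigned nproc = 10;       // assumed team size
  const unsigned branch_bits = 2;  // branch_factor = 1 << branch_bits = 4
  const unsigned branch_factor = 1u << branch_bits;

  for (unsigned tid = 0; tid < nproc; ++tid) {
    // Workers release their parent at (tid - 1) >> branch_bits.
    if (tid != 0)
      std::printf("tid %u -> parent %u\n", tid, (tid - 1) >> branch_bits);
    // Parents wait on children (tid << branch_bits) + 1 .. + branch_factor,
    // clipped to nproc, exactly like the do/while loop in the gather.
    unsigned child_tid = (tid << branch_bits) + 1;
    for (unsigned child = 1; child <= branch_factor && child_tid < nproc;
         ++child, ++child_tid)
      std::printf("tid %u waits on child %u\n", tid, child_tid);
  }
  return 0;
}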
899
900static void __kmp_tree_barrier_release(
901 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
902 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
903 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release)((void)0);
904 kmp_team_t *team;
905 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
906 kmp_uint32 nproc;
907 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
908 kmp_uint32 branch_factor = 1 << branch_bits;
909 kmp_uint32 child;
910 kmp_uint32 child_tid;
911
912 // Perform a tree release for all of the threads that have been gathered
913 if (!KMP_MASTER_TID(
914 tid)(0 == (tid))) { // Handle fork barrier workers who aren't part of a team yet
915 KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
916 &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
917 // Wait for parent thread to release us
918 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2));
919 flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
920#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
921 if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) {
922 // In fork barrier where we could not get the object reliably (or
923 // ITTNOTIFY is disabled)
924 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
925 // Cancel wait on previous parallel region...
926 __kmp_itt_task_starting(itt_sync_obj);
927
928 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
929 return;
930
931 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
932 if (itt_sync_obj != NULL__null)
933 // Call prepare as early as possible for "new" barrier
934 __kmp_itt_task_finished(itt_sync_obj);
935 } else
936#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
937 // Early exit for reaping threads releasing forkjoin barrier
938 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
939 return;
940
941 // The worker thread may now assume that the team is valid.
942 team = __kmp_threads[gtid]->th.th_team;
943 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 943); }
;
944 tid = __kmp_tid_from_gtid(gtid);
945
946 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0);
947 KA_TRACE(20,
948 ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
949 team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
950 KMP_MB(); // Flush all pending memory write invalidates.
951 } else {
952 team = __kmp_threads[gtid]->th.th_team;
953 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 953); }
;
954 KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "
955 "barrier type %d\n",
956 gtid, team->t.t_id, tid, bt));
957 }
958 nproc = this_thr->th.th_team_nproc;
959 child_tid = (tid << branch_bits) + 1;
960
961 if (child_tid < nproc) {
962 kmp_info_t **other_threads = team->t.t_threads;
963 child = 1;
964 // Parent threads release all their children
965 do {
966 kmp_info_t *child_thr = other_threads[child_tid];
967 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
968#if KMP_CACHE_MANAGE
969 // Prefetch next thread's go count
970 if (child + 1 <= branch_factor && child_tid + 1 < nproc)
971 KMP_CACHE_PREFETCH(
972 &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
973#endif /* KMP_CACHE_MANAGE */
974
975#if KMP_BARRIER_ICV_PUSH1
976 {
977 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0);
978 if (propagate_icvs) {
979 __kmp_init_implicit_task(team->t.t_ident,
980 team->t.t_threads[child_tid], team,
981 child_tid, FALSE0);
982 copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
983 &team->t.t_implicit_task_taskdata[0].td_icvs);
984 }
985 }
986#endif // KMP_BARRIER_ICV_PUSH
987 KA_TRACE(20,
988 ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
989 "go(%p): %u => %u\n",
990 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
991 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
992 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
993 // Release child from barrier
994 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
995 flag.release();
996 child++;
997 child_tid++;
998 } while (child <= branch_factor && child_tid < nproc);
999 }
1000 KA_TRACE(
1001 20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1002 gtid, team->t.t_id, tid, bt));
1003}
1004
1005// Hyper Barrier
1006static void __kmp_hyper_barrier_gather(
1007 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1008 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
1009 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather)((void)0);
1010 kmp_team_t *team = this_thr->th.th_team;
1011 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1012 kmp_info_t **other_threads = team->t.t_threads;
1013 kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE(1 << 1);
1014 kmp_uint32 num_threads = this_thr->th.th_team_nproc;
1015 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
1016 kmp_uint32 branch_factor = 1 << branch_bits;
1017 kmp_uint32 offset;
1018 kmp_uint32 level;
1019
1020 KA_TRACE(
1021 20,
1022 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1023 gtid, team->t.t_id, tid, bt));
1024 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid
])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_barrier.cpp", 1024); }
;
1025
1026#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1027 // Barrier imbalance - save arrive time to the thread
1028 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
1029 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
1030 __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1031 }
1032#endif
1033 /* Perform a hypercube-embedded tree gather to wait until all of the threads
1034 have arrived, and reduce any required data as we go. */
1035 kmp_flag_64<> p_flag(&thr_bar->b_arrived);
1036 for (level = 0, offset = 1; offset < num_threads;
1037 level += branch_bits, offset <<= branch_bits) {
1038 kmp_uint32 child;
1039 kmp_uint32 child_tid;
1040
1041 if (((tid >> level) & (branch_factor - 1)) != 0) {
1042 kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);
1043
1044 KMP_MB(); // Synchronize parent and child threads.
1045 KA_TRACE(20,
1046 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1047 "arrived(%p): %llu => %llu\n",
1048 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
1049 team->t.t_id, parent_tid, &thr_bar->b_arrived,
1050 thr_bar->b_arrived,
1051 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
1052 // Mark arrival to parent thread
1053 /* After performing this write (in the last iteration of the enclosing for
1054 loop), a worker thread may not assume that the team is valid any more
1055 - it could be deallocated by the primary thread at any time. */
1056 p_flag.set_waiter(other_threads[parent_tid]);
1057 p_flag.release();
1058 break;
1059 }
1060
1061 // Parent threads wait for children to arrive
1062 if (new_state == KMP_BARRIER_UNUSED_STATE(1 << 1))
1063 new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2);
1064 for (child = 1, child_tid = tid + (1 << level);
1065 child < branch_factor && child_tid < num_threads;
1066 child++, child_tid += (1 << level)) {
1067 kmp_info_t *child_thr = other_threads[child_tid];
1068 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1069#if KMP_CACHE_MANAGE
1070 kmp_uint32 next_child_tid = child_tid + (1 << level);
1071 // Prefetch next thread's arrived count
1072 if (child + 1 < branch_factor && next_child_tid < num_threads)
1073 KMP_CACHE_PREFETCH(
1074 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
1075#endif /* KMP_CACHE_MANAGE */
1076 KA_TRACE(20,
1077 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1078 "arrived(%p) == %llu\n",
1079 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1080 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
1081 // Wait for child to arrive
1082 kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
1083 c_flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1084 KMP_MB(); // Synchronize parent and child threads.
1085#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1086 // Barrier imbalance - write min of the thread time and a child time to
1087 // the thread.
1088 if (__kmp_forkjoin_frames_mode == 2) {
1089 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
1090 child_thr->th.th_bar_min_time);
1091 }
1092#endif
1093 if (reduce) {
1094 KA_TRACE(100,
1095 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
1096 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1097 team->t.t_id, child_tid));
1098 OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task
->ompt_task_info.task_data)); ompt_data_t *my_parallel_data
= (&(this_thr->th.th_team->t.ompt_team_info.parallel_data
)); void *return_address = __ompt_load_return_address(gtid);
;
1099 OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_begin, my_parallel_data, my_task_data, return_address
); }
;
1100 (*reduce)(this_thr->th.th_local.reduce_data,
1101 child_thr->th.th_local.reduce_data);
1102 OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_end, my_parallel_data, my_task_data, return_address
); }
;
1103 }
1104 }
1105 }
1106
1107 if (KMP_MASTER_TID(tid)(0 == (tid))) {
1108 // Need to update the team arrived pointer if we are the primary thread
1109 if (new_state == KMP_BARRIER_UNUSED_STATE(1 << 1))
1110 team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP(1 << 2);
1111 else
1112 team->t.t_bar[bt].b_arrived = new_state;
1113 KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
1114 "arrived(%p) = %llu\n",
1115 gtid, team->t.t_id, tid, team->t.t_id,
1116 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
1117 }
1118 KA_TRACE(
1119 20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1120 gtid, team->t.t_id, tid, bt));
1121}
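In the hyper barrier gather above, the bit tests decide each thread's role per level: a thread whose low bits are non-zero at a level signals the subcube root tid & ~((1 << (level + branch_bits)) - 1) and stops, while subcube roots keep gathering children at tid + (child << level). The sketch below is standalone and illustrative only; num_threads and branch_bits are assumed values, and it just traces those decisions.

// Standalone sketch (illustrative): traces the per-level decisions made by
// __kmp_hyper_barrier_gather using the same bit arithmetic.
#include <cstdio>

int main() {
  const unsigned num_threads = 8;  // assumed team size
  const unsigned branch_bits = 1;  // branch_factor = 2: plain hypercube
  const unsigned branch_factor = 1u << branch_bits;

  for (unsigned tid = 0; tid < num_threads; ++tid) {
    for (unsigned level = 0, offset = 1; offset < num_threads;
         level += branch_bits, offset <<= branch_bits) {
      if (((tid >> level) & (branch_factor - 1)) != 0) {
        // Same formula as the gather: mask the low (level + branch_bits)
        // bits to find the subcube root this thread reports to.
        unsigned parent_tid = tid & ~((1u << (level + branch_bits)) - 1);
        std::printf("tid %u signals parent %u at level %u\n", tid, parent_tid,
                    level);
        break;
      }
      // Otherwise this thread is a subcube root and gathers its children.
      for (unsigned child = 1, child_tid = tid + (1u << level);
           child < branch_factor && child_tid < num_threads;
           ++child, child_tid += (1u << level))
        std::printf("tid %u gathers child %u at level %u\n", tid, child_tid,
                    level);
    }
  }
  return 0;
}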
1122
1123// The reverse versions seem to beat the forward versions overall
1124#define KMP_REVERSE_HYPER_BAR
1125static void __kmp_hyper_barrier_release(
1126 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1127 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
1128 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release)((void)0);
1129 kmp_team_t *team;
1130 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1131 kmp_info_t **other_threads;
1132 kmp_uint32 num_threads;
1133 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
1134 kmp_uint32 branch_factor = 1 << branch_bits;
1135 kmp_uint32 child;
1136 kmp_uint32 child_tid;
1137 kmp_uint32 offset;
1138 kmp_uint32 level;
1139
1140 /* Perform a hypercube-embedded tree release for all of the threads that have
1141 been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
1142 are released in the reverse order of the corresponding gather, otherwise
1143 threads are released in the same order. */
1144 if (KMP_MASTER_TID(tid)(0 == (tid))) { // primary thread
1145 team = __kmp_threads[gtid]->th.th_team;
1146 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 1146); }
;
1147 KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "
1148 "barrier type %d\n",
1149 gtid, team->t.t_id, tid, bt));
1150#if KMP_BARRIER_ICV_PUSH1
1151 if (propagate_icvs) { // primary already has ICVs in final destination; copy
1152 copy_icvs(&thr_bar->th_fixed_icvs,
1153 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1154 }
1155#endif
1156 } else { // Handle fork barrier workers who aren't part of a team yet
1157 KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
1158 &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
1159 // Wait for parent thread to release us
1160 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2));
1161 flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1162#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1163 if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) {
1164 // In fork barrier where we could not get the object reliably
1165 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
1166 // Cancel wait on previous parallel region...
1167 __kmp_itt_task_starting(itt_sync_obj);
1168
1169 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
1170 return;
1171
1172 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1173 if (itt_sync_obj != NULL__null)
1174 // Call prepare as early as possible for "new" barrier
1175 __kmp_itt_task_finished(itt_sync_obj);
1176 } else
1177#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1178 // Early exit for reaping threads releasing forkjoin barrier
1179 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
1180 return;
1181
1182 // The worker thread may now assume that the team is valid.
1183 team = __kmp_threads[gtid]->th.th_team;
1184 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 1184); }
;
1185 tid = __kmp_tid_from_gtid(gtid);
1186
1187 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0);
1188 KA_TRACE(20,
1189 ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1190 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1191 KMP_MB(); // Flush all pending memory write invalidates.
1192 }
1193 num_threads = this_thr->th.th_team_nproc;
1194 other_threads = team->t.t_threads;
1195
1196#ifdef KMP_REVERSE_HYPER_BAR
1197 // Count up to correct level for parent
1198 for (level = 0, offset = 1;
1199 offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
1200 level += branch_bits, offset <<= branch_bits)
1201 ;
1202
1203 // Now go down from there
1204 for (level -= branch_bits, offset >>= branch_bits; offset != 0;
1205 level -= branch_bits, offset >>= branch_bits)
1206#else
1207 // Go down the tree, level by level
1208 for (level = 0, offset = 1; offset < num_threads;
1209 level += branch_bits, offset <<= branch_bits)
1210#endif // KMP_REVERSE_HYPER_BAR
1211 {
1212#ifdef KMP_REVERSE_HYPER_BAR
1213 /* Now go in reverse order through the children, highest to lowest.
1214 Initial setting of child is conservative here. */
1215 child = num_threads >> ((level == 0) ? level : level - 1);
1216 for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
1217 child_tid = tid + (child << level);
1218 child >= 1; child--, child_tid -= (1 << level))
1219#else
1220 if (((tid >> level) & (branch_factor - 1)) != 0)
1221 // No need to go lower than this, since this is the level parent would be
1222 // notified
1223 break;
1224 // Iterate through children on this level of the tree
1225 for (child = 1, child_tid = tid + (1 << level);
1226 child < branch_factor && child_tid < num_threads;
1227 child++, child_tid += (1 << level))
1228#endif // KMP_REVERSE_HYPER_BAR
1229 {
1230 if (child_tid >= num_threads)
1231 continue; // Child doesn't exist so keep going
1232 else {
1233 kmp_info_t *child_thr = other_threads[child_tid];
1234 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1235#if KMP_CACHE_MANAGE
1236 kmp_uint32 next_child_tid = child_tid - (1 << level);
1237// Prefetch next thread's go count
1238#ifdef KMP_REVERSE_HYPER_BAR
1239 if (child - 1 >= 1 && next_child_tid < num_threads)
1240#else
1241 if (child + 1 < branch_factor && next_child_tid < num_threads)
1242#endif // KMP_REVERSE_HYPER_BAR
1243 KMP_CACHE_PREFETCH(
1244 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
1245#endif /* KMP_CACHE_MANAGE */
1246
1247#if KMP_BARRIER_ICV_PUSH1
1248 if (propagate_icvs) // push my fixed ICVs to my child
1249 copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
1250#endif // KMP_BARRIER_ICV_PUSH
1251
1252 KA_TRACE(
1253 20,
1254 ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1255 "go(%p): %u => %u\n",
1256 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1257 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1258 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1259 // Release child from barrier
1260 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1261 flag.release();
1262 }
1263 }
1264 }
1265#if KMP_BARRIER_ICV_PUSH1
1266 if (propagate_icvs &&
1267 !KMP_MASTER_TID(tid)(0 == (tid))) { // copy ICVs locally to final dest
1268 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1269 FALSE0);
1270 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1271 &thr_bar->th_fixed_icvs);
1272 }
1273#endif
1274 KA_TRACE(
1275 20,
1276 ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1277 gtid, team->t.t_id, tid, bt));
1278}
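With KMP_REVERSE_HYPER_BAR defined, the release above first counts up to the level at which a thread was woken and then walks back down, releasing children highest level first and in reverse order within each level, i.e. the mirror image of the gather. The sketch below is standalone and illustrative; the team size and branch_bits are assumed, and it simply prints the release order produced by that bookkeeping.

// Standalone sketch (illustrative): reproduces the reverse-order level/offset
// bookkeeping of __kmp_hyper_barrier_release and prints who releases whom.
#include <cstdio>

int main() {
  const unsigned num_threads = 8;  // assumed team size
  const unsigned branch_bits = 1;
  const unsigned branch_factor = 1u << branch_bits;

  for (unsigned tid = 0; tid < num_threads; ++tid) {
    unsigned level, offset;
    // Count up to the level at which this thread was (or would be) released.
    for (level = 0, offset = 1;
         offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
         level += branch_bits, offset <<= branch_bits)
      ;
    // Walk back down, releasing children in reverse order at each level.
    // (Unsigned wrap-around of 'level' is harmless: 'offset' reaches 0 first.)
    for (level -= branch_bits, offset >>= branch_bits; offset != 0;
         level -= branch_bits, offset >>= branch_bits) {
      unsigned child = num_threads >> ((level == 0) ? level : level - 1);
      for (child = (child < branch_factor - 1) ? child : branch_factor - 1;
           child >= 1; --child) {
        unsigned child_tid = tid + (child << level);
        if (child_tid < num_threads)
          std::printf("tid %u releases tid %u (level %u)\n", tid, child_tid,
                      level);
      }
    }
  }
  return 0;
}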
1279
1280// Hierarchical Barrier
1281
1282// Initialize thread barrier data
1283/* Initializes/re-initializes the hierarchical barrier data stored on a thread.
1284 Performs the minimum amount of initialization required based on how the team
1285 has changed. Returns true if leaf children will require both on-core and
1286 traditional wake-up mechanisms. For example, if the team size increases,
1287 threads already in the team will respond to on-core wakeup on their parent
1288 thread, but threads newly added to the team will only be listening on
1289 their local b_go. */
1290static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
1291 kmp_bstate_t *thr_bar,
1292 kmp_uint32 nproc, int gtid,
1293 int tid, kmp_team_t *team) {
1294 // Checks to determine if (re-)initialization is needed
1295 bool uninitialized = thr_bar->team == NULL__null;
1296 bool team_changed = team != thr_bar->team;
1297 bool team_sz_changed = nproc != thr_bar->nproc;
1298 bool tid_changed = tid != thr_bar->old_tid;
1299 bool retval = false;
1300
1301 if (uninitialized || team_sz_changed) {
1302 __kmp_get_hierarchy(nproc, thr_bar);
1303 }
1304
1305 if (uninitialized || team_sz_changed || tid_changed) {
1306 thr_bar->my_level = thr_bar->depth - 1; // default for primary thread
1307 thr_bar->parent_tid = -1; // default for primary thread
1308 if (!KMP_MASTER_TID(tid)(0 == (tid))) {
1309 // if not primary thread, find parent thread in hierarchy
1310 kmp_uint32 d = 0;
1311 while (d < thr_bar->depth) { // find parent based on level of thread in
1312 // hierarchy, and note level
1313 kmp_uint32 rem;
1314 if (d == thr_bar->depth - 2) { // reached level right below the primary
1315 thr_bar->parent_tid = 0;
1316 thr_bar->my_level = d;
1317 break;
1318 } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
1319 // TODO: can we make the above op faster?
1320 // thread is not a subtree root at next level, so this is max
1321 thr_bar->parent_tid = tid - rem;
1322 thr_bar->my_level = d;
1323 break;
1324 }
1325 ++d;
1326 }
1327 }
1328 __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) /
1329 (thr_bar->skip_per_level[thr_bar->my_level])),
1330 &(thr_bar->offset));
1331 thr_bar->old_tid = tid;
1332 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0;
1333 thr_bar->team = team;
1334 thr_bar->parent_bar =
1335 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1336 }
1337 if (uninitialized || team_changed || tid_changed) {
1338 thr_bar->team = team;
1339 thr_bar->parent_bar =
1340 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1341 retval = true;
1342 }
1343 if (uninitialized || team_sz_changed || tid_changed) {
1344 thr_bar->nproc = nproc;
1345 thr_bar->leaf_kids = thr_bar->base_leaf_kids;
1346 if (thr_bar->my_level == 0)
1347 thr_bar->leaf_kids = 0;
1348 if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
1349 __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids));
1350 thr_bar->leaf_state = 0;
1351 for (int i = 0; i < thr_bar->leaf_kids; ++i)
1352 ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
1353 }
1354 return retval;
1355}
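The parent search above walks skip_per_level[], where entry d is the size of a subtree rooted at level d, and picks the first level at which tid is not a subtree root (parent_tid = tid - tid % skip_per_level[d + 1]), with the level just below the primary always reporting to tid 0. The sketch below is standalone; the hierarchy shape and skip_per_level values are invented purely to show what the search computes.

// Standalone sketch (illustrative): mimics the parent search in
// __kmp_init_hierarchical_barrier_thread for a made-up two-level machine.
#include <cstdio>

int main() {
  // Hypothetical hierarchy: 4 threads per core, 4 cores under the primary.
  const unsigned skip_per_level[] = {1, 4, 16}; // assumed subtree sizes
  const unsigned depth = 3;
  const unsigned nproc = 16;

  for (unsigned tid = 1; tid < nproc; ++tid) { // tid 0 is the primary thread
    unsigned parent_tid = 0, my_level = depth - 1;
    for (unsigned d = 0; d < depth; ++d) {
      unsigned rem;
      if (d == depth - 2) {            // level right below the primary
        parent_tid = 0;
        my_level = d;
        break;
      } else if ((rem = tid % skip_per_level[d + 1]) != 0) {
        parent_tid = tid - rem;        // nearest subtree root at this level
        my_level = d;
        break;
      }
    }
    std::printf("tid %u: my_level %u, parent %u\n", tid, my_level, parent_tid);
  }
  return 0;
}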
1356
1357static void __kmp_hierarchical_barrier_gather(
1358 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1359 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
1360 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather)((void)0);
1361 kmp_team_t *team = this_thr->th.th_team;
1362 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1363 kmp_uint32 nproc = this_thr->th.th_team_nproc;
1364 kmp_info_t **other_threads = team->t.t_threads;
1365 kmp_uint64 new_state = 0;
1366
1367 int level = team->t.t_level;
1368 if (other_threads[0]
1369 ->th.th_teams_microtask) // are we inside the teams construct?
1370 if (this_thr->th.th_teams_size.nteams > 1)
1371 ++level; // level was not increased in teams construct for team_of_masters
1372 if (level == 1)
1373 thr_bar->use_oncore_barrier = 1;
1374 else
1375 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
1376
1377 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
1378 "barrier type %d\n",
1379 gtid, team->t.t_id, tid, bt));
1380 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid])if (!(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid
])) { __kmp_debug_assert("this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]"
, "openmp/runtime/src/kmp_barrier.cpp", 1380); }
;
1381
1382#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1383 // Barrier imbalance - save arrive time to the thread
1384 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
1385 this_thr->th.th_bar_arrive_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1386 }
1387#endif
1388
1389 (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
1390 team);
1391
1392 if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
1393 kmp_int32 child_tid;
1394 new_state =
1395 (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2);
1396 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
1397 thr_bar->use_oncore_barrier) {
1398 if (thr_bar->leaf_kids) {
1399 // First, wait for leaf children to check-in on my b_arrived flag
1400 kmp_uint64 leaf_state =
1401 KMP_MASTER_TID(tid)(0 == (tid))
1402 ? thr_bar->b_arrived | thr_bar->leaf_state
1403 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
1404 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
1405 "for leaf kids\n",
1406 gtid, team->t.t_id, tid));
1407 kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
1408 flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1409 if (reduce) {
1410 OMPT_REDUCTION_DECL(this_thr, gtid)ompt_data_t *my_task_data = (&(this_thr->th.th_current_task
->ompt_task_info.task_data)); ompt_data_t *my_parallel_data
= (&(this_thr->th.th_team->t.ompt_team_info.parallel_data
)); void *return_address = __ompt_load_return_address(gtid);
;
1411 OMPT_REDUCTION_BEGINif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_begin, my_parallel_data, my_task_data, return_address
); }
;
1412 for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
1413 ++child_tid) {
1414 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1415 "T#%d(%d:%d)\n",
1416 gtid, team->t.t_id, tid,
1417 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1418 child_tid));
1419 (*reduce)(this_thr->th.th_local.reduce_data,
1420 other_threads[child_tid]->th.th_local.reduce_data);
1421 }
1422 OMPT_REDUCTION_ENDif (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction
) { ompt_callbacks.ompt_callback_reduction_callback( ompt_sync_region_reduction
, ompt_scope_end, my_parallel_data, my_task_data, return_address
); }
;
1423 }
1424 // clear leaf_state bits
1425 KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state))__sync_fetch_and_and((volatile kmp_uint64 *)(&thr_bar->
b_arrived), (kmp_uint64)(~(thr_bar->leaf_state)))
;
1426 }
1427 // Next, wait for higher level children on each child's b_arrived flag
1428 for (kmp_uint32 d = 1; d < thr_bar->my_level;
1429 ++d) { // gather lowest level threads first, but skip 0
1430 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1431 skip = thr_bar->skip_per_level[d];
1432 if (last > nproc)
1433 last = nproc;
1434 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1435 kmp_info_t *child_thr = other_threads[child_tid];
1436 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1437 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1438 "T#%d(%d:%d) "
1439 "arrived(%p) == %llu\n",
1440 gtid, team->t.t_id, tid,
1441 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1442 child_tid, &child_bar->b_arrived, new_state));
1443 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
1444 flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1445 if (reduce) {
1446 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1447 "T#%d(%d:%d)\n",
1448 gtid, team->t.t_id, tid,
1449 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1450 child_tid));
1451 (*reduce)(this_thr->th.th_local.reduce_data,
1452 child_thr->th.th_local.reduce_data);
1453 }
1454 }
1455 }
1456 } else { // Blocktime is not infinite
1457 for (kmp_uint32 d = 0; d < thr_bar->my_level;
1458 ++d) { // Gather lowest level threads first
1459 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1460 skip = thr_bar->skip_per_level[d];
1461 if (last > nproc)
1462 last = nproc;
1463 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1464 kmp_info_t *child_thr = other_threads[child_tid];
1465 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1466 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1467 "T#%d(%d:%d) "
1468 "arrived(%p) == %llu\n",
1469 gtid, team->t.t_id, tid,
1470 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1471 child_tid, &child_bar->b_arrived, new_state));
1472 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
1473 flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1474 if (reduce) {
1475 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1476 "T#%d(%d:%d)\n",
1477 gtid, team->t.t_id, tid,
1478 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1479 child_tid));
1480 (*reduce)(this_thr->th.th_local.reduce_data,
1481 child_thr->th.th_local.reduce_data);
1482 }
1483 }
1484 }
1485 }
1486 }
1487 // All subordinates are gathered; now release parent if not primary thread
1488
1489 if (!KMP_MASTER_TID(tid)(0 == (tid))) { // worker threads release parent in hierarchy
1490 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
1491 " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
1492 gtid, team->t.t_id, tid,
1493 __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
1494 thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
1495 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
1496 /* Mark arrival to parent: After performing this write, a worker thread may
1497 not assume that the team is valid any more - it could be deallocated by
1498 the primary thread at any time. */
1499 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) ||
1500 !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
1501 // flag; release it
1502 kmp_flag_64<> flag(&thr_bar->b_arrived,
1503 other_threads[thr_bar->parent_tid]);
1504 flag.release();
1505 } else {
1506 // Leaf does special release on "offset" bits of parent's b_arrived flag
1507 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2);
1508 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
1509 thr_bar->offset + 1);
1510 flag.set_waiter(other_threads[thr_bar->parent_tid]);
1511 flag.release();
1512 }
1513 } else { // Primary thread needs to update the team's b_arrived value
1514 team->t.t_bar[bt].b_arrived = new_state;
1515     KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
1516                   "arrived(%p) = %llu\n",
1517                   gtid, team->t.t_id, tid, team->t.t_id,
1518                   &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
1519 }
1520 // Is the team access below unsafe or just technically invalid?
1521   KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
1522                 "barrier type %d\n",
1523                 gtid, team->t.t_id, tid, bt));
1524}
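In the gather path above (lines 1499-1512), a worker signals arrival to its parent either by bumping the full 64-bit b_arrived flag the parent spins on, or, as a leaf under the on-core barrier, by writing only its designated byte inside the parent's packed word (the offset + 1 indexing). The following standalone sketch, using plain C++ types and hypothetical helper names, illustrates only that packed-byte idea; it is not runtime code.

  #include <cstdint>

  // Pack each child's "arrived" marker into one byte of the parent's 64-bit
  // word so the parent polls a single cache line instead of one flag per child.
  static void mark_child_arrived(volatile std::uint64_t *parent_word, int offset) {
    // offset + 1 mirrors the convention above of reserving byte 0.
    reinterpret_cast<volatile unsigned char *>(parent_word)[offset + 1] = 1;
  }

  static bool all_children_arrived(const volatile std::uint64_t *parent_word,
                                   int nkids) {
    const volatile unsigned char *bytes =
        reinterpret_cast<const volatile unsigned char *>(parent_word);
    for (int i = 0; i < nkids; ++i)
      if (bytes[i + 1] == 0)
        return false;
    return true;
  }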
1525
1526static void __kmp_hierarchical_barrier_release(
1527 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1528 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
1529 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release)((void)0);
1530 kmp_team_t *team;
1531 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1532 kmp_uint32 nproc;
1533 bool team_change = false; // indicates on-core barrier shouldn't be used
1534
1535 if (KMP_MASTER_TID(tid)(0 == (tid))) {
1536 team = __kmp_threads[gtid]->th.th_team;
1537 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 1537); }
;
1538     KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "
1539                   "entered barrier type %d\n",
1540                   gtid, team->t.t_id, tid, bt));
1541 } else { // Worker threads
1542 // Wait for parent thread to release me
1543 if (!thr_bar->use_oncore_barrier ||
1544 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || thr_bar->my_level != 0 ||
1545 thr_bar->team == NULL__null) {
1546 // Use traditional method of waiting on my own b_go flag
1547 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG1;
1548 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2));
1549 flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1550 TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0)
1551 KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time
1552 } else { // Thread barrier data is initialized, this is a leaf, blocktime is
1553 // infinite, not nested
1554 // Wait on my "offset" bits on parent's b_go flag
1555 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG2;
1556 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2),
1557 thr_bar->offset + 1, bt,
1558 this_thr USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1559 flag.wait(this_thr, TRUE(!0));
1560 if (thr_bar->wait_flag ==
1561 KMP_BARRIER_SWITCHING4) { // Thread was switched to own b_go
1562 TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0)
1563 KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time
1564 } else { // Reset my bits on parent's b_go flag
1565 (RCAST(volatile char *,reinterpret_cast<volatile char *>(&(thr_bar->parent_bar
->b_go))
1566 &(thr_bar->parent_bar->b_go))reinterpret_cast<volatile char *>(&(thr_bar->parent_bar
->b_go))
)[thr_bar->offset + 1] = 0;
1567 }
1568 }
1569 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0;
1570 // Early exit for reaping threads releasing forkjoin barrier
1571 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
1572 return;
1573 // The worker thread may now assume that the team is valid.
1574 team = __kmp_threads[gtid]->th.th_team;
1575 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"openmp/runtime/src/kmp_barrier.cpp", 1575); }
;
1576 tid = __kmp_tid_from_gtid(gtid);
1577
1578     KA_TRACE(
1579         20,
1580         ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1581          gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1582 KMP_MB(); // Flush all pending memory write invalidates.
1583 }
1584
1585 nproc = this_thr->th.th_team_nproc;
1586 int level = team->t.t_level;
1587 if (team->t.t_threads[0]
1588 ->th.th_teams_microtask) { // are we inside the teams construct?
1589 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1590 this_thr->th.th_teams_level == level)
1591 ++level; // level was not increased in teams construct for team_of_workers
1592 if (this_thr->th.th_teams_size.nteams > 1)
1593 ++level; // level was not increased in teams construct for team_of_masters
1594 }
1595 if (level == 1)
1596 thr_bar->use_oncore_barrier = 1;
1597 else
1598 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
1599
1600 // If the team size has increased, we still communicate with old leaves via
1601 // oncore barrier.
1602 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1603 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1604 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1605 tid, team);
1606 // But if the entire team changes, we won't use oncore barrier at all
1607 if (team_change)
1608 old_leaf_kids = 0;
1609
1610#if KMP_BARRIER_ICV_PUSH1
1611 if (propagate_icvs) {
1612 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1613 FALSE0);
1614     if (KMP_MASTER_TID(
1615             tid)(0 == (tid))) { // primary already has copy in final destination; copy
1616 copy_icvs(&thr_bar->th_fixed_icvs,
1617 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1618 } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
1619 thr_bar->use_oncore_barrier) { // optimization for inf blocktime
1620 if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
1621 // leaves (on-core children) pull parent's fixed ICVs directly to local
1622 // ICV store
1623 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1624 &thr_bar->parent_bar->th_fixed_icvs);
1625 // non-leaves will get ICVs piggybacked with b_go via NGO store
1626 } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
1627 if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
1628 // access
1629 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1630 else // leaves copy parent's fixed ICVs directly to local ICV store
1631 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1632 &thr_bar->parent_bar->th_fixed_icvs);
1633 }
1634 }
1635#endif // KMP_BARRIER_ICV_PUSH
1636
1637 // Now, release my children
1638 if (thr_bar->my_level) { // not a leaf
1639 kmp_int32 child_tid;
1640 kmp_uint32 last;
1641 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
1642 thr_bar->use_oncore_barrier) {
1643 if (KMP_MASTER_TID(tid)(0 == (tid))) { // do a flat release
1644 // Set local b_go to bump children via NGO store of the cache line
1645 // containing IVCs and b_go.
1646 thr_bar->b_go = KMP_BARRIER_STATE_BUMP(1 << 2);
1647 // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
1648 // the cache line
1649 ngo_load(&thr_bar->th_fixed_icvs)((void)0);
1650 // This loops over all the threads skipping only the leaf nodes in the
1651 // hierarchy
1652 for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
1653 child_tid += thr_bar->skip_per_level[1]) {
1654 kmp_bstate_t *child_bar =
1655 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1656         KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1657                       "releasing T#%d(%d:%d)"
1658                       " go(%p): %u => %u\n",
1659                       gtid, team->t.t_id, tid,
1660                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1661                       child_tid, &child_bar->b_go, child_bar->b_go,
1662                       child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1663 // Use ngo store (if available) to both store ICVs and release child
1664 // via child's b_go
1665 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs)memcpy((&child_bar->th_fixed_icvs), (&thr_bar->
th_fixed_icvs), 64)
;
1666 }
1667 ngo_sync()((void)0);
1668 }
1669 TCW_8(thr_bar->b_go,(thr_bar->b_go) = (0)
1670 KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0); // Reset my b_go flag for next time
1671 // Now, release leaf children
1672 if (thr_bar->leaf_kids) { // if there are any
1673 // We test team_change on the off-chance that the level 1 team changed.
1674 if (team_change ||
1675 old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
1676 if (old_leaf_kids) { // release old leaf kids
1677 thr_bar->b_go |= old_leaf_state;
1678 }
1679 // Release new leaf kids
1680 last = tid + thr_bar->skip_per_level[1];
1681 if (last > nproc)
1682 last = nproc;
1683 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1684 ++child_tid) { // skip_per_level[0]=1
1685 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1686 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1687           KA_TRACE(
1688               20,
1689               ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1690                " T#%d(%d:%d) go(%p): %u => %u\n",
1691                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1692                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1693                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1694 // Release child using child's b_go flag
1695 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1696 flag.release();
1697 }
1698 } else { // Release all children at once with leaf_state bits on my own
1699 // b_go flag
1700 thr_bar->b_go |= thr_bar->leaf_state;
1701 }
1702 }
1703 } else { // Blocktime is not infinite; do a simple hierarchical release
1704 for (int d = thr_bar->my_level - 1; d >= 0;
1705 --d) { // Release highest level threads first
1706 last = tid + thr_bar->skip_per_level[d + 1];
1707 kmp_uint32 skip = thr_bar->skip_per_level[d];
1708 if (last > nproc)
1709 last = nproc;
1710 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1711 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1712 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1713         KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1714                       "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1715                       gtid, team->t.t_id, tid,
1716                       __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1717                       child_tid, &child_bar->b_go, child_bar->b_go,
1718                       child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1719 // Release child using child's b_go flag
1720 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1721 flag.release();
1722 }
1723 }
1724 }
1725#if KMP_BARRIER_ICV_PUSH1
1726 if (propagate_icvs && !KMP_MASTER_TID(tid)(0 == (tid)))
1727 // non-leaves copy ICVs from fixed ICVs to local dest
1728 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1729 &thr_bar->th_fixed_icvs);
1730#endif // KMP_BARRIER_ICV_PUSH
1731 }
1732   KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1733                 "barrier type %d\n",
1734                 gtid, team->t.t_id, tid, bt));
1735}
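In the finite-blocktime branch above (lines 1703-1724), the release order is: at each level d, from the highest level down, the releasing thread owns the tid range [tid, tid + skip_per_level[d+1]) and wakes every skip_per_level[d]-th thread inside it. A standalone sketch of that traversal, assuming plain ints instead of the runtime's flag objects (hypothetical helper, not runtime code):

  #include <cstdio>

  static void release_children_sketch(int tid, int nproc, int my_level,
                                      const unsigned *skip_per_level) {
    for (int d = my_level - 1; d >= 0; --d) { // highest level first
      unsigned last = tid + skip_per_level[d + 1];
      if (last > (unsigned)nproc)
        last = (unsigned)nproc;
      unsigned skip = skip_per_level[d];
      for (unsigned child_tid = tid + skip; child_tid < last; child_tid += skip)
        std::printf("T#%d releases T#%u at level %d\n", tid, child_tid, d);
    }
  }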
1736
1737// End of Barrier Algorithms
1738
1739// type traits for cancellable value
1740// if cancellable is true, then is_cancellable is a normal boolean variable
1741// if cancellable is false, then is_cancellable is a compile time constant
1742template <bool cancellable> struct is_cancellable {};
1743template <> struct is_cancellable<true> {
1744 bool value;
1745 is_cancellable() : value(false) {}
1746 is_cancellable(bool b) : value(b) {}
1747 is_cancellable &operator=(bool b) {
1748 value = b;
1749 return *this;
1750 }
1751 operator bool() const { return value; }
1752};
1753template <> struct is_cancellable<false> {
1754 is_cancellable &operator=(bool b) { return *this; }
1755 constexpr operator bool() const { return false; }
1756};
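A minimal illustration (hypothetical, not part of the runtime) of why the <false> specialization costs nothing: its conversion operator is a constexpr that returns false, so any branch on it folds away and the cancellation bookkeeping disappears from the non-cancellable instantiation of __kmp_barrier_template below.

  template <bool cancellable>
  static int barrier_body_sketch(bool observed_cancel) {
    is_cancellable<cancellable> cancelled;
    cancelled = observed_cancel; // no-op when cancellable == false
    if (cancelled)               // constant false when cancellable == false
      return 1;                  // cancelled
    return 0;                    // completed normally
  }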
1757
1758// Internal function to do a barrier.
1759/* If is_split is true, do a split barrier, otherwise, do a plain barrier
1760 If reduce is non-NULL, do a split reduction barrier, otherwise, do a split
1761 barrier
1762 When cancellable = false,
1763 Returns 0 if primary thread, 1 if worker thread.
1764 When cancellable = true
1765 Returns 0 if not cancelled, 1 if cancelled. */
1766template <bool cancellable = false>
1767static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
1768 size_t reduce_size, void *reduce_data,
1769 void (*reduce)(void *, void *)) {
1770 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier)((void)0);
1771 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0);
1772 int tid = __kmp_tid_from_gtid(gtid);
1773 kmp_info_t *this_thr = __kmp_threads[gtid];
1774 kmp_team_t *team = this_thr->th.th_team;
1775 int status = 0;
1776 is_cancellable<cancellable> cancelled;
1777#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1778 ompt_data_t *my_task_data;
1779 ompt_data_t *my_parallel_data;
1780 void *return_address;
1781 ompt_sync_region_t barrier_kind;
1782#endif
1783
1784   KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1785                 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1786
1787#if OMPT_SUPPORT1
1788 if (ompt_enabled.enabled) {
1789#if OMPT_OPTIONAL1
1790 my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
1791 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
;
1792 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1793 barrier_kind = __ompt_get_barrier_kind(bt, this_thr);
1794 if (ompt_enabled.ompt_callback_sync_region) {
1795 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
1796 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1797 return_address);
1798 }
1799 if (ompt_enabled.ompt_callback_sync_region_wait) {
1800 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
1801 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1802 return_address);
1803 }
1804#endif
1805 // It is OK to report the barrier state after the barrier begin callback.
1806 // According to the OMPT specification, a compliant implementation may
1807 // even delay reporting this state until the barrier begins to wait.
1808 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1809 }
1810#endif
1811
1812 if (!team->t.t_serialized) {
1813#if USE_ITT_BUILD1
1814 // This value will be used in itt notify events below.
1815 void *itt_sync_obj = NULL__null;
1816#if USE_ITT_NOTIFY1
1817 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1818 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1819#endif
1820#endif /* USE_ITT_BUILD */
1821 if (__kmp_tasking_mode == tskm_extra_barrier) {
1822 __kmp_tasking_barrier(team, this_thr, gtid);
1823       KA_TRACE(15,
1824                ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1825                 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1826 }
1827
1828 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1829 access it when the team struct is not guaranteed to exist. */
1830 // See note about the corresponding code in __kmp_join_barrier() being
1831 // performance-critical.
1832 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
1833#if KMP_USE_MONITOR
1834 this_thr->th.th_team_bt_intervals =
1835 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1836 this_thr->th.th_team_bt_set =
1837 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1838#else
1839 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs
.bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec
)
;
1840#endif
1841 }
1842
1843#if USE_ITT_BUILD1
1844 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1845 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1846#endif /* USE_ITT_BUILD */
1847#if USE_DEBUGGER0
1848 // Let the debugger know: the thread arrived to the barrier and waiting.
1849 if (KMP_MASTER_TID(tid)(0 == (tid))) { // Primary thread counter stored in team struct
1850 team->t.t_bar[bt].b_master_arrived += 1;
1851 } else {
1852 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1853 } // if
1854#endif /* USE_DEBUGGER */
1855 if (reduce != NULL__null) {
1856 // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1857 this_thr->th.th_local.reduce_data = reduce_data;
1858 }
1859
1860 if (KMP_MASTER_TID(tid)(0 == (tid)) && __kmp_tasking_mode != tskm_immediate_exec)
1861 // use 0 to only setup the current team if nthreads > 1
1862 __kmp_task_team_setup(this_thr, team, 0);
1863
1864 if (cancellable) {
1865 cancelled = __kmp_linear_barrier_gather_cancellable(
1866 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1867 } else {
1868 switch (__kmp_barrier_gather_pattern[bt]) {
1869 case bp_dist_bar: {
1870 __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
1871 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1872 break;
1873 }
1874 case bp_hyper_bar: {
1875 // don't set branch bits to 0; use linear
1876 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1876); }
;
1877 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
1878 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1879 break;
1880 }
1881 case bp_hierarchical_bar: {
1882 __kmp_hierarchical_barrier_gather(
1883 bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1884 break;
1885 }
1886 case bp_tree_bar: {
1887 // don't set branch bits to 0; use linear
1888 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt])if (!(__kmp_barrier_gather_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_gather_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1888); }
;
1889 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
1890 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1891 break;
1892 }
1893 default: {
1894 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
1895 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1896 }
1897 }
1898 }
1899
1900 KMP_MB();
1901
1902 if (KMP_MASTER_TID(tid)(0 == (tid))) {
1903 status = 0;
1904 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
1905 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1906 }
1907#if USE_DEBUGGER0
1908 // Let the debugger know: All threads are arrived and starting leaving the
1909 // barrier.
1910 team->t.t_bar[bt].b_team_arrived += 1;
1911#endif
1912
1913 if (__kmp_omp_cancellation) {
1914 kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request)(&team->t.t_cancel_request)->load(std::memory_order_relaxed
)
;
1915 // Reset cancellation flag for worksharing constructs
1916 if (cancel_request == cancel_loop ||
1917 cancel_request == cancel_sections) {
1918 KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq)(&team->t.t_cancel_request)->store(cancel_noreq, std
::memory_order_relaxed)
;
1919 }
1920 }
1921#if USE_ITT_BUILD1
1922 /* TODO: In case of split reduction barrier, primary thread may send
1923 acquired event early, before the final summation into the shared
1924 variable is done (final summation can be a long operation for array
1925 reductions). */
1926 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1927 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1928#endif /* USE_ITT_BUILD */
1929#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1930 // Barrier - report frame end (only if active_level == 1)
1931 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
1932 __kmp_forkjoin_frames_mode &&
1933 (this_thr->th.th_teams_microtask == NULL__null || // either not in teams
1934 this_thr->th.th_teams_size.nteams == 1) && // or inside single team
1935 team->t.t_active_level == 1) {
1936 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
1937 kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1938 kmp_info_t **other_threads = team->t.t_threads;
1939 int nproc = this_thr->th.th_team_nproc;
1940 int i;
1941 switch (__kmp_forkjoin_frames_mode) {
1942 case 1:
1943 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1944 loc, nproc);
1945 this_thr->th.th_frame_time = cur_time;
1946 break;
1947 case 2: // AC 2015-01-19: currently does not work for hierarchical (to
1948 // be fixed)
1949 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1950 1, loc, nproc);
1951 break;
1952 case 3:
1953 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) {
1954 // Initialize with primary thread's wait time
1955 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1956 // Set arrive time to zero to be able to check it in
1957 // __kmp_invoke_task(); the same is done inside the loop below
1958 this_thr->th.th_bar_arrive_time = 0;
1959 for (i = 1; i < nproc; ++i) {
1960 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1961 other_threads[i]->th.th_bar_arrive_time = 0;
1962 }
1963 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1964 cur_time, delta,
1965 (kmp_uint64)(reduce != NULL__null));
1966 }
1967 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1968 loc, nproc);
1969 this_thr->th.th_frame_time = cur_time;
1970 break;
1971 }
1972 }
1973#endif /* USE_ITT_BUILD */
1974 } else {
1975 status = 1;
1976#if USE_ITT_BUILD1
1977 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1978 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1979#endif /* USE_ITT_BUILD */
1980 }
1981 if ((status == 1 || !is_split) && !cancelled) {
1982 if (cancellable) {
1983 cancelled = __kmp_linear_barrier_release_cancellable(
1984 bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1985 } else {
1986 switch (__kmp_barrier_release_pattern[bt]) {
1987 case bp_dist_bar: {
1988 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1988); }
;
1989 __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
1990 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1991 break;
1992 }
1993 case bp_hyper_bar: {
1994 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 1994); }
;
1995 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1996 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1997 break;
1998 }
1999 case bp_hierarchical_bar: {
2000 __kmp_hierarchical_barrier_release(
2001 bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2002 break;
2003 }
2004 case bp_tree_bar: {
2005 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 2005); }
;
2006 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
2007 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2008 break;
2009 }
2010 default: {
2011 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
2012 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2013 }
2014 }
2015 }
2016 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
2017 __kmp_task_team_sync(this_thr, team);
2018 }
2019 }
2020
2021#if USE_ITT_BUILD1
2022 /* GEH: TODO: Move this under if-condition above and also include in
2023 __kmp_end_split_barrier(). This will more accurately represent the actual
2024 release time of the threads for split barriers. */
2025 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2026 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2027#endif /* USE_ITT_BUILD */
2028 } else { // Team is serialized.
2029 status = 0;
2030 if (__kmp_tasking_mode != tskm_immediate_exec) {
2031 if (this_thr->th.th_task_team != NULL__null) {
2032#if USE_ITT_NOTIFY1
2033 void *itt_sync_obj = NULL__null;
2034 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
2035 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
2036 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
2037 }
2038#endif
2039
2040         KMP_DEBUG_ASSERT(
2041             this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||
2042             this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
2043                 TRUE);
2044 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2045 __kmp_task_team_setup(this_thr, team, 0);
2046
2047#if USE_ITT_BUILD1
2048 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2049 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2050#endif /* USE_ITT_BUILD */
2051 }
2052 }
2053 }
2054   KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2055                 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
2056                 __kmp_tid_from_gtid(gtid), status));
2057
2058#if OMPT_SUPPORT1
2059 if (ompt_enabled.enabled) {
2060#if OMPT_OPTIONAL1
2061 if (ompt_enabled.ompt_callback_sync_region_wait) {
2062 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
2063 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2064 return_address);
2065 }
2066 if (ompt_enabled.ompt_callback_sync_region) {
2067 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2068 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2069 return_address);
2070 }
2071#endif
2072 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
2073 }
2074#endif
2075
2076 if (cancellable)
2077 return (int)cancelled;
2078 return status;
2079}
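A hedged usage sketch of the contract stated at lines 1759-1765, roughly the way a reduction entry point would drive a split barrier (names and control flow are simplified assumptions, not a verbatim copy of the runtime's callers):

  static void split_reduction_sketch(int gtid, size_t reduce_size,
                                     void *reduce_data,
                                     void (*reduce_fn)(void *, void *)) {
    // is_split = TRUE: threads may leave right after the gather phase; the
    // matching release is issued later by __kmp_end_split_barrier().
    int status = __kmp_barrier(bs_reduction_barrier, gtid, TRUE, reduce_size,
                               reduce_data, reduce_fn);
    if (status == 0) {
      // Primary thread: all workers have arrived, and reduce_fn has already
      // folded their reduce_data into this thread's copy during the gather.
    }
    __kmp_end_split_barrier(bs_reduction_barrier, gtid);
  }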
2080
2081// Returns 0 if primary thread, 1 if worker thread.
2082int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
2083 size_t reduce_size, void *reduce_data,
2084 void (*reduce)(void *, void *)) {
2085 return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data,
2086 reduce);
2087}
2088
2089#if defined(KMP_GOMP_COMPAT)
2090// Returns 1 if cancelled, 0 otherwise
2091int __kmp_barrier_gomp_cancel(int gtid) {
2092 if (__kmp_omp_cancellation) {
2093 int cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE0,
2094 0, NULL__null, NULL__null);
2095 if (cancelled) {
2096 int tid = __kmp_tid_from_gtid(gtid);
2097 kmp_info_t *this_thr = __kmp_threads[gtid];
2098 if (KMP_MASTER_TID(tid)(0 == (tid))) {
2099 // Primary thread does not need to revert anything
2100 } else {
2101 // Workers need to revert their private b_arrived flag
2102 this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived -=
2103 KMP_BARRIER_STATE_BUMP(1 << 2);
2104 }
2105 }
2106 return cancelled;
2107 }
2108 __kmp_barrier(bs_plain_barrier, gtid, FALSE0, 0, NULL__null, NULL__null);
2109 return FALSE0;
2110}
2111#endif
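The rollback at lines 2102-2103 relies on the barrier-state arithmetic: b_arrived advances by KMP_BARRIER_STATE_BUMP (1 << 2) per barrier, so a worker whose gather was cancelled subtracts one bump to stay in step with the team's unreleased barrier state. A trivial illustration of that bookkeeping (hypothetical helper, not runtime code):

  enum { STATE_BUMP_SKETCH = 1 << 2 }; // mirrors KMP_BARRIER_STATE_BUMP

  static unsigned long long undo_cancelled_arrival(unsigned long long b_arrived) {
    // e.g. 8 -> 4: the worker's private counter again matches the team's
    // t_bar[bt].b_arrived, which the primary never advanced for this barrier.
    return b_arrived - STATE_BUMP_SKETCH;
  }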
2112
2113void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
2114 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier)((void)0);
2115 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0);
2116 KMP_DEBUG_ASSERT(bt < bs_last_barrier)if (!(bt < bs_last_barrier)) { __kmp_debug_assert("bt < bs_last_barrier"
, "openmp/runtime/src/kmp_barrier.cpp", 2116); }
;
2117 int tid = __kmp_tid_from_gtid(gtid);
2118 kmp_info_t *this_thr = __kmp_threads[gtid];
2119 kmp_team_t *team = this_thr->th.th_team;
2120
2121 if (!team->t.t_serialized) {
2122 if (KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid)))) {
2123 switch (__kmp_barrier_release_pattern[bt]) {
2124 case bp_dist_bar: {
2125 __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
2126 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2127 break;
2128 }
2129 case bp_hyper_bar: {
2130 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 2130); }
;
2131 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
2132 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2133 break;
2134 }
2135 case bp_hierarchical_bar: {
2136 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
2137 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2138 break;
2139 }
2140 case bp_tree_bar: {
2141 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "openmp/runtime/src/kmp_barrier.cpp"
, 2141); }
;
2142 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
2143 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2144 break;
2145 }
2146 default: {
2147 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
2148 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
2149 }
2150 }
2151 if (__kmp_tasking_mode != tskm_immediate_exec) {
2152 __kmp_task_team_sync(this_thr, team);
2153 } // if
2154 }
2155 }
2156}
2157
2158void __kmp_join_barrier(int gtid) {
2159 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier)((void)0);
2160 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0);
2161
2162 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert
("__kmp_threads && __kmp_threads[gtid]", "openmp/runtime/src/kmp_barrier.cpp"
, 2162); }
;
1
Assuming '__kmp_threads' is null
2
Taking true branch
2163
2164 kmp_info_t *this_thr = __kmp_threads[gtid];
2165 kmp_team_t *team;
2166 int tid;
2167#ifdef KMP_DEBUG1
2168 int team_id;
2169#endif /* KMP_DEBUG */
2170#if USE_ITT_BUILD1
2171 void *itt_sync_obj = NULL__null;
2172#if USE_ITT_NOTIFY1
2173 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) // Don't call routine without need
3
Assuming '__kmp_itt_sync_create_ptr__3_0' is null
4
Taking false branch
2174 // Get object created at fork_barrier
2175 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2176#endif
2177#endif /* USE_ITT_BUILD */
2178#if ((USE_ITT_BUILD1 && USE_ITT_NOTIFY1) || defined KMP_DEBUG1)
2179 int nproc = this_thr->th.th_team_nproc;
2180#endif
2181 KMP_MB();
2182
2183 // Get current info
2184 team = this_thr->th.th_team;
2185 KMP_DEBUG_ASSERT(nproc == team->t.t_nproc)if (!(nproc == team->t.t_nproc)) { __kmp_debug_assert("nproc == team->t.t_nproc"
, "openmp/runtime/src/kmp_barrier.cpp", 2185); }
;
5
Assuming 'nproc' is not equal to field 't_nproc'
6
Taking true branch
2186 tid = __kmp_tid_from_gtid(gtid);
7
Calling '__kmp_tid_from_gtid'
13
Returning from '__kmp_tid_from_gtid'
2187#ifdef KMP_DEBUG1
2188 team_id = team->t.t_id;
2189 kmp_info_t *master_thread = this_thr->th.th_team_master;
2190 if (master_thread != team->t.t_threads[0]) {
14
Assuming the condition is false
15
Taking false branch
2191 __kmp_print_structure();
2192 }
2193#endif /* KMP_DEBUG */
2194 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0])if (!(master_thread == team->t.t_threads[0])) { __kmp_debug_assert
("master_thread == team->t.t_threads[0]", "openmp/runtime/src/kmp_barrier.cpp"
, 2194); }
;
16
Taking false branch
2195 KMP_MB();
2196
2197 // Verify state
2198 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team))if (!(((void *)(this_thr->th.th_team)))) { __kmp_debug_assert
("((void *)(this_thr->th.th_team))", "openmp/runtime/src/kmp_barrier.cpp"
, 2198); }
;
17
Taking false branch
2199 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root))if (!(((void *)(this_thr->th.th_root)))) { __kmp_debug_assert
("((void *)(this_thr->th.th_root))", "openmp/runtime/src/kmp_barrier.cpp"
, 2199); }
;
18
Assuming field 'th_root' is non-null
19
Taking false branch
2200 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid])if (!(this_thr == team->t.t_threads[tid])) { __kmp_debug_assert
("this_thr == team->t.t_threads[tid]", "openmp/runtime/src/kmp_barrier.cpp"
, 2200); }
;
20
Assuming the condition is false
21
Taking false branch
2201 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n"
, gtid, team_id, tid); }
22
Assuming 'kmp_a_debug' is < 10
23
Taking false branch
2202 gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n"
, gtid, team_id, tid); }
;
2203
2204#if OMPT_SUPPORT1
2205 if (ompt_enabled.enabled) {
24
Assuming field 'enabled' is not equal to 0
25
Taking true branch
2206#if OMPT_OPTIONAL1
2207 ompt_data_t *my_task_data;
2208 ompt_data_t *my_parallel_data;
2209 void *codeptr = NULL__null;
2210 int ds_tid = this_thr->th.th_info.ds.ds_tid;
2211 if (KMP_MASTER_TID(ds_tid)(0 == (ds_tid)) &&
26
Assuming 'ds_tid' is equal to 0
29
Taking false branch
2212 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
27
Assuming field 'ompt_callback_sync_region_wait_callback' is null
2213 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
28
Assuming field 'ompt_callback_sync_region_callback' is null
2214 codeptr = team->t.ompt_team_info.master_return_address;
2215 my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
2216 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
;
2217 if (ompt_enabled.ompt_callback_sync_region) {
30
Assuming field 'ompt_callback_sync_region' is 0
31
Taking false branch
2218 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
2219 ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
2220 my_task_data, codeptr);
2221 }
2222 if (ompt_enabled.ompt_callback_sync_region_wait) {
32
Assuming field 'ompt_callback_sync_region_wait' is not equal to 0
33
Taking true branch
2223 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
34
Called function pointer is null (null dereference)
2224 ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
2225 my_task_data, codeptr);
2226 }
2227 if (!KMP_MASTER_TID(ds_tid)(0 == (ds_tid)))
2228 this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
2229#endif
2230 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
2231 }
2232#endif
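The reported path assumes the ompt_callback_sync_region_wait_callback pointer is null (step 27) while the matching enabled bit is nonzero (step 32), which yields the null call at line 2223; in practice ompt_set_callback keeps the bit and the pointer in sync, so this is likely a false positive. A hedged sketch of the guard that would silence the assumed path, using only constructs already present above (whether it belongs in the code is a judgment call, not an established fix):

  if (ompt_enabled.ompt_callback_sync_region_wait &&
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)) {
    // Invoke only if the registered function pointer is actually non-null.
    ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
        ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
        my_task_data, codeptr);
  }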
2233
2234 if (__kmp_tasking_mode == tskm_extra_barrier) {
2235 __kmp_tasking_barrier(team, this_thr, gtid);
2236     KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
2237                   gtid, team_id, tid));
2238 }
2239#ifdef KMP_DEBUG1
2240 if (__kmp_tasking_mode != tskm_immediate_exec) {
2241     KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
2242                   "%p, th_task_team = %p\n",
2243                   __kmp_gtid_from_thread(this_thr), team_id,
2244                   team->t.t_task_team[this_thr->th.th_task_state],
2245                   this_thr->th.th_task_team));
2246 if (this_thr->th.th_task_team)
2247 KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==if (!(this_thr->th.th_task_team == team->t.t_task_team[
this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_barrier.cpp", 2248); }
2248 team->t.t_task_team[this_thr->th.th_task_state])if (!(this_thr->th.th_task_team == team->t.t_task_team[
this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]"
, "openmp/runtime/src/kmp_barrier.cpp", 2248); }
;
2249 }
2250#endif /* KMP_DEBUG */
2251
2252 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
2253 access it when the team struct is not guaranteed to exist. Doing these
2254 loads causes a cache miss slows down EPCC parallel by 2x. As a workaround,
2255 we do not perform the copy if blocktime=infinite, since the values are not
2256 used by __kmp_wait_template() in that case. */
2257 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
2258#if KMP_USE_MONITOR
2259 this_thr->th.th_team_bt_intervals =
2260 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
2261 this_thr->th.th_team_bt_set =
2262 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
2263#else
2264 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs
.bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec
)
;
2265#endif
2266 }
2267
2268#if USE_ITT_BUILD1
2269 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2270 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
2271#endif /* USE_ITT_BUILD */
2272
2273 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
2274 case bp_dist_bar: {
2275 __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2276 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2277 break;
2278 }
2279 case bp_hyper_bar: {
2280 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]))
{ __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]"
, "openmp/runtime/src/kmp_barrier.cpp", 2280); }
;
2281 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2282 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2283 break;
2284 }
2285 case bp_hierarchical_bar: {
2286 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2287 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2288 break;
2289 }
2290 case bp_tree_bar: {
2291 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]))
{ __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]"
, "openmp/runtime/src/kmp_barrier.cpp", 2291); }
;
2292 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2293 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2294 break;
2295 }
2296 default: {
2297 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
2298 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2299 }
2300 }
2301
2302 /* From this point on, the team data structure may be deallocated at any time
2303 by the primary thread - it is unsafe to reference it in any of the worker
2304 threads. Any per-team data items that need to be referenced before the
2305 end of the barrier should be moved to the kmp_task_team_t structs. */
2306 if (KMP_MASTER_TID(tid)(0 == (tid))) {
2307 if (__kmp_tasking_mode != tskm_immediate_exec) {
2308 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
2309 }
2310 if (__kmp_display_affinity) {
2311 KMP_CHECK_UPDATE(team->t.t_display_affinity, 0)if ((team->t.t_display_affinity) != (0)) (team->t.t_display_affinity
) = (0)
;
2312 }
2313#if KMP_STATS_ENABLED0
2314 // Have primary thread flag the workers to indicate they are now waiting for
2315 // next parallel region, Also wake them up so they switch their timers to
2316 // idle.
2317 for (int i = 0; i < team->t.t_nproc; ++i) {
2318 kmp_info_t *team_thread = team->t.t_threads[i];
2319 if (team_thread == this_thr)
2320 continue;
2321 team_thread->th.th_stats->setIdleFlag();
2322 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) &&
2323 team_thread->th.th_sleep_loc != NULL__null)
2324 __kmp_null_resume_wrapper(team_thread);
2325 }
2326#endif
2327#if USE_ITT_BUILD1
2328 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2329 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
2330#endif /* USE_ITT_BUILD */
2331
2332#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
2333 // Join barrier - report frame end
2334 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
2335 __kmp_forkjoin_frames_mode &&
2336 (this_thr->th.th_teams_microtask == NULL__null || // either not in teams
2337 this_thr->th.th_teams_size.nteams == 1) && // or inside single team
2338 team->t.t_active_level == 1) {
2339 kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
2340 ident_t *loc = team->t.t_ident;
2341 kmp_info_t **other_threads = team->t.t_threads;
2342 switch (__kmp_forkjoin_frames_mode) {
2343 case 1:
2344 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
2345 loc, nproc);
2346 break;
2347 case 2:
2348 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
2349 loc, nproc);
2350 break;
2351 case 3:
2352 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) {
2353 // Initialize with primary thread's wait time
2354 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
2355 // Set arrive time to zero to be able to check it in
2356 // __kmp_invoke_task(); the same is done inside the loop below
2357 this_thr->th.th_bar_arrive_time = 0;
2358 for (int i = 1; i < nproc; ++i) {
2359 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
2360 other_threads[i]->th.th_bar_arrive_time = 0;
2361 }
2362 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
2363 cur_time, delta, 0);
2364 }
2365 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
2366 loc, nproc);
2367 this_thr->th.th_frame_time = cur_time;
2368 break;
2369 }
2370 }
2371#endif /* USE_ITT_BUILD */
2372 }
2373#if USE_ITT_BUILD1
2374 else {
2375 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
2376 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
2377 }
2378#endif /* USE_ITT_BUILD */
2379
2380#if KMP_DEBUG1
2381 if (KMP_MASTER_TID(tid)(0 == (tid))) {
2382     KA_TRACE(
2383         15,
2384         ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
2385          gtid, team_id, tid, nproc));
2386 }
2387#endif /* KMP_DEBUG */
2388
2389 // TODO now, mark worker threads as done so they may be disbanded
2390 KMP_MB(); // Flush all pending memory write invalidates.
2391   KA_TRACE(10,
2392            ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
2393
2394}
2395
2396// TODO release worker threads' fork barriers as we are ready instead of all at
2397// once
2398void __kmp_fork_barrier(int gtid, int tid) {
2399 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier)((void)0);
2400 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0);
2401 kmp_info_t *this_thr = __kmp_threads[gtid];
2402 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL__null;
2403#if USE_ITT_BUILD1
2404 void *itt_sync_obj = NULL__null;
2405#endif /* USE_ITT_BUILD */
2406 if (team)
2407
2408   KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
2409                 (team != NULL) ? team->t.t_id : -1, tid));
2410
2411 // th_team pointer only valid for primary thread here
2412 if (KMP_MASTER_TID(tid)) {
2413#if USE_ITT_BUILD && USE_ITT_NOTIFY
2414 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2415 // Create itt barrier object
2416 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
2417 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
2418 }
2419#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2420
2421#ifdef KMP_DEBUG
2422 KMP_DEBUG_ASSERT(team);
2423 kmp_info_t **other_threads = team->t.t_threads;
2424 int i;
2425
2426 // Verify state
2427 KMP_MB();
2428
2429 for (i = 1; i < team->t.t_nproc; ++i) {
2430 KA_TRACE(500,
2431 ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
2432 "== %u.\n",
2433 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
2434 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
2435 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
2436 KMP_DEBUG_ASSERT(
2437 (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
2438 ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
2439 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
2440 }
2441#endif
2442
2443 if (__kmp_tasking_mode != tskm_immediate_exec) {
2444 // 0 indicates setup current task team if nthreads > 1
2445 __kmp_task_team_setup(this_thr, team, 0);
2446 }
2447
2448 /* The primary thread may have changed its blocktime between join barrier
2449 and fork barrier. Copy the blocktime info to the thread, where
2450 __kmp_wait_template() can access it when the team struct is not
2451 guaranteed to exist. */
2452 // See note about the corresponding code in __kmp_join_barrier() being
2453 // performance-critical
2454 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
2455#if KMP_USE_MONITOR
2456 this_thr->th.th_team_bt_intervals =
2457 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
2458 this_thr->th.th_team_bt_set =
2459 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
2460#else
2461 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
2462#endif
2463 }
2464 } // primary thread
2465
2466 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
2467 case bp_dist_bar: {
2468 __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2469 TRUE USE_ITT_BUILD_ARG(NULL));
2470 break;
2471 }
2472 case bp_hyper_bar: {
2473 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
2474 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2475 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
2476 break;
2477 }
2478 case bp_hierarchical_bar: {
2479 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2480 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
2481 break;
2482 }
2483 case bp_tree_bar: {
2484 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
2485 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2486 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
2487 break;
2488 }
2489 default: {
2490 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
2491 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
2492 }
2493 }
2494
2495#if OMPT_SUPPORT
2496 if (ompt_enabled.enabled &&
2497 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
2498 int ds_tid = this_thr->th.th_info.ds.ds_tid;
2499 ompt_data_t *task_data = (team)
2500 ? OMPT_CUR_TASK_DATA(this_thr)
2501 : &(this_thr->th.ompt_thread_info.task_data);
2502 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
2503#if OMPT_OPTIONAL
2504 void *codeptr = NULL;
2505 if (KMP_MASTER_TID(ds_tid) &&
2506 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
2507 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
2508 codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
2509 if (ompt_enabled.ompt_callback_sync_region_wait) {
2510 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2511 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
2512 codeptr);
2513 }
2514 if (ompt_enabled.ompt_callback_sync_region) {
2515 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2516 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
2517 codeptr);
2518 }
2519#endif
2520 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
2521 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2522 ompt_scope_end, NULL, task_data, 0, ds_tid,
2523 ompt_task_implicit); // TODO: Can this be ompt_task_initial?
2524 }
2525 }
2526#endif
2527
2528 // Early exit for reaping threads releasing forkjoin barrier
2529 if (TCR_4(__kmp_global.g.g_done)) {
2530 this_thr->th.th_task_team = NULL;
2531
2532#if USE_ITT_BUILD && USE_ITT_NOTIFY
2533 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2534 if (!KMP_MASTER_TID(tid)) {
2535 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2536 if (itt_sync_obj)
2537 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2538 }
2539 }
2540#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2541 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
2542 return;
2543 }
2544
2545 /* We can now assume that a valid team structure has been allocated by the
2546 primary thread and propagated to all worker threads. The current thread,
2547 however, may not be part of the team, so we can't blindly assume that the
2548 team pointer is non-null. */
2549 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
2550 KMP_DEBUG_ASSERT(team != NULL);
2551 tid = __kmp_tid_from_gtid(gtid);
2552
2553#if KMP_BARRIER_ICV_PULL
2554 /* Primary thread's copy of the ICVs was set up on the implicit taskdata in
2555 __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
2556 implicit task has this data before this function is called. We cannot
2557 modify __kmp_fork_call() to look at the fixed ICVs in the primary thread's
2558 thread struct, because it is not always the case that the threads arrays
2559 have been allocated when __kmp_fork_call() is executed. */
2560 {
2561 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
2562 if (!KMP_MASTER_TID(tid)) { // primary thread already has ICVs
2563 // Copy the initial ICVs from the primary thread's thread struct to the
2564 // implicit task for this tid.
2565 KA_TRACE(10,
2566 ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
2567 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
2568 tid, FALSE);
2569 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
2570 &team->t.t_threads[0]
2571 ->th.th_bar[bs_forkjoin_barrier]
2572 .bb.th_fixed_icvs);
2573 }
2574 }
2575#endif // KMP_BARRIER_ICV_PULL
2576
2577 if (__kmp_tasking_mode != tskm_immediate_exec) {
2578 __kmp_task_team_sync(this_thr, team);
2579 }
2580
2581#if KMP_AFFINITY_SUPPORTED
2582 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
2583 if (proc_bind == proc_bind_intel) {
2584 // Call dynamic affinity settings
2585 if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) {
2586 __kmp_balanced_affinity(this_thr, team->t.t_nproc);
2587 }
2588 } else if (proc_bind != proc_bind_false) {
2589 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
2590 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
2591 __kmp_gtid_from_thread(this_thr),
2592 this_thr->th.th_current_place));
2593 } else {
2594 __kmp_affinity_set_place(gtid);
2595 }
2596 }
2597#endif // KMP_AFFINITY_SUPPORTED
2598 // Perform the display affinity functionality
2599 if (__kmp_display_affinity) {
2600 if (team->t.t_display_affinity
2601#if KMP_AFFINITY_SUPPORTED
2602 || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed)
2603#endif
2604 ) {
2605 // NULL means use the affinity-format-var ICV
2606 __kmp_aux_display_affinity(gtid, NULL);
2607 this_thr->th.th_prev_num_threads = team->t.t_nproc;
2608 this_thr->th.th_prev_level = team->t.t_level;
2609 }
2610 }
2611 if (!KMP_MASTER_TID(tid))
2612 KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator);
2613
2614#if USE_ITT_BUILD && USE_ITT_NOTIFY
2615 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2616 if (!KMP_MASTER_TID(tid)) {
2617 // Get correct barrier object
2618 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2619 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
2620 } // (prepare called inside barrier_release)
2621 }
2622#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2623 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
2624 team->t.t_id, tid));
2625}
2626
2627void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
2628 kmp_internal_control_t *new_icvs, ident_t *loc) {
2629 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
2630
2631 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
2632 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
2633
2634/* Primary thread's copy of the ICVs was set up on the implicit taskdata in
2635 __kmp_reinitialize_team. __kmp_fork_call() assumes the primary thread's
2636 implicit task has this data before this function is called. */
2637#if KMP_BARRIER_ICV_PULL
2638 /* Copy ICVs to primary thread's thread structure into th_fixed_icvs (which
2639 remains untouched), where all of the worker threads can access them and
2640 make their own copies after the barrier. */
2641 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
2642 // allocated at this point
2643 copy_icvs(
2644 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
2645 new_icvs);
2646 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
2647 team->t.t_threads[0], team));
2648#elif KMP_BARRIER_ICV_PUSH
2649 // The ICVs will be propagated in the fork barrier, so nothing needs to be
2650 // done here.
2651 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
2652 team->t.t_threads[0], team));
2653#else
2654 // Copy the ICVs to each of the non-primary threads. This takes O(nthreads)
2655 // time.
2656 ngo_load(new_icvs);
2657 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
2658 // allocated at this point
2659 for (int f = 1; f < new_nproc; ++f) { // Skip the primary thread
2660 // TODO: GEH - pass in better source location info since usually NULL here
2661 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2662 f, team->t.t_threads[f], team));
2663 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
2664 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
2665 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2666 f, team->t.t_threads[f], team));
2667 }
2668 ngo_sync();
2669#endif // KMP_BARRIER_ICV_PULL
2670}
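As an editorial summary (not part of the analyzed source): the three compile-time ICV propagation strategies used by __kmp_setup_icv_copy and __kmp_fork_barrier above reduce to the following data movement; the field names are the ones from the listing, everything else is simplified.

// Hedged sketch only: who copies the new ICVs, and when.
#if KMP_BARRIER_ICV_PULL
  // PULL: the primary thread stashes one copy in its fork/join barrier struct
  // (th_fixed_icvs); each worker copies it into its own implicit task right
  // after the fork-barrier release (see lines 2562-2572 above).
  copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
            new_icvs);
#elif KMP_BARRIER_ICV_PUSH
  // PUSH: nothing to copy here; the barrier release itself carries the ICVs.
#else
  // LINEAR: the primary thread writes every worker's implicit task up front,
  // O(nthreads) work before the workers are released (lines 2659-2667 above).
  for (int f = 1; f < new_nproc; ++f)
    copy_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
#endif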

/build/source/openmp/runtime/src/kmp.h

1/*! \file */
2/*
3 * kmp.h -- KPTS runtime header file.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef KMP_H
15#define KMP_H
16
17#include "kmp_config.h"
18
19/* #define BUILD_PARALLEL_ORDERED 1 */
20
21/* This fix replaces gettimeofday with clock_gettime for better scalability on
22 the Altix. Requires user code to be linked with -lrt. */
23//#define FIX_SGI_CLOCK
24
25/* Defines for OpenMP 3.0 tasking and auto scheduling */
26
27#ifndef KMP_STATIC_STEAL_ENABLED1
28#define KMP_STATIC_STEAL_ENABLED1 1
29#endif
30
31#define TASK_CURRENT_NOT_QUEUED0 0
32#define TASK_CURRENT_QUEUED1 1
33
34#ifdef BUILD_TIED_TASK_STACK
35#define TASK_STACK_EMPTY 0 // entries when the stack is empty
36#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
37// Number of entries in each task stack array
38#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
39// Mask for determining index into stack block
40#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
41#endif // BUILD_TIED_TASK_STACK
42
43#define TASK_NOT_PUSHED1 1
44#define TASK_SUCCESSFULLY_PUSHED0 0
45#define TASK_TIED1 1
46#define TASK_UNTIED0 0
47#define TASK_EXPLICIT1 1
48#define TASK_IMPLICIT0 0
49#define TASK_PROXY1 1
50#define TASK_FULL0 0
51#define TASK_DETACHABLE1 1
52#define TASK_UNDETACHABLE0 0
53
54#define KMP_CANCEL_THREADS
55#define KMP_THREAD_ATTR
56
57// Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being
58// built on Android
59#if defined(__ANDROID__)
60#undef KMP_CANCEL_THREADS
61#endif
62
63#include <signal.h>
64#include <stdarg.h>
65#include <stddef.h>
66#include <stdio.h>
67#include <stdlib.h>
68#include <string.h>
69#include <limits>
70#include <type_traits>
71/* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad
72 Microsoft library. Some macros provided below to replace these functions */
73#ifndef __ABSOFT_WIN
74#include <sys/types.h>
75#endif
76#include <limits.h>
77#include <time.h>
78
79#include <errno.h>
80
81#include "kmp_os.h"
82
83#include "kmp_safe_c_api.h"
84
85#if KMP_STATS_ENABLED0
86class kmp_stats_list;
87#endif
88
89#if KMP_USE_HIER_SCHED0
90// Only include hierarchical scheduling if affinity is supported
91#undef KMP_USE_HIER_SCHED0
92#define KMP_USE_HIER_SCHED0 KMP_AFFINITY_SUPPORTED1
93#endif
94
95#if KMP_USE_HWLOC0 && KMP_AFFINITY_SUPPORTED1
96#include "hwloc.h"
97#ifndef HWLOC_OBJ_NUMANODE
98#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
99#endif
100#ifndef HWLOC_OBJ_PACKAGE
101#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET
102#endif
103#endif
104
105#if KMP_ARCH_X860 || KMP_ARCH_X86_641
106#include <xmmintrin.h>
107#endif
108
109// The below has to be defined before including "kmp_barrier.h".
110#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
111#define KMP_INTERNAL_FREE(p) free(p)
112#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
113#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
114
115#include "kmp_debug.h"
116#include "kmp_lock.h"
117#include "kmp_version.h"
118#include "kmp_barrier.h"
119#if USE_DEBUGGER0
120#include "kmp_debugger.h"
121#endif
122#include "kmp_i18n.h"
123
124#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)
125
126#include "kmp_wrapper_malloc.h"
127#if KMP_OS_UNIX1
128#include <unistd.h>
129#if !defined NSIG && defined _NSIG
130#define NSIG _NSIG
131#endif
132#endif
133
134#if KMP_OS_LINUX1
135#pragma weak clock_gettime
136#endif
137
138#if OMPT_SUPPORT1
139#include "ompt-internal.h"
140#endif
141
142#if OMPD_SUPPORT1
143#include "ompd-specific.h"
144#endif
145
146#ifndef UNLIKELY
147#define UNLIKELY(x) (x)
148#endif
149
150// Affinity format function
151#include "kmp_str.h"
152
153// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
154// 3 - fast allocation using sync, non-sync free lists of any size, non-self
155// free lists of limited size.
156#ifndef USE_FAST_MEMORY3
157#define USE_FAST_MEMORY3 3
158#endif
159
160#ifndef KMP_NESTED_HOT_TEAMS
161#define KMP_NESTED_HOT_TEAMS 0
162#define USE_NESTED_HOT_ARG(x)
163#else
164#if KMP_NESTED_HOT_TEAMS
165#define USE_NESTED_HOT_ARG(x) , x
166#else
167#define USE_NESTED_HOT_ARG(x)
168#endif
169#endif
170
171// Assume using BGET compare_exchange instruction instead of lock by default.
172#ifndef USE_CMP_XCHG_FOR_BGET1
173#define USE_CMP_XCHG_FOR_BGET1 1
174#endif
175
176// Test to see if queuing lock is better than bootstrap lock for bget
177// #ifndef USE_QUEUING_LOCK_FOR_BGET
178// #define USE_QUEUING_LOCK_FOR_BGET
179// #endif
180
181#define KMP_NSEC_PER_SEC 1000000000L
182#define KMP_USEC_PER_SEC 1000000L
183
184/*!
185@ingroup BASIC_TYPES
186@{
187*/
188
189/*!
190Values for bit flags used in the ident_t to describe the fields.
191*/
192enum {
193 /*! Use trampoline for internal microtasks */
194 KMP_IDENT_IMB = 0x01,
195 /*! Use c-style ident structure */
196 KMP_IDENT_KMPC = 0x02,
197 /* 0x04 is no longer used */
198 /*! Entry point generated by auto-parallelization */
199 KMP_IDENT_AUTOPAR = 0x08,
200 /*! Compiler generates atomic reduction option for kmpc_reduce* */
201 KMP_IDENT_ATOMIC_REDUCE = 0x10,
202 /*! To mark a 'barrier' directive in user code */
203 KMP_IDENT_BARRIER_EXPL = 0x20,
204 /*! To Mark implicit barriers. */
205 KMP_IDENT_BARRIER_IMPL = 0x0040,
206 KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,
207 KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,
208 KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,
209
210 KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,
211 KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,
212
213 /*! To mark a static loop in OMPT callbacks */
214 KMP_IDENT_WORK_LOOP = 0x200,
215 /*! To mark a sections directive in OMPT callbacks */
216 KMP_IDENT_WORK_SECTIONS = 0x400,
217 /*! To mark a distribute construct in OMPT callbacks */
218 KMP_IDENT_WORK_DISTRIBUTE = 0x800,
219 /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and
220 not currently used. If one day we need more bits, then we can use
221 an invalid combination of hints to mean that another, larger field
222 should be used in a different flag. */
223 KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,
224 KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,
225 KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,
226 KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,
227 KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,
228 KMP_IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000
229};
230
231/*!
232 * The ident structure that describes a source location.
233 */
234typedef struct ident {
235 kmp_int32 reserved_1; /**< might be used in Fortran; see above */
236 kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
237 identifies this union member */
238 kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
239#if USE_ITT_BUILD1
240/* but currently used for storing region-specific ITT */
241/* contextual information. */
242#endif /* USE_ITT_BUILD */
243 kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
244 char const *psource; /**< String describing the source location.
245 The string is composed of semi-colon separated fields
246 which describe the source file, the function and a pair
247 of line numbers that delimit the construct. */
248 // Returns the OpenMP version in form major*10+minor (e.g., 50 for 5.0)
249 kmp_int32 get_openmp_version() {
250 return (((flags & KMP_IDENT_OPENMP_SPEC_VERSION_MASK) >> 24) & 0xFF);
251 }
252} ident_t;
253/*!
254@}
255*/
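As a worked illustration of the version encoding above (an editorial sketch, not part of kmp.h): the spec version occupies the top byte of flags, so a high byte of 50 decodes to OpenMP 5.0.

// Illustrative only; 50 << 24 places the version under
// KMP_IDENT_OPENMP_SPEC_VERSION_MASK (0xFF000000).
ident_t loc = {};
loc.flags = KMP_IDENT_KMPC | (50 << 24);
int v = loc.get_openmp_version(); // v == 50, i.e. OpenMP 5.0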
256
257// Some forward declarations.
258typedef union kmp_team kmp_team_t;
259typedef struct kmp_taskdata kmp_taskdata_t;
260typedef union kmp_task_team kmp_task_team_t;
261typedef union kmp_team kmp_team_p;
262typedef union kmp_info kmp_info_p;
263typedef union kmp_root kmp_root_p;
264
265template <bool C = false, bool S = true> class kmp_flag_32;
266template <bool C = false, bool S = true> class kmp_flag_64;
267template <bool C = false, bool S = true> class kmp_atomic_flag_64;
268class kmp_flag_oncore;
269
270#ifdef __cplusplus201703L
271extern "C" {
272#endif
273
274/* ------------------------------------------------------------------------ */
275
276/* Pack two 32-bit signed integers into a 64-bit signed integer */
277/* ToDo: Fix word ordering for big-endian machines. */
278#define KMP_PACK_64(HIGH_32, LOW_32) \
279 ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
280
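A quick sanity check of the packing macro (editorial example, not in the header): the first argument lands in the upper 32 bits, the second in the lower 32 bits.

// KMP_PACK_64(0x1, 0x2) == 0x0000000100000002
kmp_int64 packed = KMP_PACK_64(0x1, 0x2);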
281// Generic string manipulation macros. Assume that _x is of type char *
282#define SKIP_WS(_x) \
283 { \
284 while (*(_x) == ' ' || *(_x) == '\t') \
285 (_x)++; \
286 }
287#define SKIP_DIGITS(_x) \
288 { \
289 while (*(_x) >= '0' && *(_x) <= '9') \
290 (_x)++; \
291 }
292#define SKIP_TOKEN(_x) \
293 { \
294 while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \
295 (*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \
296 (_x)++; \
297 }
298#define SKIP_TO(_x, _c) \
299 { \
300 while (*(_x) != '\0' && *(_x) != (_c)) \
301 (_x)++; \
302 }
303
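These scanning macros advance a char * cursor in place; a small usage sketch follows (editorial, with a made-up input string).

char buf[] = "  1234,rest";
char *p = buf;
SKIP_WS(p);      // p now points at "1234,rest"
SKIP_DIGITS(p);  // p now points at ",rest"
SKIP_TO(p, ','); // no-op here: p already sits on the ','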
304/* ------------------------------------------------------------------------ */
305
306#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))
307#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
308
309/* ------------------------------------------------------------------------ */
310/* Enumeration types */
311
312enum kmp_state_timer {
313 ts_stop,
314 ts_start,
315 ts_pause,
316
317 ts_last_state
318};
319
320enum dynamic_mode {
321 dynamic_default,
322#ifdef USE_LOAD_BALANCE1
323 dynamic_load_balance,
324#endif /* USE_LOAD_BALANCE */
325 dynamic_random,
326 dynamic_thread_limit,
327 dynamic_max
328};
329
330/* external schedule constants, duplicate enum omp_sched in omp.h in order to
331 * not include it here */
332#ifndef KMP_SCHED_TYPE_DEFINED
333#define KMP_SCHED_TYPE_DEFINED
334typedef enum kmp_sched {
335 kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
336 // Note: need to adjust __kmp_sch_map global array in case enum is changed
337 kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
338 kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
339 kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
340 kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
341 kmp_sched_upper_std = 5, // upper bound for standard schedules
342 kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
343 kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
344#if KMP_STATIC_STEAL_ENABLED1
345 kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
346#endif
347 kmp_sched_upper,
348 kmp_sched_default = kmp_sched_static, // default scheduling
349 kmp_sched_monotonic = 0x80000000
350} kmp_sched_t;
351#endif
352
353/*!
354 @ingroup WORK_SHARING
355 * Describes the loop schedule to be used for a parallel for loop.
356 */
357enum sched_type : kmp_int32 {
358 kmp_sch_lower = 32, /**< lower bound for unordered values */
359 kmp_sch_static_chunked = 33,
360 kmp_sch_static = 34, /**< static unspecialized */
361 kmp_sch_dynamic_chunked = 35,
362 kmp_sch_guided_chunked = 36, /**< guided unspecialized */
363 kmp_sch_runtime = 37,
364 kmp_sch_auto = 38, /**< auto */
365 kmp_sch_trapezoidal = 39,
366
367 /* accessible only through KMP_SCHEDULE environment variable */
368 kmp_sch_static_greedy = 40,
369 kmp_sch_static_balanced = 41,
370 /* accessible only through KMP_SCHEDULE environment variable */
371 kmp_sch_guided_iterative_chunked = 42,
372 kmp_sch_guided_analytical_chunked = 43,
373 /* accessible only through KMP_SCHEDULE environment variable */
374 kmp_sch_static_steal = 44,
375
376 /* static with chunk adjustment (e.g., simd) */
377 kmp_sch_static_balanced_chunked = 45,
378 kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */
379 kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */
380
381 /* accessible only through KMP_SCHEDULE environment variable */
382 kmp_sch_upper, /**< upper bound for unordered values */
383
384 kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
385 kmp_ord_static_chunked = 65,
386 kmp_ord_static = 66, /**< ordered static unspecialized */
387 kmp_ord_dynamic_chunked = 67,
388 kmp_ord_guided_chunked = 68,
389 kmp_ord_runtime = 69,
390 kmp_ord_auto = 70, /**< ordered auto */
391 kmp_ord_trapezoidal = 71,
392 kmp_ord_upper, /**< upper bound for ordered values */
393
394 /* Schedules for Distribute construct */
395 kmp_distribute_static_chunked = 91, /**< distribute static chunked */
396 kmp_distribute_static = 92, /**< distribute static unspecialized */
397
398 /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
399 single iteration/chunk, even if the loop is serialized. For the schedule
400 types listed above, the entire iteration vector is returned if the loop is
401 serialized. This doesn't work for gcc/gcomp sections. */
402 kmp_nm_lower = 160, /**< lower bound for nomerge values */
403
404 kmp_nm_static_chunked =
405 (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
406 kmp_nm_static = 162, /**< static unspecialized */
407 kmp_nm_dynamic_chunked = 163,
408 kmp_nm_guided_chunked = 164, /**< guided unspecialized */
409 kmp_nm_runtime = 165,
410 kmp_nm_auto = 166, /**< auto */
411 kmp_nm_trapezoidal = 167,
412
413 /* accessible only through KMP_SCHEDULE environment variable */
414 kmp_nm_static_greedy = 168,
415 kmp_nm_static_balanced = 169,
416 /* accessible only through KMP_SCHEDULE environment variable */
417 kmp_nm_guided_iterative_chunked = 170,
418 kmp_nm_guided_analytical_chunked = 171,
419 kmp_nm_static_steal =
420 172, /* accessible only through OMP_SCHEDULE environment variable */
421
422 kmp_nm_ord_static_chunked = 193,
423 kmp_nm_ord_static = 194, /**< ordered static unspecialized */
424 kmp_nm_ord_dynamic_chunked = 195,
425 kmp_nm_ord_guided_chunked = 196,
426 kmp_nm_ord_runtime = 197,
427 kmp_nm_ord_auto = 198, /**< auto */
428 kmp_nm_ord_trapezoidal = 199,
429 kmp_nm_upper, /**< upper bound for nomerge values */
430
431 /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
432 we need to distinguish the three possible cases (no modifier, monotonic
433 modifier, nonmonotonic modifier), we need separate bits for each modifier.
434 The absence of monotonic does not imply nonmonotonic, especially since 4.5
435 says that the behaviour of the "no modifier" case is implementation defined
436 in 4.5, but will become "nonmonotonic" in 5.0.
437
438 Since we're passing a full 32 bit value, we can use a couple of high bits
439 for these flags; out of paranoia we avoid the sign bit.
440
441 These modifiers can be or-ed into non-static schedules by the compiler to
442 pass the additional information. They will be stripped early in the
443 processing in __kmp_dispatch_init when setting up schedules, so most of the
444 code won't ever see schedules with these bits set. */
445 kmp_sch_modifier_monotonic =
446 (1 << 29), /**< Set if the monotonic schedule modifier was present */
447 kmp_sch_modifier_nonmonotonic =
448 (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */
449
450#define SCHEDULE_WITHOUT_MODIFIERS(s) \
451 (enum sched_type)( \
452 (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
453#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
454#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
455#define SCHEDULE_HAS_NO_MODIFIERS(s) \
456 (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
457#define SCHEDULE_GET_MODIFIERS(s) \
458 ((enum sched_type)( \
459 (s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
460#define SCHEDULE_SET_MODIFIERS(s, m) \
461 (s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
462#define SCHEDULE_NONMONOTONIC 0
463#define SCHEDULE_MONOTONIC 1
464
465 kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
466};
467
468// Apply modifiers on internal kind to standard kind
469static inline void
470__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
471 enum sched_type internal_kind) {
472 if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {
473 *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
474 }
475}
476
477// Apply modifiers on standard kind to internal kind
478static inline void
479__kmp_sched_apply_mods_intkind(kmp_sched_t kind,
480 enum sched_type *internal_kind) {
481 if ((int)kind & (int)kmp_sched_monotonic) {
482 *internal_kind = (enum sched_type)((int)*internal_kind |
483 (int)kmp_sch_modifier_monotonic);
484 }
485}
486
487// Get standard schedule without modifiers
488static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
489 return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
490}
491
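For instance (editorial sketch): a schedule of kmp_sch_dynamic_chunked carrying the monotonic modifier decomposes with the macros above as follows.

enum sched_type s =
    (enum sched_type)(kmp_sch_dynamic_chunked | kmp_sch_modifier_monotonic);
// SCHEDULE_HAS_MONOTONIC(s)     -> true (bit 29 set)
// SCHEDULE_WITHOUT_MODIFIERS(s) -> kmp_sch_dynamic_chunked (35)
// SCHEDULE_HAS_NO_MODIFIERS(s)  -> false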
492/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
493typedef union kmp_r_sched {
494 struct {
495 enum sched_type r_sched_type;
496 int chunk;
497 };
498 kmp_int64 sched;
499} kmp_r_sched_t;
500
501extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our
502// internal schedule types
503
504enum library_type {
505 library_none,
506 library_serial,
507 library_turnaround,
508 library_throughput
509};
510
511#if KMP_OS_LINUX1
512enum clock_function_type {
513 clock_function_gettimeofday,
514 clock_function_clock_gettime
515};
516#endif /* KMP_OS_LINUX */
517
518#if KMP_MIC_SUPPORTED
519enum mic_type { non_mic, mic1, mic2, mic3, dummy };
520#endif
521
522/* -- fast reduction stuff ------------------------------------------------ */
523
524#undef KMP_FAST_REDUCTION_BARRIER
525#define KMP_FAST_REDUCTION_BARRIER 1
526
527#undef KMP_FAST_REDUCTION_CORE_DUO
528#if KMP_ARCH_X86 || KMP_ARCH_X86_64
529#define KMP_FAST_REDUCTION_CORE_DUO 1
530#endif
531
532enum _reduction_method {
533 reduction_method_not_defined = 0,
534 critical_reduce_block = (1 << 8),
535 atomic_reduce_block = (2 << 8),
536 tree_reduce_block = (3 << 8),
537 empty_reduce_block = (4 << 8)
538};
539
540// Description of the packed_reduction_method variable:
541// The packed_reduction_method variable consists of two enum types variables
542// that are packed together into 0-th byte and 1-st byte:
543// 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of
544// barrier that will be used in fast reduction: bs_plain_barrier or
545// bs_reduction_barrier
546// 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will
547// be used in fast reduction;
548// Reduction method is of 'enum _reduction_method' type and it's defined the way
549// so that the bits of 0-th byte are empty, so no need to execute a shift
550// instruction while packing/unpacking
551
552#if KMP_FAST_REDUCTION_BARRIER
553#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
554 ((reduction_method) | (barrier_type))
555
556#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
557 ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))
558
559#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
560 ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
561#else
562#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
563 (reduction_method)
564
565#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
566 (packed_reduction_method)
567
568#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
569#endif
570
571#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \
572 ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \
573 (which_reduction_block))
574
575#if KMP_FAST_REDUCTION_BARRIER
576#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
577 (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))
578
579#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
580 (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
581#endif
582
583typedef int PACKED_REDUCTION_METHOD_T;
584
585/* -- end of fast reduction stuff ----------------------------------------- */
586
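To make the byte layout concrete (editorial example; bs_reduction_barrier is one of the low-byte barrier_type values, whose numeric value is not shown in this excerpt):

PACKED_REDUCTION_METHOD_T packed =
    PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier);
// UNPACK_REDUCTION_METHOD(packed)  == tree_reduce_block (3 << 8, byte 1)
// UNPACK_REDUCTION_BARRIER(packed) == bs_reduction_barrier (byte 0)
// TEST_REDUCTION_METHOD(packed, tree_reduce_block) is therefore true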
587#if KMP_OS_WINDOWS0
588#define USE_CBLKDATA
589#if KMP_MSVC_COMPAT0
590#pragma warning(push)
591#pragma warning(disable : 271 310)
592#endif
593#include <windows.h>
594#if KMP_MSVC_COMPAT0
595#pragma warning(pop)
596#endif
597#endif
598
599#if KMP_OS_UNIX1
600#include <dlfcn.h>
601#include <pthread.h>
602#endif
603
604enum kmp_hw_t : int {
605 KMP_HW_UNKNOWN = -1,
606 KMP_HW_SOCKET = 0,
607 KMP_HW_PROC_GROUP,
608 KMP_HW_NUMA,
609 KMP_HW_DIE,
610 KMP_HW_LLC,
611 KMP_HW_L3,
612 KMP_HW_TILE,
613 KMP_HW_MODULE,
614 KMP_HW_L2,
615 KMP_HW_L1,
616 KMP_HW_CORE,
617 KMP_HW_THREAD,
618 KMP_HW_LAST
619};
620
621typedef enum kmp_hw_core_type_t {
622 KMP_HW_CORE_TYPE_UNKNOWN = 0x0,
623#if KMP_ARCH_X860 || KMP_ARCH_X86_641
624 KMP_HW_CORE_TYPE_ATOM = 0x20,
625 KMP_HW_CORE_TYPE_CORE = 0x40,
626 KMP_HW_MAX_NUM_CORE_TYPES = 3,
627#else
628 KMP_HW_MAX_NUM_CORE_TYPES = 1,
629#endif
630} kmp_hw_core_type_t;
631
632#define KMP_HW_MAX_NUM_CORE_EFFS8 8
633
634#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \
635 KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
636#define KMP_ASSERT_VALID_HW_TYPE(type) \
637 KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
638
639#define KMP_FOREACH_HW_TYPE(type) \
640 for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
641 type = (kmp_hw_t)((int)type + 1))
642
643const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
644const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
645const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type);
646
647/* Only Linux* OS and Windows* OS support thread affinity. */
648#if KMP_AFFINITY_SUPPORTED1
649
650// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
651#if KMP_OS_WINDOWS0
652#if _MSC_VER < 1600 && KMP_MSVC_COMPAT0
653typedef struct GROUP_AFFINITY {
654 KAFFINITY Mask;
655 WORD Group;
656 WORD Reserved[3];
657} GROUP_AFFINITY;
658#endif /* _MSC_VER < 1600 */
659#if KMP_GROUP_AFFINITY0
660extern int __kmp_num_proc_groups;
661#else
662static const int __kmp_num_proc_groups = 1;
663#endif /* KMP_GROUP_AFFINITY */
664typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
665extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
666
667typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
668extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
669
670typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
671extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
672
673typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
674 GROUP_AFFINITY *);
675extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
676#endif /* KMP_OS_WINDOWS */
677
678#if KMP_USE_HWLOC0
679extern hwloc_topology_t __kmp_hwloc_topology;
680extern int __kmp_hwloc_error;
681#endif
682
683extern size_t __kmp_affin_mask_size;
684#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
685#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
686#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
687#define KMP_CPU_SET_ITERATE(i, mask) \
688 for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))
689#define KMP_CPU_SET(i, mask) (mask)->set(i)
690#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
691#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
692#define KMP_CPU_ZERO(mask) (mask)->zero()
693#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
694#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
695#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
696#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
697#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
698#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
699#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
700#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
701#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
702#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
703#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
704#define KMP_CPU_ALLOC_ARRAY(arr, n) \
705 (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
706#define KMP_CPU_FREE_ARRAY(arr, n) \
707 __kmp_affinity_dispatch->deallocate_mask_array(arr)
708#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
709#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
710#define __kmp_get_system_affinity(mask, abort_bool) \
711 (mask)->get_system_affinity(abort_bool)
712#define __kmp_set_system_affinity(mask, abort_bool) \
713 (mask)->set_system_affinity(abort_bool)
714#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
715
716class KMPAffinity {
717public:
718 class Mask {
719 public:
720 void *operator new(size_t n);
721 void operator delete(void *p);
722 void *operator new[](size_t n);
723 void operator delete[](void *p);
724 virtual ~Mask() {}
725 // Set bit i to 1
726 virtual void set(int i) {}
727 // Return bit i
728 virtual bool is_set(int i) const { return false; }
729 // Set bit i to 0
730 virtual void clear(int i) {}
731 // Zero out entire mask
732 virtual void zero() {}
733 // Copy src into this mask
734 virtual void copy(const Mask *src) {}
735 // this &= rhs
736 virtual void bitwise_and(const Mask *rhs) {}
737 // this |= rhs
738 virtual void bitwise_or(const Mask *rhs) {}
739 // this = ~this
740 virtual void bitwise_not() {}
741 // API for iterating over an affinity mask
742 // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
743 virtual int begin() const { return 0; }
744 virtual int end() const { return 0; }
745 virtual int next(int previous) const { return 0; }
746#if KMP_OS_WINDOWS0
747 virtual int set_process_affinity(bool abort_on_error) const { return -1; }
748#endif
749 // Set the system's affinity to this affinity mask's value
750 virtual int set_system_affinity(bool abort_on_error) const { return -1; }
751 // Set this affinity mask to the current system affinity
752 virtual int get_system_affinity(bool abort_on_error) { return -1; }
753 // Only 1 DWORD in the mask should have any procs set.
754 // Return the appropriate index, or -1 for an invalid mask.
755 virtual int get_proc_group() const { return -1; }
756 int get_max_cpu() const {
757 int cpu;
758 int max_cpu = -1;
759 KMP_CPU_SET_ITERATE(cpu, this) {
760 if (cpu > max_cpu)
761 max_cpu = cpu;
762 }
763 return max_cpu;
764 }
765 };
766 void *operator new(size_t n);
767 void operator delete(void *p);
768 // Need virtual destructor
769 virtual ~KMPAffinity() = default;
770 // Determine if affinity is capable
771 virtual void determine_capable(const char *env_var) {}
772 // Bind the current thread to os proc
773 virtual void bind_thread(int proc) {}
774 // Factory functions to allocate/deallocate a mask
775 virtual Mask *allocate_mask() { return nullptr; }
776 virtual void deallocate_mask(Mask *m) {}
777 virtual Mask *allocate_mask_array(int num) { return nullptr; }
778 virtual void deallocate_mask_array(Mask *m) {}
779 virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
780 static void pick_api();
781 static void destroy_api();
782 enum api_type {
783 NATIVE_OS
784#if KMP_USE_HWLOC0
785 ,
786 HWLOC
787#endif
788 };
789 virtual api_type get_api_type() const {
790 KMP_ASSERT(0);
791 return NATIVE_OS;
792 }
793
794private:
795 static bool picked_api;
796};
797
798typedef KMPAffinity::Mask kmp_affin_mask_t;
799extern KMPAffinity *__kmp_affinity_dispatch;
800
801// Declare local char buffers with this size for printing debug and info
802// messages, using __kmp_affinity_print_mask().
803#define KMP_AFFIN_MASK_PRINT_LEN1024 1024
804
805enum affinity_type {
806 affinity_none = 0,
807 affinity_physical,
808 affinity_logical,
809 affinity_compact,
810 affinity_scatter,
811 affinity_explicit,
812 affinity_balanced,
813 affinity_disabled, // not used outside the env var parser
814 affinity_default
815};
816
817enum affinity_top_method {
818 affinity_top_method_all = 0, // try all (supported) methods, in order
819#if KMP_ARCH_X860 || KMP_ARCH_X86_641
820 affinity_top_method_apicid,
821 affinity_top_method_x2apicid,
822 affinity_top_method_x2apicid_1f,
823#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
824 affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
825#if KMP_GROUP_AFFINITY0
826 affinity_top_method_group,
827#endif /* KMP_GROUP_AFFINITY */
828 affinity_top_method_flat,
829#if KMP_USE_HWLOC0
830 affinity_top_method_hwloc,
831#endif
832 affinity_top_method_default
833};
834
835#define affinity_respect_mask_default(2) (2)
836
837typedef struct kmp_affinity_flags_t {
838 unsigned dups : 1;
839 unsigned verbose : 1;
840 unsigned warnings : 1;
841 unsigned respect : 2;
842 unsigned reset : 1;
843 unsigned initialized : 1;
844 unsigned reserved : 25;
845} kmp_affinity_flags_t;
846KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
847
848typedef struct kmp_affinity_ids_t {
849 int ids[KMP_HW_LAST];
850 int operator[](size_t idx) const { return ids[idx]; }
851 int &operator[](size_t idx) { return ids[idx]; }
852 kmp_affinity_ids_t &operator=(const kmp_affinity_ids_t &rhs) {
853 for (int i = 0; i < KMP_HW_LAST; ++i)
854 ids[i] = rhs[i];
855 return *this;
856 }
857} kmp_affinity_ids_t;
858
859typedef struct kmp_affinity_attrs_t {
860 int core_type : 8;
861 int core_eff : 8;
862 unsigned valid : 1;
863 unsigned reserved : 15;
864} kmp_affinity_attrs_t;
865#define KMP_AFFINITY_ATTRS_UNKNOWN \
866 { KMP_HW_CORE_TYPE_UNKNOWN, kmp_hw_attr_t::UNKNOWN_CORE_EFF, 0, 0 }
867
868typedef struct kmp_affinity_t {
869 char *proclist;
870 enum affinity_type type;
871 kmp_hw_t gran;
872 int gran_levels;
873 int compact;
874 int offset;
875 kmp_affinity_flags_t flags;
876 unsigned num_masks;
877 kmp_affin_mask_t *masks;
878 kmp_affinity_ids_t *ids;
879 kmp_affinity_attrs_t *attrs;
880 unsigned num_os_id_masks;
881 kmp_affin_mask_t *os_id_masks;
882 const char *env_var;
883} kmp_affinity_t;
884
885#define KMP_AFFINITY_INIT(env) \
886 { \
887 nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
888 {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
889 nullptr, nullptr, nullptr, 0, nullptr, env \
890 }
891
892extern enum affinity_top_method __kmp_affinity_top_method;
893extern kmp_affinity_t __kmp_affinity;
894extern kmp_affinity_t __kmp_hh_affinity;
895extern kmp_affinity_t *__kmp_affinities[2];
896
897extern void __kmp_affinity_bind_thread(int which);
898
899extern kmp_affin_mask_t *__kmp_affin_fullMask;
900extern kmp_affin_mask_t *__kmp_affin_origMask;
901extern char *__kmp_cpuinfo_file;
902
903#endif /* KMP_AFFINITY_SUPPORTED */
904
905// This needs to be kept in sync with the values in omp.h !!!
906typedef enum kmp_proc_bind_t {
907 proc_bind_false = 0,
908 proc_bind_true,
909 proc_bind_primary,
910 proc_bind_close,
911 proc_bind_spread,
912 proc_bind_intel, // use KMP_AFFINITY interface
913 proc_bind_default
914} kmp_proc_bind_t;
915
916typedef struct kmp_nested_proc_bind_t {
917 kmp_proc_bind_t *bind_types;
918 int size;
919 int used;
920} kmp_nested_proc_bind_t;
921
922extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
923extern kmp_proc_bind_t __kmp_teams_proc_bind;
924
925extern int __kmp_display_affinity;
926extern char *__kmp_affinity_format;
927static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
928#if OMPT_SUPPORT1
929extern int __kmp_tool;
930extern char *__kmp_tool_libraries;
931#endif // OMPT_SUPPORT
932
933#if KMP_AFFINITY_SUPPORTED1
934#define KMP_PLACE_ALL (-1)
935#define KMP_PLACE_UNDEFINED (-2)
936// Is KMP_AFFINITY being used instead of OMP_PROC_BIND/OMP_PLACES?
937#define KMP_AFFINITY_NON_PROC_BIND \
938 ((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \
939 __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \
940 (__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced))
941#endif /* KMP_AFFINITY_SUPPORTED */
942
943extern int __kmp_affinity_num_places;
944
945typedef enum kmp_cancel_kind_t {
946 cancel_noreq = 0,
947 cancel_parallel = 1,
948 cancel_loop = 2,
949 cancel_sections = 3,
950 cancel_taskgroup = 4
951} kmp_cancel_kind_t;
952
953// KMP_HW_SUBSET support:
954typedef struct kmp_hws_item {
955 int num;
956 int offset;
957} kmp_hws_item_t;
958
959extern kmp_hws_item_t __kmp_hws_socket;
960extern kmp_hws_item_t __kmp_hws_die;
961extern kmp_hws_item_t __kmp_hws_node;
962extern kmp_hws_item_t __kmp_hws_tile;
963extern kmp_hws_item_t __kmp_hws_core;
964extern kmp_hws_item_t __kmp_hws_proc;
965extern int __kmp_hws_requested;
966extern int __kmp_hws_abs_flag; // absolute or per-item number requested
967
968/* ------------------------------------------------------------------------ */
969
970#define KMP_PAD(type, sz) \
971 (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
972
973// We need to avoid using -1 as a GTID as +1 is added to the gtid
974// when storing it in a lock, and the value 0 is reserved.
975#define KMP_GTID_DNE(-2) (-2) /* Does not exist */
976#define KMP_GTID_SHUTDOWN(-3) (-3) /* Library is shutting down */
977#define KMP_GTID_MONITOR(-4) (-4) /* Monitor thread ID */
978#define KMP_GTID_UNKNOWN(-5) (-5) /* Is not known */
979#define KMP_GTID_MIN(-6) (-6) /* Minimal gtid for low bound check in DEBUG */
980
981/* OpenMP 5.0 Memory Management support */
982
983#ifndef __OMP_H
984// Duplicate type definitions from omp.h
985typedef uintptr_t omp_uintptr_t;
986
987typedef enum {
988 omp_atk_sync_hint = 1,
989 omp_atk_alignment = 2,
990 omp_atk_access = 3,
991 omp_atk_pool_size = 4,
992 omp_atk_fallback = 5,
993 omp_atk_fb_data = 6,
994 omp_atk_pinned = 7,
995 omp_atk_partition = 8
996} omp_alloctrait_key_t;
997
998typedef enum {
999 omp_atv_false = 0,
1000 omp_atv_true = 1,
1001 omp_atv_contended = 3,
1002 omp_atv_uncontended = 4,
1003 omp_atv_serialized = 5,
1004 omp_atv_sequential = omp_atv_serialized, // (deprecated)
1005 omp_atv_private = 6,
1006 omp_atv_all = 7,
1007 omp_atv_thread = 8,
1008 omp_atv_pteam = 9,
1009 omp_atv_cgroup = 10,
1010 omp_atv_default_mem_fb = 11,
1011 omp_atv_null_fb = 12,
1012 omp_atv_abort_fb = 13,
1013 omp_atv_allocator_fb = 14,
1014 omp_atv_environment = 15,
1015 omp_atv_nearest = 16,
1016 omp_atv_blocked = 17,
1017 omp_atv_interleaved = 18
1018} omp_alloctrait_value_t;
1019#define omp_atv_default((omp_uintptr_t)-1) ((omp_uintptr_t)-1)
1020
1021typedef void *omp_memspace_handle_t;
1022extern omp_memspace_handle_t const omp_default_mem_space;
1023extern omp_memspace_handle_t const omp_large_cap_mem_space;
1024extern omp_memspace_handle_t const omp_const_mem_space;
1025extern omp_memspace_handle_t const omp_high_bw_mem_space;
1026extern omp_memspace_handle_t const omp_low_lat_mem_space;
1027extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
1028extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
1029extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
1030
1031typedef struct {
1032 omp_alloctrait_key_t key;
1033 omp_uintptr_t value;
1034} omp_alloctrait_t;
1035
1036typedef void *omp_allocator_handle_t;
1037extern omp_allocator_handle_t const omp_null_allocator;
1038extern omp_allocator_handle_t const omp_default_mem_alloc;
1039extern omp_allocator_handle_t const omp_large_cap_mem_alloc;
1040extern omp_allocator_handle_t const omp_const_mem_alloc;
1041extern omp_allocator_handle_t const omp_high_bw_mem_alloc;
1042extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
1043extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
1044extern omp_allocator_handle_t const omp_pteam_mem_alloc;
1045extern omp_allocator_handle_t const omp_thread_mem_alloc;
1046extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
1047extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
1048extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
1049extern omp_allocator_handle_t const kmp_max_mem_alloc;
1050extern omp_allocator_handle_t __kmp_def_allocator;
1051
1052// end of duplicate type definitions from omp.h
1053#endif
1054
1055extern int __kmp_memkind_available;
1056
1057typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
1058
1059typedef struct kmp_allocator_t {
1060 omp_memspace_handle_t memspace;
1061 void **memkind; // pointer to memkind
1062 size_t alignment;
1063 omp_alloctrait_value_t fb;
1064 kmp_allocator_t *fb_data;
1065 kmp_uint64 pool_size;
1066 kmp_uint64 pool_used;
1067 bool pinned;
1068} kmp_allocator_t;
1069
1070extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
1071 omp_memspace_handle_t,
1072 int ntraits,
1073 omp_alloctrait_t traits[]);
1074extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
1075extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);
1076extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);
1077// external interfaces, may be used by compiler
1078extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
1079extern void *__kmpc_aligned_alloc(int gtid, size_t align, size_t sz,
1080 omp_allocator_handle_t al);
1081extern void *__kmpc_calloc(int gtid, size_t nmemb, size_t sz,
1082 omp_allocator_handle_t al);
1083extern void *__kmpc_realloc(int gtid, void *ptr, size_t sz,
1084 omp_allocator_handle_t al,
1085 omp_allocator_handle_t free_al);
1086extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
1087// internal interfaces, contain real implementation
1088extern void *__kmp_alloc(int gtid, size_t align, size_t sz,
1089 omp_allocator_handle_t al);
1090extern void *__kmp_calloc(int gtid, size_t align, size_t nmemb, size_t sz,
1091 omp_allocator_handle_t al);
1092extern void *__kmp_realloc(int gtid, void *ptr, size_t sz,
1093 omp_allocator_handle_t al,
1094 omp_allocator_handle_t free_al);
1095extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
1096
1097extern void __kmp_init_memkind();
1098extern void __kmp_fini_memkind();
1099extern void __kmp_init_target_mem();
1100
1101/* ------------------------------------------------------------------------ */
1102
1103#if ENABLE_LIBOMPTARGET1
1104extern void __kmp_init_target_task();
1105#endif
1106
1107/* ------------------------------------------------------------------------ */
1108
1109#define KMP_UINT64_MAX \
 1110 (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
1111
1112#define KMP_MIN_NTH1 1
1113
1114#ifndef KMP_MAX_NTH2147483647
1115#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX2147483647
1116#define KMP_MAX_NTH2147483647 PTHREAD_THREADS_MAX
1117#else
1118#define KMP_MAX_NTH2147483647 INT_MAX2147483647
1119#endif
1120#endif /* KMP_MAX_NTH */
1121
1122#ifdef PTHREAD_STACK_MIN16384
1123#define KMP_MIN_STKSIZE16384 PTHREAD_STACK_MIN16384
1124#else
1125#define KMP_MIN_STKSIZE16384 ((size_t)(32 * 1024))
1126#endif
1127
1128#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1129
1130#if KMP_ARCH_X860
1131#define KMP_DEFAULT_STKSIZE((size_t)(4 * 1024 * 1024)) ((size_t)(2 * 1024 * 1024))
1132#elif KMP_ARCH_X86_641
1133#define KMP_DEFAULT_STKSIZE((size_t)(4 * 1024 * 1024)) ((size_t)(4 * 1024 * 1024))
1134#define KMP_BACKUP_STKSIZE((size_t)(2 * 1024 * 1024)) ((size_t)(2 * 1024 * 1024))
1135#else
1136#define KMP_DEFAULT_STKSIZE((size_t)(4 * 1024 * 1024)) ((size_t)(1024 * 1024))
1137#endif
1138
1139#define KMP_DEFAULT_MALLOC_POOL_INCR((size_t)(1024 * 1024)) ((size_t)(1024 * 1024))
1140#define KMP_MIN_MALLOC_POOL_INCR((size_t)(4 * 1024)) ((size_t)(4 * 1024))
1141#define KMP_MAX_MALLOC_POOL_INCR \
 1142 (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
1143
1144#define KMP_MIN_STKOFFSET(0) (0)
1145#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
1146#if KMP_OS_DARWIN0
1147#define KMP_DEFAULT_STKOFFSET64 KMP_MIN_STKOFFSET(0)
1148#else
1149#define KMP_DEFAULT_STKOFFSET64 CACHE_LINE64
1150#endif
1151
1152#define KMP_MIN_STKPADDING(0) (0)
1153#define KMP_MAX_STKPADDING(2 * 1024 * 1024) (2 * 1024 * 1024)
1154
1155#define KMP_BLOCKTIME_MULTIPLIER(1000) \
1156 (1000) /* number of blocktime units per second */
1157#define KMP_MIN_BLOCKTIME(0) (0)
1158#define KMP_MAX_BLOCKTIME(2147483647) \
 1159 (INT_MAX2147483647) /* Must be this for the "infinite" setting to work */
1160
1161/* __kmp_blocktime is in milliseconds */
1162#define KMP_DEFAULT_BLOCKTIME(__kmp_is_hybrid_cpu() ? (0) : (200)) (__kmp_is_hybrid_cpu() ? (0) : (200))
1163
1164#if KMP_USE_MONITOR
1165#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
1166#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
1167#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
1168
1169/* Calculate new number of monitor wakeups for a specific block time based on
1170 previous monitor_wakeups. Only allow increasing number of wakeups */
1171#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1172 (((blocktime) == KMP_MAX_BLOCKTIME(2147483647)) ? (monitor_wakeups) \
1173 : ((blocktime) == KMP_MIN_BLOCKTIME(0)) ? KMP_MAX_MONITOR_WAKEUPS \
1174 : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER(1000) / (blocktime))) \
1175 ? (monitor_wakeups) \
1176 : (KMP_BLOCKTIME_MULTIPLIER(1000)) / (blocktime))
1177
1178/* Calculate number of intervals for a specific block time based on
1179 monitor_wakeups */
1180#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1181 (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER(1000) / (monitor_wakeups)) - 1) / \
1182 (KMP_BLOCKTIME_MULTIPLIER(1000) / (monitor_wakeups)))
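Worked numbers for the two macros above: with blocktime = 200 and KMP_BLOCKTIME_MULTIPLIER = 1000, a monitor waking 10 times per second covers 1000 / 10 = 100 blocktime units per wakeup, so KMP_INTERVALS_FROM_BLOCKTIME(200, 10) = (200 + 100 - 1) / 100 = 2 intervals; KMP_WAKEUPS_FROM_BLOCKTIME(200, 10) stays at 10 because 10 > 1000 / 200 = 5 and the macro only ever increases the wakeup rate.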
1183#else
1184#define KMP_BLOCKTIME(team, tid) \
 1185 (get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)
1186#if KMP_OS_UNIX1 && (KMP_ARCH_X860 || KMP_ARCH_X86_641)
1187// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
1188extern kmp_uint64 __kmp_ticks_per_msec;
1189#if KMP_COMPILER_ICC0 || KMP_COMPILER_ICX0
1190#define KMP_NOW()__kmp_hardware_timestamp() ((kmp_uint64)_rdtsc())
1191#else
1192#define KMP_NOW()__kmp_hardware_timestamp() __kmp_hardware_timestamp()
1193#endif
1194#define KMP_NOW_MSEC()(__kmp_hardware_timestamp() / __kmp_ticks_per_msec) (KMP_NOW()__kmp_hardware_timestamp() / __kmp_ticks_per_msec)
1195#define KMP_BLOCKTIME_INTERVAL(team, tid) \
 1196 (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
1197#define KMP_BLOCKING(goal, count)((goal) > __kmp_hardware_timestamp()) ((goal) > KMP_NOW()__kmp_hardware_timestamp())
1198#else
1199// System time is retrieved sporadically while blocking.
1200extern kmp_uint64 __kmp_now_nsec();
1201#define KMP_NOW()__kmp_hardware_timestamp() __kmp_now_nsec()
1202#define KMP_NOW_MSEC()(__kmp_hardware_timestamp() / __kmp_ticks_per_msec) (KMP_NOW()__kmp_hardware_timestamp() / KMP_USEC_PER_SEC1000000L)
1203#define KMP_BLOCKTIME_INTERVAL(team, tid) \
 1204 (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
1205#define KMP_BLOCKING(goal, count)((goal) > __kmp_hardware_timestamp()) ((count) % 1000 != 0 || (goal) > KMP_NOW()__kmp_hardware_timestamp())
1206#endif
1207#endif // KMP_USE_MONITOR
1208
1209#define KMP_MIN_STATSCOLS40 40
1210#define KMP_MAX_STATSCOLS4096 4096
1211#define KMP_DEFAULT_STATSCOLS80 80
1212
1213#define KMP_MIN_INTERVAL0 0
1214#define KMP_MAX_INTERVAL(2147483647 - 1) (INT_MAX2147483647 - 1)
1215#define KMP_DEFAULT_INTERVAL0 0
1216
1217#define KMP_MIN_CHUNK1 1
1218#define KMP_MAX_CHUNK(2147483647 - 1) (INT_MAX2147483647 - 1)
1219#define KMP_DEFAULT_CHUNK1 1
1220
1221#define KMP_MIN_DISP_NUM_BUFF1 1
1222#define KMP_DFLT_DISP_NUM_BUFF7 7
1223#define KMP_MAX_DISP_NUM_BUFF4096 4096
1224
1225#define KMP_MAX_ORDERED8 8
1226
1227#define KMP_MAX_FIELDS32 32
1228
1229#define KMP_MAX_BRANCH_BITS31 31
1230
1231#define KMP_MAX_ACTIVE_LEVELS_LIMIT2147483647 INT_MAX2147483647
1232
1233#define KMP_MAX_DEFAULT_DEVICE_LIMIT2147483647 INT_MAX2147483647
1234
1235#define KMP_MAX_TASK_PRIORITY_LIMIT2147483647 INT_MAX2147483647
1236
1237/* Minimum number of threads before switch to TLS gtid (experimentally
1238 determined) */
1239/* josh TODO: what about OS X* tuning? */
1240#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1241#define KMP_TLS_GTID_MIN5 5
1242#else
1243#define KMP_TLS_GTID_MIN5 INT_MAX2147483647
1244#endif
1245
1246#define KMP_MASTER_TID(tid)(0 == (tid)) (0 == (tid))
1247#define KMP_WORKER_TID(tid)(0 != (tid)) (0 != (tid))
1248
1249#define KMP_MASTER_GTID(gtid)(0 == __kmp_tid_from_gtid((gtid))) (0 == __kmp_tid_from_gtid((gtid)))
1250#define KMP_WORKER_GTID(gtid)(0 != __kmp_tid_from_gtid((gtid))) (0 != __kmp_tid_from_gtid((gtid)))
1251#define KMP_INITIAL_GTID(gtid)(0 == (gtid)) (0 == (gtid))
1252
1253#ifndef TRUE(!0)
1254#define FALSE0 0
1255#define TRUE(!0) (!FALSE0)
1256#endif
1257
1258/* NOTE: all of the following constants must be even */
1259
1260#if KMP_OS_WINDOWS0
1261#define KMP_INIT_WAIT1024U 64U /* initial number of spin-tests */
1262#define KMP_NEXT_WAIT512U 32U /* subsequent number of spin-tests */
1263#elif KMP_OS_LINUX1
1264#define KMP_INIT_WAIT1024U 1024U /* initial number of spin-tests */
1265#define KMP_NEXT_WAIT512U 512U /* subsequent number of spin-tests */
1266#elif KMP_OS_DARWIN0
1267/* TODO: tune for KMP_OS_DARWIN */
1268#define KMP_INIT_WAIT1024U 1024U /* initial number of spin-tests */
1269#define KMP_NEXT_WAIT512U 512U /* subsequent number of spin-tests */
1270#elif KMP_OS_DRAGONFLY0
1271/* TODO: tune for KMP_OS_DRAGONFLY */
1272#define KMP_INIT_WAIT1024U 1024U /* initial number of spin-tests */
1273#define KMP_NEXT_WAIT512U 512U /* subsequent number of spin-tests */
1274#elif KMP_OS_FREEBSD0
1275/* TODO: tune for KMP_OS_FREEBSD */
1276#define KMP_INIT_WAIT1024U 1024U /* initial number of spin-tests */
1277#define KMP_NEXT_WAIT512U 512U /* subsequent number of spin-tests */
1278#elif KMP_OS_NETBSD0
1279/* TODO: tune for KMP_OS_NETBSD */
1280#define KMP_INIT_WAIT1024U 1024U /* initial number of spin-tests */
1281#define KMP_NEXT_WAIT512U 512U /* subsequent number of spin-tests */
1282#elif KMP_OS_HURD0
1283/* TODO: tune for KMP_OS_HURD */
1284#define KMP_INIT_WAIT1024U 1024U /* initial number of spin-tests */
1285#define KMP_NEXT_WAIT512U 512U /* subsequent number of spin-tests */
1286#elif KMP_OS_OPENBSD0
1287/* TODO: tune for KMP_OS_OPENBSD */
1288#define KMP_INIT_WAIT1024U 1024U /* initial number of spin-tests */
1289#define KMP_NEXT_WAIT512U 512U /* subsequent number of spin-tests */
1290#endif
1291
1292#if KMP_ARCH_X860 || KMP_ARCH_X86_641
1293typedef struct kmp_cpuid {
1294 kmp_uint32 eax;
1295 kmp_uint32 ebx;
1296 kmp_uint32 ecx;
1297 kmp_uint32 edx;
1298} kmp_cpuid_t;
1299
1300typedef struct kmp_cpuinfo_flags_t {
1301 unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
1302 unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
1303 unsigned hybrid : 1;
1304 unsigned reserved : 29; // Ensure size of 32 bits
1305} kmp_cpuinfo_flags_t;
1306
1307typedef struct kmp_cpuinfo {
1308 int initialized; // If 0, other fields are not initialized.
1309 int signature; // CPUID(1).EAX
1310 int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
1311 int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
1312 // Model << 4 ) + Model)
1313 int stepping; // CPUID(1).EAX[3:0] ( Stepping )
1314 kmp_cpuinfo_flags_t flags;
1315 int apic_id;
1316 int physical_id;
1317 int logical_id;
1318 kmp_uint64 frequency; // Nominal CPU frequency in Hz.
1319 char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
1320} kmp_cpuinfo_t;
1321
1322extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
1323
1324#if KMP_OS_UNIX1
1325// subleaf is only needed for cache and topology discovery and can be set to
1326// zero in most cases
1327static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {
1328 __asm__ __volatile__("cpuid"
1329 : "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)
1330 : "a"(leaf), "c"(subleaf));
1331}
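A minimal usage sketch of the wrapper above (standard CPUID leaf-0 behavior, not code from kmp.h): leaf 0 reports the highest supported leaf in eax and the 12-byte vendor string spread across ebx, edx, ecx.

#include <cstring>
// Illustrative only; assumes an x86 build where kmp_cpuid_t and
// __kmp_x86_cpuid (defined above) are visible.
static void read_vendor_example(void) {
  kmp_cpuid_t buf;
  char vendor[13];
  __kmp_x86_cpuid(0, 0, &buf);
  std::memcpy(vendor + 0, &buf.ebx, 4);
  std::memcpy(vendor + 4, &buf.edx, 4);
  std::memcpy(vendor + 8, &buf.ecx, 4);
  vendor[12] = '\0'; // e.g. "GenuineIntel" or "AuthenticAMD"
}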
1332// Load p into FPU control word
1333static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {
1334 __asm__ __volatile__("fldcw %0" : : "m"(*p));
1335}
1336// Store FPU control word into p
1337static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {
1338 __asm__ __volatile__("fstcw %0" : "=m"(*p));
1339}
1340static inline void __kmp_clear_x87_fpu_status_word() {
1341#if KMP_MIC0
1342 // 32-bit protected mode x87 FPU state
1343 struct x87_fpu_state {
1344 unsigned cw;
1345 unsigned sw;
1346 unsigned tw;
1347 unsigned fip;
1348 unsigned fips;
1349 unsigned fdp;
1350 unsigned fds;
1351 };
1352 struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};
1353 __asm__ __volatile__("fstenv %0\n\t" // store FP env
1354 "andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW
1355 "fldenv %0\n\t" // load FP env back
1356 : "+m"(fpu_state), "+m"(fpu_state.sw));
1357#else
1358 __asm__ __volatile__("fnclex");
1359#endif // KMP_MIC
1360}
1361#if __SSE__1
1362static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1363static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1364#else
1365static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}
1366static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }
1367#endif
1368#else
1369// Windows still has these as external functions in assembly file
1370extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
1371extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);
1372extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
1373extern void __kmp_clear_x87_fpu_status_word();
1374static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }
1375static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
1376#endif // KMP_OS_UNIX
1377
1378#define KMP_X86_MXCSR_MASK0xffffffc0 0xffffffc0 /* ignore status flags (6 lsb) */
1379
1380// User-level Monitor/Mwait
1381#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
1382// We always try for UMWAIT first
1383#if KMP_HAVE_WAITPKG_INTRINSICS1
1384#if KMP_HAVE_IMMINTRIN_H1
1385#include <immintrin.h>
1386#elif KMP_HAVE_INTRIN_H0
1387#include <intrin.h>
1388#endif
1389#endif // KMP_HAVE_WAITPKG_INTRINSICS
1390
1391KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1392static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
1393#if !KMP_HAVE_WAITPKG_INTRINSICS1
1394 uint32_t timeHi = uint32_t(counter >> 32);
1395 uint32_t timeLo = uint32_t(counter & 0xffffffff);
1396 char flag;
1397 __asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"
1398 "setb %0"
1399 // The "=q" restraint means any register accessible as rl
1400 // in 32-bit mode: a, b, c, and d;
1401 // in 64-bit mode: any integer register
1402 : "=q"(flag)
1403 : "a"(timeLo), "d"(timeHi), "c"(hint)
1404 :);
1405 return flag;
1406#else
1407 return _tpause(hint, counter);
1408#endif
1409}
1410KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1411static inline void __kmp_umonitor(void *cacheline) {
1412#if !KMP_HAVE_WAITPKG_INTRINSICS1
1413 __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
1414 :
1415 : "a"(cacheline)
1416 :);
1417#else
1418 _umonitor(cacheline);
1419#endif
1420}
1421KMP_ATTRIBUTE_TARGET_WAITPKG__attribute__((target("waitpkg")))
1422static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
1423#if !KMP_HAVE_WAITPKG_INTRINSICS1
1424 uint32_t timeHi = uint32_t(counter >> 32);
1425 uint32_t timeLo = uint32_t(counter & 0xffffffff);
1426 char flag;
1427 __asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"
1428 "setb %0"
1429 // The "=q" restraint means any register accessible as rl
1430 // in 32-bit mode: a, b, c, and d;
1431 // in 64-bit mode: any integer register
1432 : "=q"(flag)
1433 : "a"(timeLo), "d"(timeHi), "c"(hint)
1434 :);
1435 return flag;
1436#else
1437 return _umwait(hint, counter);
1438#endif
1439}
1440#elif KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0)
1441#if KMP_OS_UNIX1
1442#include <pmmintrin.h>
1443#else
1444#include <intrin.h>
1445#endif
1446#if KMP_OS_UNIX1
1447__attribute__((target("sse3")))
1448#endif
1449static inline void
1450__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {
1451 _mm_monitor(cacheline, extensions, hints);
1452}
1453#if KMP_OS_UNIX1
1454__attribute__((target("sse3")))
1455#endif
1456static inline void
1457__kmp_mm_mwait(unsigned extensions, unsigned hints) {
1458 _mm_mwait(extensions, hints);
1459}
1460#endif // KMP_HAVE_UMWAIT
1461
1462#if KMP_ARCH_X860
1463extern void __kmp_x86_pause(void);
1464#elif KMP_MIC0
1465// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
1466// regression after removal of extra PAUSE from spin loops. Changing
1467// the delay from 100 to 300 showed even better performance than double PAUSE
1468// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
1469static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
1470#else
1471static inline void __kmp_x86_pause(void) { _mm_pause(); }
1472#endif
1473#define KMP_CPU_PAUSE()__kmp_x86_pause() __kmp_x86_pause()
1474#elif KMP_ARCH_PPC64(0 || 0)
1475#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
1476#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
1477#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
1478#define KMP_CPU_PAUSE()__kmp_x86_pause() \
1479 do { \
1480 KMP_PPC64_PRI_LOW(); \
1481 KMP_PPC64_PRI_MED(); \
1482 KMP_PPC64_PRI_LOC_MB(); \
1483 } while (0)
1484#else
1485#define KMP_CPU_PAUSE()__kmp_x86_pause() /* nothing to do */
1486#endif
1487
1488#define KMP_INIT_YIELD(count){ (count) = __kmp_yield_init; } \
1489 { (count) = __kmp_yield_init; }
1490
1491#define KMP_INIT_BACKOFF(time){ (time) = __kmp_pause_init; } \
1492 { (time) = __kmp_pause_init; }
1493
1494#define KMP_OVERSUBSCRIBED \
 1495 (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
1496
1497#define KMP_TRY_YIELD \
 1498 ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
1499
1500#define KMP_TRY_YIELD_OVERSUB \
 1501 ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
1502
1503#define KMP_YIELD(cond) \
 1504 { \
 1505 KMP_CPU_PAUSE(); \
 1506 if ((cond) && (KMP_TRY_YIELD)) \
 1507 __kmp_yield(); \
 1508 }
1509
1510#define KMP_YIELD_OVERSUB() \
 1511 { \
 1512 KMP_CPU_PAUSE(); \
 1513 if ((KMP_TRY_YIELD_OVERSUB)) \
 1514 __kmp_yield(); \
 1515 }
1516
1517// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
1518// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
1519#define KMP_YIELD_SPIN(count) \
 1520 { \
 1521 KMP_CPU_PAUSE(); \
 1522 if (KMP_TRY_YIELD) { \
 1523 (count) -= 2; \
 1524 if (!(count)) { \
 1525 __kmp_yield(); \
 1526 (count) = __kmp_yield_next; \
 1527 } \
 1528 } \
 1529 }
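A worked trace of the turnaround behavior noted above: KMP_INIT_YIELD() seeds an odd count under KMP_LIBRARY=turnaround, so the "count -= 2" in KMP_YIELD_SPIN walks 9, 7, 5, 3, 1, -1, ... and the "if (!(count))" branch that calls __kmp_yield() is never taken; presumably an even seed in other library modes reaches 0 and yields periodically.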
1530
1531// If TPAUSE is available & enabled, use it. If oversubscribed, use the slower
1532// (C0.2) state, which improves performance of other SMT threads on the same
1533// core, otherwise, use the fast (C0.1) default state, or whatever the user has
1534// requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't
1535// available, fall back to the regular CPU pause and yield combination.
1536#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
1537#define KMP_TPAUSE_MAX_MASK((kmp_uint64)0xFFFF) ((kmp_uint64)0xFFFF)
1538#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
 1539 { \
 1540 if (__kmp_tpause_enabled) { \
 1541 if (KMP_OVERSUBSCRIBED) { \
 1542 __kmp_tpause(0, (time)); \
 1543 } else { \
 1544 __kmp_tpause(__kmp_tpause_hint, (time)); \
 1545 } \
 1546 (time) = (time << 1 | 1) & KMP_TPAUSE_MAX_MASK; \
 1547 } else { \
 1548 KMP_CPU_PAUSE(); \
 1549 if ((KMP_TRY_YIELD_OVERSUB)) { \
 1550 __kmp_yield(); \
 1551 } else if (__kmp_use_yield == 1) { \
 1552 (count) -= 2; \
 1553 if (!(count)) { \
 1554 __kmp_yield(); \
 1555 (count) = __kmp_yield_next; \
 1556 } \
 1557 } \
 1558 } \
 1559 }
1560#else
1561#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \
 1562 { \
 1563 KMP_CPU_PAUSE(); \
 1564 if ((KMP_TRY_YIELD_OVERSUB)) \
 1565 __kmp_yield(); \
 1566 else if (__kmp_use_yield == 1) { \
 1567 (count) -= 2; \
 1568 if (!(count)) { \
 1569 __kmp_yield(); \
 1570 (count) = __kmp_yield_next; \
 1571 } \
 1572 } \
 1573 }
1574#endif // KMP_HAVE_UMWAIT
1575
1576/* ------------------------------------------------------------------------ */
1577/* Support datatypes for the orphaned construct nesting checks. */
1578/* ------------------------------------------------------------------------ */
1579
1580/* When adding to this enum, add its corresponding string in cons_text_c[]
1581 * array in kmp_error.cpp */
1582enum cons_type {
1583 ct_none,
1584 ct_parallel,
1585 ct_pdo,
1586 ct_pdo_ordered,
1587 ct_psections,
1588 ct_psingle,
1589 ct_critical,
1590 ct_ordered_in_parallel,
1591 ct_ordered_in_pdo,
1592 ct_master,
1593 ct_reduce,
1594 ct_barrier,
1595 ct_masked
1596};
1597
1598#define IS_CONS_TYPE_ORDERED(ct)((ct) == ct_pdo_ordered) ((ct) == ct_pdo_ordered)
1599
1600struct cons_data {
1601 ident_t const *ident;
1602 enum cons_type type;
1603 int prev;
1604 kmp_user_lock_p
1605 name; /* address exclusively for critical section name comparison */
1606};
1607
1608struct cons_header {
1609 int p_top, w_top, s_top;
1610 int stack_size, stack_top;
1611 struct cons_data *stack_data;
1612};
1613
1614struct kmp_region_info {
1615 char *text;
1616 int offset[KMP_MAX_FIELDS32];
1617 int length[KMP_MAX_FIELDS32];
1618};
1619
1620/* ---------------------------------------------------------------------- */
1621/* ---------------------------------------------------------------------- */
1622
1623#if KMP_OS_WINDOWS0
1624typedef HANDLE kmp_thread_t;
1625typedef DWORD kmp_key_t;
1626#endif /* KMP_OS_WINDOWS */
1627
1628#if KMP_OS_UNIX1
1629typedef pthread_t kmp_thread_t;
1630typedef pthread_key_t kmp_key_t;
1631#endif
1632
1633extern kmp_key_t __kmp_gtid_threadprivate_key;
1634
1635typedef struct kmp_sys_info {
1636 long maxrss; /* the maximum resident set size utilized (in kilobytes) */
1637 long minflt; /* the number of page faults serviced without any I/O */
1638 long majflt; /* the number of page faults serviced that required I/O */
1639 long nswap; /* the number of times a process was "swapped" out of memory */
1640 long inblock; /* the number of times the file system had to perform input */
1641 long oublock; /* the number of times the file system had to perform output */
1642 long nvcsw; /* the number of times a context switch was voluntarily */
1643 long nivcsw; /* the number of times a context switch was forced */
1644} kmp_sys_info_t;
1645
1646#if USE_ITT_BUILD1
1647// We cannot include "kmp_itt.h" due to circular dependency. Declare the only
1648// required type here. Later we will check the type meets requirements.
1649typedef int kmp_itt_mark_t;
1650#define KMP_ITT_DEBUG0 0
1651#endif /* USE_ITT_BUILD */
1652
1653typedef kmp_int32 kmp_critical_name[8];
1654
1655/*!
1656@ingroup PARALLEL
1657The type for a microtask which gets passed to @ref __kmpc_fork_call().
1658The arguments to the outlined function are
1659@param global_tid the global thread identity of the thread executing the
1660function.
1661@param bound_tid the local identity of the thread executing the function
1662@param ... pointers to shared variables accessed by the function.
1663*/
1664typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
1665typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
1666 ...);
1667
1668/*!
1669@ingroup THREADPRIVATE
1670@{
1671*/
1672/* ---------------------------------------------------------------------------
1673 */
1674/* Threadprivate initialization/finalization function declarations */
1675
1676/* for non-array objects: __kmpc_threadprivate_register() */
1677
1678/*!
1679 Pointer to the constructor function.
1680 The first argument is the <tt>this</tt> pointer
1681*/
1682typedef void *(*kmpc_ctor)(void *);
1683
1684/*!
1685 Pointer to the destructor function.
1686 The first argument is the <tt>this</tt> pointer
1687*/
1688typedef void (*kmpc_dtor)(
1689 void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
1690 compiler */
1691/*!
1692 Pointer to an alternate constructor.
1693 The first argument is the <tt>this</tt> pointer.
1694*/
1695typedef void *(*kmpc_cctor)(void *, void *);
1696
1697/* for array objects: __kmpc_threadprivate_register_vec() */
1698/* First arg: "this" pointer */
1699/* Last arg: number of array elements */
1700/*!
1701 Array constructor.
1702 First argument is the <tt>this</tt> pointer
1703 Second argument the number of array elements.
1704*/
1705typedef void *(*kmpc_ctor_vec)(void *, size_t);
1706/*!
1707 Pointer to the array destructor function.
1708 The first argument is the <tt>this</tt> pointer
1709 Second argument the number of array elements.
1710*/
1711typedef void (*kmpc_dtor_vec)(void *, size_t);
1712/*!
1713 Array constructor.
1714 First argument is the <tt>this</tt> pointer
1715 Third argument the number of array elements.
1716*/
1717typedef void *(*kmpc_cctor_vec)(void *, void *,
1718 size_t); /* function unused by compiler */
1719
1720/*!
1721@}
1722*/
1723
1724/* keeps tracked of threadprivate cache allocations for cleanup later */
1725typedef struct kmp_cached_addr {
1726 void **addr; /* address of allocated cache */
1727 void ***compiler_cache; /* pointer to compiler's cache */
1728 void *data; /* pointer to global data */
1729 struct kmp_cached_addr *next; /* pointer to next cached address */
1730} kmp_cached_addr_t;
1731
1732struct private_data {
1733 struct private_data *next; /* The next descriptor in the list */
1734 void *data; /* The data buffer for this descriptor */
1735 int more; /* The repeat count for this descriptor */
1736 size_t size; /* The data size for this descriptor */
1737};
1738
1739struct private_common {
1740 struct private_common *next;
1741 struct private_common *link;
1742 void *gbl_addr;
1743 void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */
1744 size_t cmn_size;
1745};
1746
1747struct shared_common {
1748 struct shared_common *next;
1749 struct private_data *pod_init;
1750 void *obj_init;
1751 void *gbl_addr;
1752 union {
1753 kmpc_ctor ctor;
1754 kmpc_ctor_vec ctorv;
1755 } ct;
1756 union {
1757 kmpc_cctor cctor;
1758 kmpc_cctor_vec cctorv;
1759 } cct;
1760 union {
1761 kmpc_dtor dtor;
1762 kmpc_dtor_vec dtorv;
1763 } dt;
1764 size_t vec_len;
1765 int is_vec;
1766 size_t cmn_size;
1767};
1768
1769#define KMP_HASH_TABLE_LOG29 9 /* log2 of the hash table size */
1770#define KMP_HASH_TABLE_SIZE(1 << 9) \
1771 (1 << KMP_HASH_TABLE_LOG29) /* size of the hash table */
1772#define KMP_HASH_SHIFT3 3 /* throw away this many low bits from the address */
1773#define KMP_HASH(x)((((kmp_uintptr_t)x) >> 3) & ((1 << 9) - 1)) \
1774 ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT3) & (KMP_HASH_TABLE_SIZE(1 << 9) - 1))
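A worked value for KMP_HASH with an illustrative address: with KMP_HASH_SHIFT = 3 and a 512-entry table, KMP_HASH(0x7ffe1238) = (0x7ffe1238 >> 3) & 0x1ff = 0x0fffc247 & 0x1ff = 0x47 = 71; the three low bits are discarded first, presumably because the hashed addresses are at least 8-byte aligned.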
1775
1776struct common_table {
1777 struct private_common *data[KMP_HASH_TABLE_SIZE(1 << 9)];
1778};
1779
1780struct shared_table {
1781 struct shared_common *data[KMP_HASH_TABLE_SIZE(1 << 9)];
1782};
1783
1784/* ------------------------------------------------------------------------ */
1785
1786#if KMP_USE_HIER_SCHED0
1787// Shared barrier data that exists inside a single unit of the scheduling
1788// hierarchy
1789typedef struct kmp_hier_private_bdata_t {
1790 kmp_int32 num_active;
1791 kmp_uint64 index;
1792 kmp_uint64 wait_val[2];
1793} kmp_hier_private_bdata_t;
1794#endif
1795
1796typedef struct kmp_sched_flags {
1797 unsigned ordered : 1;
1798 unsigned nomerge : 1;
1799 unsigned contains_last : 1;
1800#if KMP_USE_HIER_SCHED0
1801 unsigned use_hier : 1;
1802 unsigned unused : 28;
1803#else
1804 unsigned unused : 29;
1805#endif
1806} kmp_sched_flags_t;
1807
1808KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
1809
1810#if KMP_STATIC_STEAL_ENABLED1
1811typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info32 {
1812 kmp_int32 count;
1813 kmp_int32 ub;
1814 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1815 kmp_int32 lb;
1816 kmp_int32 st;
1817 kmp_int32 tc;
1818 kmp_lock_t *steal_lock; // lock used for chunk stealing
1819 // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
1820 // a) parm3 is properly aligned and
1821 // b) all parm1-4 are on the same cache line.
 1822 // Because parm1-4 are used together, performance seems to be better
1823 // if they are on the same cache line (not measured though).
1824
1825 struct KMP_ALIGN(32)__attribute__((aligned(32))) { // AC: changed 16 to 32 in order to simplify template
1826 kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
1827 kmp_int32 parm2; // make no real change at least while padding is off.
1828 kmp_int32 parm3;
1829 kmp_int32 parm4;
1830 };
1831
1832 kmp_uint32 ordered_lower;
1833 kmp_uint32 ordered_upper;
1834#if KMP_OS_WINDOWS0
1835 kmp_int32 last_upper;
1836#endif /* KMP_OS_WINDOWS */
1837} dispatch_private_info32_t;
1838
1839typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info64 {
1840 kmp_int64 count; // current chunk number for static & static-steal scheduling
1841 kmp_int64 ub; /* upper-bound */
1842 /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1843 kmp_int64 lb; /* lower-bound */
1844 kmp_int64 st; /* stride */
1845 kmp_int64 tc; /* trip count (number of iterations) */
1846 kmp_lock_t *steal_lock; // lock used for chunk stealing
1847 /* parm[1-4] are used in different ways by different scheduling algorithms */
1848
1849 // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1850 // a) parm3 is properly aligned and
1851 // b) all parm1-4 are in the same cache line.
 1852 // Because parm1-4 are used together, performance seems to be better
1853 // if they are in the same line (not measured though).
1854
1855 struct KMP_ALIGN(32)__attribute__((aligned(32))) {
1856 kmp_int64 parm1;
1857 kmp_int64 parm2;
1858 kmp_int64 parm3;
1859 kmp_int64 parm4;
1860 };
1861
1862 kmp_uint64 ordered_lower;
1863 kmp_uint64 ordered_upper;
1864#if KMP_OS_WINDOWS0
1865 kmp_int64 last_upper;
1866#endif /* KMP_OS_WINDOWS */
1867} dispatch_private_info64_t;
1868#else /* KMP_STATIC_STEAL_ENABLED */
1869typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info32 {
1870 kmp_int32 lb;
1871 kmp_int32 ub;
1872 kmp_int32 st;
1873 kmp_int32 tc;
1874
1875 kmp_int32 parm1;
1876 kmp_int32 parm2;
1877 kmp_int32 parm3;
1878 kmp_int32 parm4;
1879
1880 kmp_int32 count;
1881
1882 kmp_uint32 ordered_lower;
1883 kmp_uint32 ordered_upper;
1884#if KMP_OS_WINDOWS0
1885 kmp_int32 last_upper;
1886#endif /* KMP_OS_WINDOWS */
1887} dispatch_private_info32_t;
1888
1889typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info64 {
1890 kmp_int64 lb; /* lower-bound */
1891 kmp_int64 ub; /* upper-bound */
1892 kmp_int64 st; /* stride */
1893 kmp_int64 tc; /* trip count (number of iterations) */
1894
1895 /* parm[1-4] are used in different ways by different scheduling algorithms */
1896 kmp_int64 parm1;
1897 kmp_int64 parm2;
1898 kmp_int64 parm3;
1899 kmp_int64 parm4;
1900
1901 kmp_int64 count; /* current chunk number for static scheduling */
1902
1903 kmp_uint64 ordered_lower;
1904 kmp_uint64 ordered_upper;
1905#if KMP_OS_WINDOWS0
1906 kmp_int64 last_upper;
1907#endif /* KMP_OS_WINDOWS */
1908} dispatch_private_info64_t;
1909#endif /* KMP_STATIC_STEAL_ENABLED */
1910
1911typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) dispatch_private_info {
1912 union private_info {
1913 dispatch_private_info32_t p32;
1914 dispatch_private_info64_t p64;
1915 } u;
1916 enum sched_type schedule; /* scheduling algorithm */
1917 kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
1918 std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer
1919 kmp_int32 ordered_bumped;
1920 // Stack of buffers for nest of serial regions
1921 struct dispatch_private_info *next;
1922 kmp_int32 type_size; /* the size of types in private_info */
1923#if KMP_USE_HIER_SCHED0
1924 kmp_int32 hier_id;
1925 void *parent; /* hierarchical scheduling parent pointer */
1926#endif
1927 enum cons_type pushed_ws;
1928} dispatch_private_info_t;
1929
1930typedef struct dispatch_shared_info32 {
1931 /* chunk index under dynamic, number of idle threads under static-steal;
1932 iteration index otherwise */
1933 volatile kmp_uint32 iteration;
1934 volatile kmp_int32 num_done;
1935 volatile kmp_uint32 ordered_iteration;
1936 // Dummy to retain the structure size after making ordered_iteration scalar
1937 kmp_int32 ordered_dummy[KMP_MAX_ORDERED8 - 1];
1938} dispatch_shared_info32_t;
1939
1940typedef struct dispatch_shared_info64 {
1941 /* chunk index under dynamic, number of idle threads under static-steal;
1942 iteration index otherwise */
1943 volatile kmp_uint64 iteration;
1944 volatile kmp_int64 num_done;
1945 volatile kmp_uint64 ordered_iteration;
1946 // Dummy to retain the structure size after making ordered_iteration scalar
1947 kmp_int64 ordered_dummy[KMP_MAX_ORDERED8 - 3];
1948} dispatch_shared_info64_t;
1949
1950typedef struct dispatch_shared_info {
1951 union shared_info {
1952 dispatch_shared_info32_t s32;
1953 dispatch_shared_info64_t s64;
1954 } u;
1955 volatile kmp_uint32 buffer_index;
1956 volatile kmp_int32 doacross_buf_idx; // teamwise index
1957 volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1958 kmp_int32 doacross_num_done; // count finished threads
1959#if KMP_USE_HIER_SCHED0
1960 void *hier;
1961#endif
1962#if KMP_USE_HWLOC0
1963 // When linking with libhwloc, the ORDERED EPCC test slows down on big
1964 // machines (> 48 cores). Performance analysis showed that a cache thrash
1965 // was occurring and this padding helps alleviate the problem.
1966 char padding[64];
1967#endif
1968} dispatch_shared_info_t;
1969
1970typedef struct kmp_disp {
1971 /* Vector for ORDERED SECTION */
1972 void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
1973 /* Vector for END ORDERED SECTION */
1974 void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
1975
1976 dispatch_shared_info_t *th_dispatch_sh_current;
1977 dispatch_private_info_t *th_dispatch_pr_current;
1978
1979 dispatch_private_info_t *th_disp_buffer;
1980 kmp_uint32 th_disp_index;
1981 kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1982 volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1983 kmp_int64 *th_doacross_info; // info on loop bounds
1984#if KMP_USE_INTERNODE_ALIGNMENT0
1985 char more_padding[INTERNODE_CACHE_LINE4096];
1986#endif
1987} kmp_disp_t;
1988
1989/* ------------------------------------------------------------------------ */
1990/* Barrier stuff */
1991
1992/* constants for barrier state update */
1993#define KMP_INIT_BARRIER_STATE0 0 /* should probably start from zero */
1994#define KMP_BARRIER_SLEEP_BIT0 0 /* bit used for suspend/sleep part of state */
1995#define KMP_BARRIER_UNUSED_BIT1 1 // bit that must never be set for valid state
1996#define KMP_BARRIER_BUMP_BIT2 2 /* lsb used for bump of go/arrived state */
1997
1998#define KMP_BARRIER_SLEEP_STATE(1 << 0) (1 << KMP_BARRIER_SLEEP_BIT0)
1999#define KMP_BARRIER_UNUSED_STATE(1 << 1) (1 << KMP_BARRIER_UNUSED_BIT1)
2000#define KMP_BARRIER_STATE_BUMP(1 << 2) (1 << KMP_BARRIER_BUMP_BIT2)
2001
2002#if (KMP_BARRIER_SLEEP_BIT0 >= KMP_BARRIER_BUMP_BIT2)
2003#error "Barrier sleep bit must be smaller than barrier bump bit"
2004#endif
2005#if (KMP_BARRIER_UNUSED_BIT1 >= KMP_BARRIER_BUMP_BIT2)
2006#error "Barrier unused bit must be smaller than barrier bump bit"
2007#endif
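The two #error checks above keep the bump increment clear of the status bits; a worked illustration (assuming, as elsewhere in this runtime, that the go/arrived counters advance in steps of KMP_BARRIER_STATE_BUMP): bumping a state of 8 (binary 1000) by 1 << 2 gives 12 (1100), and bumping a sleeping state of 9 (1000 | sleep bit) gives 13 (1101), so the sleep bit at position 0 survives every bump.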
2008
2009// Constants for release barrier wait state: currently, hierarchical only
2010#define KMP_BARRIER_NOT_WAITING0 0 // Normal state; worker not in wait_sleep
2011#define KMP_BARRIER_OWN_FLAG1 \
2012 1 // Normal state; worker waiting on own b_go flag in release
2013#define KMP_BARRIER_PARENT_FLAG2 \
2014 2 // Special state; worker waiting on parent's b_go flag in release
2015#define KMP_BARRIER_SWITCH_TO_OWN_FLAG3 \
2016 3 // Special state; tells worker to shift from parent to own b_go
2017#define KMP_BARRIER_SWITCHING4 \
2018 4 // Special state; worker resets appropriate flag on wake-up
2019
2020#define KMP_NOT_SAFE_TO_REAP0 \
2021 0 // Thread th_reap_state: not safe to reap (tasking)
2022#define KMP_SAFE_TO_REAP1 1 // Thread th_reap_state: safe to reap (not tasking)
2023
2024// The flag_type describes the storage used for the flag.
2025enum flag_type {
2026 flag32, /**< atomic 32 bit flags */
2027 flag64, /**< 64 bit flags */
2028 atomic_flag64, /**< atomic 64 bit flags */
2029 flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
2030 flag_unset
2031};
2032
2033enum barrier_type {
2034 bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
2035 barriers if enabled) */
2036 bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
2037#if KMP_FAST_REDUCTION_BARRIER1
2038 bs_reduction_barrier, /* 2, All barriers that are used in reduction */
2039#endif // KMP_FAST_REDUCTION_BARRIER
2040 bs_last_barrier /* Just a placeholder to mark the end */
2041};
2042
2043// to work with reduction barriers just like with plain barriers
2044#if !KMP_FAST_REDUCTION_BARRIER1
2045#define bs_reduction_barrier bs_plain_barrier
2046#endif // KMP_FAST_REDUCTION_BARRIER
2047
2048typedef enum kmp_bar_pat { /* Barrier communication patterns */
2049 bp_linear_bar =
2050 0, /* Single level (degenerate) tree */
2051 bp_tree_bar =
2052 1, /* Balanced tree with branching factor 2^n */
2053 bp_hyper_bar = 2, /* Hypercube-embedded tree with min
2054 branching factor 2^n */
2055 bp_hierarchical_bar = 3, /* Machine hierarchy tree */
2056 bp_dist_bar = 4, /* Distributed barrier */
2057 bp_last_bar /* Placeholder to mark the end */
2058} kmp_bar_pat_e;
2059
2060#define KMP_BARRIER_ICV_PUSH1 1
2061
2062/* Record for holding the values of the internal controls stack records */
2063typedef struct kmp_internal_control {
2064 int serial_nesting_level; /* corresponds to the value of the
2065 th_team_serialized field */
2066 kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
2067 thread) */
2068 kmp_int8
2069 bt_set; /* internal control for whether blocktime is explicitly set */
2070 int blocktime; /* internal control for blocktime */
2071#if KMP_USE_MONITOR
2072 int bt_intervals; /* internal control for blocktime intervals */
2073#endif
2074 int nproc; /* internal control for #threads for next parallel region (per
2075 thread) */
2076 int thread_limit; /* internal control for thread-limit-var */
2077 int max_active_levels; /* internal control for max_active_levels */
2078 kmp_r_sched_t
2079 sched; /* internal control for runtime schedule {sched,chunk} pair */
2080 kmp_proc_bind_t proc_bind; /* internal control for affinity */
2081 kmp_int32 default_device; /* internal control for default device */
2082 struct kmp_internal_control *next;
2083} kmp_internal_control_t;
2084
2085static inline void copy_icvs(kmp_internal_control_t *dst,
2086 kmp_internal_control_t *src) {
2087 *dst = *src;
2088}
2089
2090/* Thread barrier needs volatile barrier fields */
2091typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_bstate {
2092 // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
2093 // uses of it). It is not explicitly aligned below, because we *don't* want
2094 // it to be padded -- instead, we fit b_go into the same cache line with
2095 // th_fixed_icvs, enabling NGO cache lines stores in the hierarchical barrier.
2096 kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
2097 // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
2098 // same NGO store
2099 volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
2100 KMP_ALIGN_CACHE__attribute__((aligned(64))) volatile kmp_uint64
2101 b_arrived; // STATE => task reached synch point.
2102 kmp_uint32 *skip_per_level;
2103 kmp_uint32 my_level;
2104 kmp_int32 parent_tid;
2105 kmp_int32 old_tid;
2106 kmp_uint32 depth;
2107 struct kmp_bstate *parent_bar;
2108 kmp_team_t *team;
2109 kmp_uint64 leaf_state;
2110 kmp_uint32 nproc;
2111 kmp_uint8 base_leaf_kids;
2112 kmp_uint8 leaf_kids;
2113 kmp_uint8 offset;
2114 kmp_uint8 wait_flag;
2115 kmp_uint8 use_oncore_barrier;
2116#if USE_DEBUGGER0
2117 // The following field is intended for the debugger solely. Only the worker
2118 // thread itself accesses this field: the worker increases it by 1 when it
2119 // arrives to a barrier.
2120 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_uint b_worker_arrived;
2121#endif /* USE_DEBUGGER */
2122} kmp_bstate_t;
2123
2124union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_barrier_union {
2125 double b_align; /* use worst case alignment */
 2126 char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
2127 kmp_bstate_t bb;
2128};
2129
2130typedef union kmp_barrier_union kmp_balign_t;
2131
2132/* Team barrier needs only non-volatile arrived counter */
2133union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_barrier_team_union {
2134 double b_align; /* use worst case alignment */
2135 char b_pad[CACHE_LINE64];
2136 struct {
2137 kmp_uint64 b_arrived; /* STATE => task reached synch point. */
2138#if USE_DEBUGGER0
 2139 // The following two fields are intended for the debugger solely. Only
2140 // primary thread of the team accesses these fields: the first one is
2141 // increased by 1 when the primary thread arrives to a barrier, the second
2142 // one is increased by one when all the threads arrived.
2143 kmp_uint b_master_arrived;
2144 kmp_uint b_team_arrived;
2145#endif
2146 };
2147};
2148
2149typedef union kmp_barrier_team_union kmp_balign_team_t;
2150
2151/* Padding for Linux* OS pthreads condition variables and mutexes used to signal
2152 threads when a condition changes. This is to workaround an NPTL bug where
2153 padding was added to pthread_cond_t which caused the initialization routine
2154 to write outside of the structure if compiled on pre-NPTL threads. */
2155#if KMP_OS_WINDOWS0
2156typedef struct kmp_win32_mutex {
2157 /* The Lock */
2158 CRITICAL_SECTION cs;
2159} kmp_win32_mutex_t;
2160
2161typedef struct kmp_win32_cond {
2162 /* Count of the number of waiters. */
2163 int waiters_count_;
2164
2165 /* Serialize access to <waiters_count_> */
2166 kmp_win32_mutex_t waiters_count_lock_;
2167
2168 /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
2169 int release_count_;
2170
2171 /* Keeps track of the current "generation" so that we don't allow */
2172 /* one thread to steal all the "releases" from the broadcast. */
2173 int wait_generation_count_;
2174
2175 /* A manual-reset event that's used to block and release waiting threads. */
2176 HANDLE event_;
2177} kmp_win32_cond_t;
2178#endif
2179
2180#if KMP_OS_UNIX1
2181
2182union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_cond_union {
2183 double c_align;
2184 char c_pad[CACHE_LINE64];
2185 pthread_cond_t c_cond;
2186};
2187
2188typedef union kmp_cond_union kmp_cond_align_t;
2189
2190union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_mutex_union {
2191 double m_align;
2192 char m_pad[CACHE_LINE64];
2193 pthread_mutex_t m_mutex;
2194};
2195
2196typedef union kmp_mutex_union kmp_mutex_align_t;
2197
2198#endif /* KMP_OS_UNIX */
2199
2200typedef struct kmp_desc_base {
2201 void *ds_stackbase;
2202 size_t ds_stacksize;
2203 int ds_stackgrow;
2204 kmp_thread_t ds_thread;
2205 volatile int ds_tid;
2206 int ds_gtid;
2207#if KMP_OS_WINDOWS0
2208 volatile int ds_alive;
2209 DWORD ds_thread_id;
2210/* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes.
 2211 However, debugger support (libomp_db) cannot work with handles, because they
 2212 are not comparable. For example, the debugger requests info about the thread with handle h.
 2213 h is valid within the debugger process, but meaningless within the debuggee process.
 2214 Even if h is duplicated by a call to DuplicateHandle() so that the result h' is valid
 2215 within the debuggee process, h' is a *new* handle which does *not* equal
 2216 any other handle in the debuggee... The only way to compare handles is to convert
 2217 them to system-wide ids. The GetThreadId() function is available only on
 2218 Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is available
 2219 on all Windows* OS flavours (including Windows* 95). Thus, we have to get
 2220 the thread id by a call to GetCurrentThreadId() from within the thread and save it
2221 to let libomp_db identify threads. */
2222#endif /* KMP_OS_WINDOWS */
2223} kmp_desc_base_t;
2224
2225typedef union KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_desc {
2226 double ds_align; /* use worst case alignment */
 2227 char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
2228 kmp_desc_base_t ds;
2229} kmp_desc_t;
2230
2231typedef struct kmp_local {
2232 volatile int this_construct; /* count of single's encountered by thread */
2233 void *reduce_data;
2234#if KMP_USE_BGET1
2235 void *bget_data;
2236 void *bget_list;
2237#if !USE_CMP_XCHG_FOR_BGET1
2238#ifdef USE_QUEUING_LOCK_FOR_BGET
2239 kmp_lock_t bget_lock; /* Lock for accessing bget free list */
2240#else
2241 kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
2242// bootstrap lock so we can use it at library
2243// shutdown.
2244#endif /* USE_LOCK_FOR_BGET */
2245#endif /* ! USE_CMP_XCHG_FOR_BGET */
2246#endif /* KMP_USE_BGET */
2247
2248 PACKED_REDUCTION_METHOD_T
2249 packed_reduction_method; /* stored by __kmpc_reduce*(), used by
2250 __kmpc_end_reduce*() */
2251
2252} kmp_local_t;
2253
2254#define KMP_CHECK_UPDATE(a, b)if ((a) != (b)) (a) = (b) \
2255 if ((a) != (b)) \
2256 (a) = (b)
2257#define KMP_CHECK_UPDATE_SYNC(a, b)if ((a) != (b)) (((a))) = (((b))) \
2258 if ((a) != (b)) \
2259 TCW_SYNC_PTR((a), (b))(((a))) = (((b)))
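Both macros above skip the store when the value is already correct, presumably to avoid dirtying shared cache lines with redundant writes. A minimal standalone sketch of the same pattern (illustrative macro name, not from kmp.h):

#include <cstdio>
// Same shape as KMP_CHECK_UPDATE(a, b): only write when something changes.
#define CHECK_UPDATE(a, b)                                                     \
  if ((a) != (b))                                                              \
  (a) = (b)
int main() {
  int shared = 4;
  CHECK_UPDATE(shared, 4);     // values equal: no store issued
  CHECK_UPDATE(shared, 7);     // values differ: store issued
  std::printf("%d\n", shared); // prints 7
}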
2260
2261#define get__blocktime(xteam, xtid) \
 2262 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
2263#define get__bt_set(xteam, xtid) \
 2264 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
2265#if KMP_USE_MONITOR
2266#define get__bt_intervals(xteam, xtid) \
2267 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
2268#endif
2269
2270#define get__dynamic_2(xteam, xtid) \
 2271 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
2272#define get__nproc_2(xteam, xtid) \
 2273 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
2274#define get__sched_2(xteam, xtid) \
 2275 ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
2276
2277#define set__blocktime_team(xteam, xtid, xval) \
 2278 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \
 2279 (xval))
2280
2281#if KMP_USE_MONITOR
2282#define set__bt_intervals_team(xteam, xtid, xval) \
2283 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \
2284 (xval))
2285#endif
2286
2287#define set__bt_set_team(xteam, xtid, xval) \
 2288 (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
2289
2290#define set__dynamic(xthread, xval) \
 2291 (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
2292#define get__dynamic(xthread) \
 2293 (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))
2294
2295#define set__nproc(xthread, xval) \
 2296 (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
2297
2298#define set__thread_limit(xthread, xval) \
 2299 (((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))
2300
2301#define set__max_active_levels(xthread, xval) \
 2302 (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
2303
2304#define get__max_active_levels(xthread) \
 2305 ((xthread)->th.th_current_task->td_icvs.max_active_levels)
2306
2307#define set__sched(xthread, xval) \
 2308 (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
2309
2310#define set__proc_bind(xthread, xval) \
 2311 (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
2312#define get__proc_bind(xthread) \
 2313 ((xthread)->th.th_current_task->td_icvs.proc_bind)
2314
2315// OpenMP tasking data structures
2316
2317typedef enum kmp_tasking_mode {
2318 tskm_immediate_exec = 0,
2319 tskm_extra_barrier = 1,
2320 tskm_task_teams = 2,
2321 tskm_max = 2
2322} kmp_tasking_mode_t;
2323
2324extern kmp_tasking_mode_t
2325 __kmp_tasking_mode; /* determines how/when to execute tasks */
2326extern int __kmp_task_stealing_constraint;
2327extern int __kmp_enable_task_throttling;
2328extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
2329// specified, defaults to 0 otherwise
2330// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
2331extern kmp_int32 __kmp_max_task_priority;
2332// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise
2333extern kmp_uint64 __kmp_taskloop_min_tasks;
2334
2335/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
2336 taskdata first */
2337#define KMP_TASK_TO_TASKDATA(task)(((kmp_taskdata_t *)task) - 1) (((kmp_taskdata_t *)task) - 1)
2338#define KMP_TASKDATA_TO_TASK(taskdata)(kmp_task_t *)(taskdata + 1) (kmp_task_t *)(taskdata + 1)
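Per the NOTE above, the taskdata and the task live in one allocation with the taskdata header first, so both macros are plain pointer arithmetic between adjacent objects. A standalone sketch of that layout (stand-in struct names, not the kmp.h types):

#include <cassert>
#include <cstdlib>
struct header_t { int refcount; };   // plays the role of kmp_taskdata_t
struct payload_t { void *shareds; }; // plays the role of kmp_task_t
int main() {
  void *block = std::malloc(sizeof(header_t) + sizeof(payload_t));
  header_t *hdr = static_cast<header_t *>(block);
  payload_t *task = reinterpret_cast<payload_t *>(hdr + 1); // like TASKDATA_TO_TASK
  assert(reinterpret_cast<header_t *>(task) - 1 == hdr);    // like TASK_TO_TASKDATA
  std::free(block);
}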
2339
2340// The tt_found_tasks flag is a signal to all threads in the team that tasks
2341// were spawned and queued since the previous barrier release.
2342#define KMP_TASKING_ENABLED(task_team)((!0) == ((task_team)->tt.tt_found_tasks)) \
2343 (TRUE(!0) == TCR_SYNC_4((task_team)->tt.tt_found_tasks)((task_team)->tt.tt_found_tasks))
2344/*!
2345@ingroup BASIC_TYPES
2346@{
2347*/
2348
2349/*!
2350 */
2351typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
2352
2353typedef union kmp_cmplrdata {
2354 kmp_int32 priority; /**< priority specified by user for the task */
2355 kmp_routine_entry_t
2356 destructors; /* pointer to function to invoke deconstructors of
2357 firstprivate C++ objects */
2358 /* future data */
2359} kmp_cmplrdata_t;
2360
2361/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
2362/*!
2363 */
2364typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
2365 void *shareds; /**< pointer to block of pointers to shared vars */
2366 kmp_routine_entry_t
2367 routine; /**< pointer to routine to call for executing task */
2368 kmp_int32 part_id; /**< part id for the task */
2369 kmp_cmplrdata_t
2370 data1; /* Two known optional additions: destructors and priority */
2371 kmp_cmplrdata_t data2; /* Process destructors first, priority second */
2372 /* future data */
2373 /* private vars */
2374} kmp_task_t;
2375
2376/*!
2377@}
2378*/
2379
2380typedef struct kmp_taskgroup {
2381 std::atomic<kmp_int32> count; // number of allocated and incomplete tasks
2382 std::atomic<kmp_int32>
2383 cancel_request; // request for cancellation of this taskgroup
2384 struct kmp_taskgroup *parent; // parent taskgroup
2385 // Block of data to perform task reduction
2386 void *reduce_data; // reduction related info
2387 kmp_int32 reduce_num_data; // number of data items to reduce
2388 uintptr_t *gomp_data; // gomp reduction data
2389} kmp_taskgroup_t;
2390
2391// forward declarations
2392typedef union kmp_depnode kmp_depnode_t;
2393typedef struct kmp_depnode_list kmp_depnode_list_t;
2394typedef struct kmp_dephash_entry kmp_dephash_entry_t;
2395
2396// macros for checking dep flag as an integer
2397#define KMP_DEP_IN 0x1
2398#define KMP_DEP_OUT 0x2
2399#define KMP_DEP_INOUT 0x3
2400#define KMP_DEP_MTX 0x4
2401#define KMP_DEP_SET 0x8
2402#define KMP_DEP_ALL 0x80
2403// Compiler sends us this info:
2404typedef struct kmp_depend_info {
2405 kmp_intptr_t base_addr;
2406 size_t len;
2407 union {
2408 kmp_uint8 flag; // flag as an unsigned char
2409 struct { // flag as a set of 8 bits
2410 unsigned in : 1;
2411 unsigned out : 1;
2412 unsigned mtx : 1;
2413 unsigned set : 1;
2414 unsigned unused : 3;
2415 unsigned all : 1;
2416 } flags;
2417 };
2418} kmp_depend_info_t;
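// Illustrative sketch (not part of kmp.h): the anonymous union lets the runtime
// treat the dependence kind either as individual bits or as one byte compared
// against the KMP_DEP_* masks above. Assuming the usual layout where 'in' is the
// least-significant bit, a hypothetical inout dependence could be filled in as:
//   kmp_depend_info_t dep;
//   dep.base_addr = (kmp_intptr_t)addr; // addr: stand-in for the dependent object
//   dep.len = len;                      // len: stand-in for its size
//   dep.flags.in = 1;
//   dep.flags.out = 1;
//   // ...which is the same byte as dep.flag == KMP_DEP_INOUT (0x3).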
2419
2420// Internal structures to work with task dependencies:
2421struct kmp_depnode_list {
2422 kmp_depnode_t *node;
2423 kmp_depnode_list_t *next;
2424};
2425
2426// Max number of mutexinoutset dependencies per node
2427#define MAX_MTX_DEPS 4
2428
2429typedef struct kmp_base_depnode {
2430 kmp_depnode_list_t *successors; /* used under lock */
2431 kmp_task_t *task; /* non-NULL if depnode is active, used under lock */
2432 kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */
2433 kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */
2434 kmp_lock_t lock; /* guards shared fields: task, successors */
2435#if KMP_SUPPORT_GRAPH_OUTPUT
2436 kmp_uint32 id;
2437#endif
2438 std::atomic<kmp_int32> npredecessors;
2439 std::atomic<kmp_int32> nrefs;
2440} kmp_base_depnode_t;
2441
2442union KMP_ALIGN_CACHE kmp_depnode {
2443 double dn_align; /* use worst case alignment */
2444 char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
2445 kmp_base_depnode_t dn;
2446};
2447
2448struct kmp_dephash_entry {
2449 kmp_intptr_t addr;
2450 kmp_depnode_t *last_out;
2451 kmp_depnode_list_t *last_set;
2452 kmp_depnode_list_t *prev_set;
2453 kmp_uint8 last_flag;
2454 kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
2455 kmp_dephash_entry_t *next_in_bucket;
2456};
2457
2458typedef struct kmp_dephash {
2459 kmp_dephash_entry_t **buckets;
2460 size_t size;
2461 kmp_depnode_t *last_all;
2462 size_t generation;
2463 kmp_uint32 nelements;
2464 kmp_uint32 nconflicts;
2465} kmp_dephash_t;
2466
2467typedef struct kmp_task_affinity_info {
2468 kmp_intptr_t base_addr;
2469 size_t len;
2470 struct {
2471 bool flag1 : 1;
2472 bool flag2 : 1;
2473 kmp_int32 reserved : 30;
2474 } flags;
2475} kmp_task_affinity_info_t;
2476
2477typedef enum kmp_event_type_t {
2478 KMP_EVENT_UNINITIALIZED = 0,
2479 KMP_EVENT_ALLOW_COMPLETION = 1
2480} kmp_event_type_t;
2481
2482typedef struct {
2483 kmp_event_type_t type;
2484 kmp_tas_lock_t lock;
2485 union {
2486 kmp_task_t *task;
2487 } ed;
2488} kmp_event_t;
2489
2490#ifdef BUILD_TIED_TASK_STACK
2491
2492/* Tied Task stack definitions */
2493typedef struct kmp_stack_block {
2494 kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
2495 struct kmp_stack_block *sb_next;
2496 struct kmp_stack_block *sb_prev;
2497} kmp_stack_block_t;
2498
2499typedef struct kmp_task_stack {
2500 kmp_stack_block_t ts_first_block; // first block of stack entries
2501 kmp_taskdata_t **ts_top; // pointer to the top of stack
2502 kmp_int32 ts_entries; // number of entries on the stack
2503} kmp_task_stack_t;
2504
2505#endif // BUILD_TIED_TASK_STACK
2506
2507typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2508 /* Compiler flags */ /* Total compiler flags must be 16 bits */
2509 unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
2510 unsigned final : 1; /* task is final(1) so execute immediately */
2511 unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
2512 code path */
2513 unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
2514 invoke destructors from the runtime */
2515 unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
2516 context of the RTL) */
2517 unsigned priority_specified : 1; /* set if the compiler provides priority
2518 setting for the task */
2519 unsigned detachable : 1; /* 1 == can detach */
2520 unsigned hidden_helper : 1; /* 1 == hidden helper task */
2521 unsigned reserved : 8; /* reserved for compiler use */
2522
2523 /* Library flags */ /* Total library flags must be 16 bits */
2524 unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2525 unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
2526 unsigned tasking_ser : 1; // all tasks in team are either executed immediately
2527 // (1) or may be deferred (0)
2528 unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
2529 // (0) [>= 2 threads]
2530 /* If either team_serial or tasking_ser is set, task team may be NULL */
2531 /* Task State Flags: */
2532 unsigned started : 1; /* 1==started, 0==not started */
2533 unsigned executing : 1; /* 1==executing, 0==not executing */
2534 unsigned complete : 1; /* 1==complete, 0==not complete */
2535 unsigned freed : 1; /* 1==freed, 0==allocated */
2536 unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2537 unsigned reserved31 : 7; /* reserved for library use */
2538
2539} kmp_tasking_flags_t;
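// Illustrative check (not in the original header): the 16 compiler bits plus the
// 16 library bits above are meant to pack into one 32-bit word, so on the usual
// ABIs the following would hold:
//   static_assert(sizeof(kmp_tasking_flags_t) == 4,
//                 "kmp_tasking_flags_t must stay exactly 32 bits");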
2540
2541typedef struct kmp_target_data {
2542 void *async_handle; // libomptarget async handle for task completion query
2543} kmp_target_data_t;
2544
2545struct kmp_taskdata { /* aligned during dynamic allocation */
2546 kmp_int32 td_task_id; /* id, assigned by debugger */
2547 kmp_tasking_flags_t td_flags; /* task flags */
2548 kmp_team_t *td_team; /* team for this task */
2549 kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
2550 /* Currently not used except for perhaps IDB */
2551 kmp_taskdata_t *td_parent; /* parent task */
2552 kmp_int32 td_level; /* task nesting level */
2553 std::atomic<kmp_int32> td_untied_count; // untied task active parts counter
2554 ident_t *td_ident; /* task identifier */
2555 // Taskwait data.
2556 ident_t *td_taskwait_ident;
2557 kmp_uint32 td_taskwait_counter;
2558 kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
2559 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_internal_control_t
2560 td_icvs; /* Internal control variables for the task */
2561 KMP_ALIGN_CACHE__attribute__((aligned(64))) std::atomic<kmp_int32>
2562 td_allocated_child_tasks; /* Child tasks (+ current task) not yet
2563 deallocated */
2564 std::atomic<kmp_int32>
2565 td_incomplete_child_tasks; /* Child tasks not yet complete */
2566 kmp_taskgroup_t
2567 *td_taskgroup; // Each task keeps pointer to its current taskgroup
2568 kmp_dephash_t
2569 *td_dephash; // Dependencies for children tasks are tracked from here
2570 kmp_depnode_t
2571 *td_depnode; // Pointer to graph node if this task has dependencies
2572 kmp_task_team_t *td_task_team;
2573 size_t td_size_alloc; // Size of task structure, including shareds etc.
2574#if defined(KMP_GOMP_COMPAT)
2575 // 4 or 8 byte integers for the loop bounds in GOMP_taskloop
2576 kmp_int32 td_size_loop_bounds;
2577#endif
2578 kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint
2579#if defined(KMP_GOMP_COMPAT)
2580 // GOMP sends in a copy function for copy constructors
2581 void (*td_copy_func)(void *, void *);
2582#endif
2583 kmp_event_t td_allow_completion_event;
2584#if OMPT_SUPPORT1
2585 ompt_task_info_t ompt_task_info;
2586#endif
2587 kmp_target_data_t td_target_data;
2588}; // struct kmp_taskdata
2589
2590// Make sure padding above worked
2591KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
2592
2593// Data for task team but per thread
2594typedef struct kmp_base_thread_data {
2595 kmp_info_p *td_thr; // Pointer back to thread info
2596 // Used only in __kmp_execute_tasks_template, maybe not avail until task is
2597 // queued?
2598 kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2599 kmp_taskdata_t *
2600 *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2601 kmp_int32 td_deque_size; // Size of deque
2602 kmp_uint32 td_deque_head; // Head of deque (will wrap)
2603 kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2604 kmp_int32 td_deque_ntasks; // Number of tasks in deque
2605 // GEH: shouldn't this be volatile since used in while-spin?
2606 kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2607#ifdef BUILD_TIED_TASK_STACK
2608 kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
2609// scheduling constraint
2610#endif // BUILD_TIED_TASK_STACK
2611} kmp_base_thread_data_t;
2612
2613#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
2614#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
2615
2616#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
2617#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
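// Illustrative sketch (not part of kmp.h): td_deque_size is kept a power of two
// (initially 1 << TASK_DEQUE_BITS), so head/tail indices can wrap with the mask
// instead of a modulo. Roughly how a push advances the tail of a thread's deque
// (td: stand-in pointer to that thread's kmp_base_thread_data_t):
//   td->td_deque[td->td_deque_tail] = new_taskdata;
//   td->td_deque_tail = (td->td_deque_tail + 1) & TASK_DEQUE_MASK(*td);
//   td->td_deque_ntasks++;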
2618
2619typedef union KMP_ALIGN_CACHE kmp_thread_data {
2620 kmp_base_thread_data_t td;
2621 double td_align; /* use worst case alignment */
2622 char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
2623} kmp_thread_data_t;
2624
2625typedef struct kmp_task_pri {
2626 kmp_thread_data_t td;
2627 kmp_int32 priority;
2628 kmp_task_pri *next;
2629} kmp_task_pri_t;
2630
2631// Data for task teams which are used when tasking is enabled for the team
2632typedef struct kmp_base_task_team {
2633 kmp_bootstrap_lock_t
2634 tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2635 /* must be bootstrap lock since used at library shutdown*/
2636
2637 // TODO: check performance vs kmp_tas_lock_t
2638 kmp_bootstrap_lock_t tt_task_pri_lock; /* Lock to access priority tasks */
2639 kmp_task_pri_t *tt_task_pri_list;
2640
2641 kmp_task_team_t *tt_next; /* For linking the task team free list */
2642 kmp_thread_data_t
2643 *tt_threads_data; /* Array of per-thread structures for task team */
2644 /* Data survives task team deallocation */
2645 kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
2646 executing this team? */
2647 /* TRUE means tt_threads_data is set up and initialized */
2648 kmp_int32 tt_nproc; /* #threads in team */
2649 kmp_int32 tt_max_threads; // # entries allocated for threads_data array
2650 kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier
2651 kmp_int32 tt_untied_task_encountered;
2652 std::atomic<kmp_int32> tt_num_task_pri; // number of priority tasks enqueued
2653 // A hidden helper task has been encountered in this task team, so we must
2654 // wait when waiting on the task team
2655 kmp_int32 tt_hidden_helper_task_encountered;
2656
2657 KMP_ALIGN_CACHE__attribute__((aligned(64)))
2658 std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */
2659
2660 KMP_ALIGN_CACHE__attribute__((aligned(64)))
2661 volatile kmp_uint32
2662 tt_active; /* is the team still actively executing tasks */
2663} kmp_base_task_team_t;
2664
2665union KMP_ALIGN_CACHE kmp_task_team {
2666 kmp_base_task_team_t tt;
2667 double tt_align; /* use worst case alignment */
2668 char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
2669};
2670
2671#if (USE_FAST_MEMORY3 == 3) || (USE_FAST_MEMORY3 == 5)
2672// Free lists keep same-size free memory slots for fast memory allocation
2673// routines
2674typedef struct kmp_free_list {
2675 void *th_free_list_self; // Self-allocated tasks free list
2676 void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
2677 // threads
2678 void *th_free_list_other; // Non-self free list (to be returned to owner's
2679 // sync list)
2680} kmp_free_list_t;
2681#endif
2682#if KMP_NESTED_HOT_TEAMS1
2683// Hot teams array keeps hot teams and their sizes for given thread. Hot teams
2684// are not put in teams pool, and they don't put threads in threads pool.
2685typedef struct kmp_hot_team_ptr {
2686 kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2687 kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2688} kmp_hot_team_ptr_t;
2689#endif
2690typedef struct kmp_teams_size {
2691 kmp_int32 nteams; // number of teams in a league
2692 kmp_int32 nth; // number of threads in each team of the league
2693} kmp_teams_size_t;
2694
2695// This struct stores a thread that acts as a "root" for a contention
2696// group. Contention groups are rooted at kmp_root threads, but also at
2697// each primary thread of each team created in the teams construct.
2698// This struct therefore also stores a thread_limit associated with
2699// that contention group, and a counter to track the number of threads
2700// active in that contention group. Each thread has a list of these: CG
2701// root threads have an entry in their list in which cg_root refers to
2702// the thread itself, whereas other workers in the CG will have a
2703// single entry where cg_root is same as the entry containing their CG
2704// root. When a thread encounters a teams construct, it will add a new
2705// entry to the front of its list, because it now roots a new CG.
2706typedef struct kmp_cg_root {
2707 kmp_info_p *cg_root; // "root" thread for a contention group
2708 // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
2709 // thread_limit clause for teams primary threads
2710 kmp_int32 cg_thread_limit;
2711 kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
2712 struct kmp_cg_root *up; // pointer to higher level CG root in list
2713} kmp_cg_root_t;
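// Illustrative sketch (not part of kmp.h): a worker's contention-group limit and
// active-thread count live in the entry at the front of its th_cg_roots list, so
// a hypothetical query only needs that first node:
//   kmp_cg_root_t *cg = this_thr->th.th_cg_roots; // this_thr: stand-in thread
//   kmp_int32 limit = cg ? cg->cg_thread_limit : 0;
//   kmp_int32 active = cg ? cg->cg_nthreads : 0;
// cg->up points at the next-higher CG root in the list.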
2714
2715// OpenMP thread data structures
2716
2717typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_base_info {
2718 /* Start with the readonly data which is cache aligned and padded. This is
2719 written before the thread starts working by the primary thread. Uber
2720 masters may update themselves later. Usage does not consider serialized
2721 regions. */
2722 kmp_desc_t th_info;
2723 kmp_team_p *th_team; /* team we belong to */
2724 kmp_root_p *th_root; /* pointer to root of task hierarchy */
2725 kmp_info_p *th_next_pool; /* next available thread in the pool */
2726 kmp_disp_t *th_dispatch; /* thread's dispatch data */
2727 int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2728
2729 /* The following are cached from the team info structure */
2730 /* TODO use these in more places as determined to be needed via profiling */
2731 int th_team_nproc; /* number of threads in a team */
2732 kmp_info_p *th_team_master; /* the team's primary thread */
2733 int th_team_serialized; /* team is serialized */
2734 microtask_t th_teams_microtask; /* save entry address for teams construct */
2735 int th_teams_level; /* save initial level of teams construct */
2736/* it is 0 on device but may be any on host */
2737
2738/* The blocktime info is copied from the team struct to the thread struct */
2739/* at the start of a barrier, and the values stored in the team are used */
2740/* at points in the code where the team struct is no longer guaranteed */
2741/* to exist (from the POV of worker threads). */
2742#if KMP_USE_MONITOR
2743 int th_team_bt_intervals;
2744 int th_team_bt_set;
2745#else
2746 kmp_uint64 th_team_bt_intervals;
2747#endif
2748
2749#if KMP_AFFINITY_SUPPORTED1
2750 kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2751 kmp_affinity_ids_t th_topology_ids; /* thread's current topology ids */
2752 kmp_affinity_attrs_t th_topology_attrs; /* thread's current topology attrs */
2753#endif
2754 omp_allocator_handle_t th_def_allocator; /* default allocator */
2755 /* The data set by the primary thread at reinit, then R/W by the worker */
2756 KMP_ALIGN_CACHE__attribute__((aligned(64))) int
2757 th_set_nproc; /* if > 0, then only use this request for the next fork */
2758#if KMP_NESTED_HOT_TEAMS1
2759 kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2760#endif
2761 kmp_proc_bind_t
2762 th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2763 kmp_teams_size_t
2764 th_teams_size; /* number of teams/threads in teams construct */
2765#if KMP_AFFINITY_SUPPORTED1
2766 int th_current_place; /* place currently bound to */
2767 int th_new_place; /* place to bind to in par reg */
2768 int th_first_place; /* first place in partition */
2769 int th_last_place; /* last place in partition */
2770#endif
2771 int th_prev_level; /* previous level for affinity format */
2772 int th_prev_num_threads; /* previous num_threads for affinity format */
2773#if USE_ITT_BUILD1
2774 kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2775 kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2776 kmp_uint64 th_frame_time; /* frame timestamp */
2777#endif /* USE_ITT_BUILD */
2778 kmp_local_t th_local;
2779 struct private_common *th_pri_head;
2780
2781 /* Now the data only used by the worker (after initial allocation) */
2782 /* TODO the first serial team should actually be stored in the info_t
2783 structure. this will help reduce initial allocation overhead */
2784 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_team_p
2785 *th_serial_team; /*serialized team held in reserve*/
2786
2787#if OMPT_SUPPORT1
2788 ompt_thread_info_t ompt_thread_info;
2789#endif
2790
2791 /* The following are also read by the primary thread during reinit */
2792 struct common_table *th_pri_common;
2793
2794 volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2795 /* while awaiting queuing lock acquire */
2796
2797 volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2798 flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc
2799
2800 ident_t *th_ident;
2801 unsigned th_x; // Random number generator data
2802 unsigned th_a; // Random number generator data
2803
2804 /* Tasking-related data for the thread */
2805 kmp_task_team_t *th_task_team; // Task team struct
2806 kmp_taskdata_t *th_current_task; // Innermost Task being executed
2807 kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2808 kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
2809 // at nested levels
2810 kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2811 kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2812 kmp_uint32 th_reap_state; // Non-zero indicates thread is not
2813 // tasking, thus safe to reap
2814
2815 /* More stuff for keeping track of active/sleeping threads (this part is
2816 written by the worker thread) */
2817 kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
2818 int th_active; // ! sleeping; 32 bits for TCR/TCW
2819 std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
2820 // 0 = not used in team; 1 = used in team;
2821 // 2 = transitioning to not used in team; 3 = transitioning to used in team
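// Illustrative sketch (not part of kmp.h): the four values above form a small
// state machine; reading the comment literally, handing a thread to a team moves
// it 0 -> 3 -> 1 and releasing it moves 1 -> 2 -> 0. A hedged model of claiming
// a free thread using the atomic directly:
//   kmp_uint32 expected = 0;
//   if (thr->th.th_used_in_team.compare_exchange_strong(expected, 3)) {
//     // wire the thread into the team, then publish the final "used" state
//     thr->th.th_used_in_team.store(1, std::memory_order_release);
//   }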
2822 struct cons_header *th_cons; // used for consistency check
2823#if KMP_USE_HIER_SCHED0
2824 // used for hierarchical scheduling
2825 kmp_hier_private_bdata_t *th_hier_bar_data;
2826#endif
2827
2828 /* Add the synchronizing data which is cache aligned and padded. */
2829 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_balign_t th_bar[bs_last_barrier];
2830
2831 KMP_ALIGN_CACHE__attribute__((aligned(64))) volatile kmp_int32
2832 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2833
2834#if (USE_FAST_MEMORY3 == 3) || (USE_FAST_MEMORY3 == 5)
2835#define NUM_LISTS 4
2836 kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
2837// allocation routines
2838#endif
2839
2840#if KMP_OS_WINDOWS0
2841 kmp_win32_cond_t th_suspend_cv;
2842 kmp_win32_mutex_t th_suspend_mx;
2843 std::atomic<int> th_suspend_init;
2844#endif
2845#if KMP_OS_UNIX1
2846 kmp_cond_align_t th_suspend_cv;
2847 kmp_mutex_align_t th_suspend_mx;
2848 std::atomic<int> th_suspend_init_count;
2849#endif
2850
2851#if USE_ITT_BUILD1
2852 kmp_itt_mark_t th_itt_mark_single;
2853// alignment ???
2854#endif /* USE_ITT_BUILD */
2855#if KMP_STATS_ENABLED0
2856 kmp_stats_list *th_stats;
2857#endif
2858#if KMP_OS_UNIX1
2859 std::atomic<bool> th_blocking;
2860#endif
2861 kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread
2862} kmp_base_info_t;
2863
2864typedef union KMP_ALIGN_CACHE kmp_info {
2865 double th_align; /* use worst case alignment */
2866 char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
2867 kmp_base_info_t th;
2868} kmp_info_t;
2869
2870// OpenMP thread team data structures
2871
2872typedef struct kmp_base_data {
2873 volatile kmp_uint32 t_value;
2874} kmp_base_data_t;
2875
2876typedef union KMP_ALIGN_CACHE kmp_sleep_team {
2877 double dt_align; /* use worst case alignment */
2878 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2879 kmp_base_data_t dt;
2880} kmp_sleep_team_t;
2881
2882typedef union KMP_ALIGN_CACHE kmp_ordered_team {
2883 double dt_align; /* use worst case alignment */
2884 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
2885 kmp_base_data_t dt;
2886} kmp_ordered_team_t;
2887
2888typedef int (*launch_t)(int gtid);
2889
2890/* Minimum number of ARGV entries to malloc if necessary */
2891#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
2892
2893// Set up how many argv pointers will fit in cache lines containing
2894// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
2895// larger value for more space between the primary write/worker read section and
2896// read/write by all section seems to buy more performance on EPCC PARALLEL.
2897#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2898#define KMP_INLINE_ARGV_BYTES \
2899 (4 * CACHE_LINE - \
2900 ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
2901 sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
2902 CACHE_LINE))
2903#else
2904#define KMP_INLINE_ARGV_BYTES \
2905 (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
2906#endif
2907#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
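// Worked example (illustrative): on x86-64 with CACHE_LINE == 64 and 8-byte
// pointers, the reserved scalars take 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so
// KMP_INLINE_ARGV_BYTES = 4*64 - (40 % 64) = 216 and
// KMP_INLINE_ARGV_ENTRIES = 216 / 8 = 27 inline argv slots.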
2908
2909typedef struct KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_base_team {
2910 // Synchronization Data
2911 // ---------------------------------------------------------------------------
2912 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_ordered_team_t t_ordered;
2913 kmp_balign_team_t t_bar[bs_last_barrier];
2914 std::atomic<int> t_construct; // count of single directive encountered by team
2915 char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron
2916
2917 // [0] - parallel / [1] - worksharing task reduction data shared by taskgroups
2918 std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
2919 std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
2920
2921 // Primary thread only
2922 // ---------------------------------------------------------------------------
2923 KMP_ALIGN_CACHE__attribute__((aligned(64))) int t_master_tid; // tid of primary thread in parent team
2924 int t_master_this_cons; // "this_construct" single counter of primary thread
2925 // in parent team
2926 ident_t *t_ident; // if volatile, have to change too much other crud to
2927 // volatile too
2928 kmp_team_p *t_parent; // parent team
2929 kmp_team_p *t_next_pool; // next free team in the team pool
2930 kmp_disp_t *t_dispatch; // thread's dispatch data
2931 kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2932 kmp_proc_bind_t t_proc_bind; // bind type for par region
2933#if USE_ITT_BUILD1
2934 kmp_uint64 t_region_time; // region begin timestamp
2935#endif /* USE_ITT_BUILD */
2936
2937 // Primary thread write, workers read
2938 // --------------------------------------------------------------------------
2939 KMP_ALIGN_CACHE__attribute__((aligned(64))) void **t_argv;
2940 int t_argc;
2941 int t_nproc; // number of threads in team
2942 microtask_t t_pkfn;
2943 launch_t t_invoke; // procedure to launch the microtask
2944
2945#if OMPT_SUPPORT1
2946 ompt_team_info_t ompt_team_info;
2947 ompt_lw_taskteam_t *ompt_serialized_team_info;
2948#endif
2949
2950#if KMP_ARCH_X860 || KMP_ARCH_X86_641
2951 kmp_int8 t_fp_control_saved;
2952 kmp_int8 t_pad2b;
2953 kmp_int16 t_x87_fpu_control_word; // FP control regs
2954 kmp_uint32 t_mxcsr;
2955#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2956
2957 void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
2958
2959 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_info_t **t_threads;
2960 kmp_taskdata_t
2961 *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2962 int t_level; // nested parallel level
2963
2964 KMP_ALIGN_CACHE__attribute__((aligned(64))) int t_max_argc;
2965 int t_max_nproc; // max threads this team can handle (dynamically expandable)
2966 int t_serialized; // levels deep of serialized teams
2967 dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2968 int t_id; // team's id, assigned by debugger.
2969 int t_active_level; // nested active parallel level
2970 kmp_r_sched_t t_sched; // run-time schedule for the team
2971#if KMP_AFFINITY_SUPPORTED1
2972 int t_first_place; // first & last place in parent thread's partition.
2973 int t_last_place; // Restore these values to primary thread after par region.
2974#endif // KMP_AFFINITY_SUPPORTED
2975 int t_display_affinity;
2976 int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
2977 // omp_set_num_threads() call
2978 omp_allocator_handle_t t_def_allocator; /* default allocator */
2979
2980// Read/write by workers as well
2981#if (KMP_ARCH_X860 || KMP_ARCH_X86_641)
2982 // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
2983 // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
2984 // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
2985 // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2986 char dummy_padding[1024];
2987#endif
2988 // Internal control stack for additional nested teams.
2989 KMP_ALIGN_CACHE__attribute__((aligned(64))) kmp_internal_control_t *t_control_stack_top;
2990 // for SERIALIZED teams nested 2 or more levels deep
2991 // typed flag to store request state of cancellation
2992 std::atomic<kmp_int32> t_cancel_request;
2993 int t_master_active; // save on fork, restore on join
2994 void *t_copypriv_data; // team specific pointer to copyprivate data array
2995#if KMP_OS_WINDOWS0
2996 std::atomic<kmp_uint32> t_copyin_counter;
2997#endif
2998#if USE_ITT_BUILD1
2999 void *t_stack_id; // team specific stack stitching id (for ittnotify)
3000#endif /* USE_ITT_BUILD */
3001 distributedBarrier *b; // Distributed barrier data associated with team
3002} kmp_base_team_t;
3003
3004union KMP_ALIGN_CACHE kmp_team {
3005 kmp_base_team_t t;
3006 double t_align; /* use worst case alignment */
3007 char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
3008};
3009
3010typedef union KMP_ALIGN_CACHE kmp_time_global {
3011 double dt_align; /* use worst case alignment */
3012 char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
3013 kmp_base_data_t dt;
3014} kmp_time_global_t;
3015
3016typedef struct kmp_base_global {
3017 /* cache-aligned */
3018 kmp_time_global_t g_time;
3019
3020 /* non cache-aligned */
3021 volatile int g_abort;
3022 volatile int g_done;
3023
3024 int g_dynamic;
3025 enum dynamic_mode g_dynamic_mode;
3026} kmp_base_global_t;
3027
3028typedef union KMP_ALIGN_CACHE kmp_global {
3029 kmp_base_global_t g;
3030 double g_align; /* use worst case alignment */
3031 char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
3032} kmp_global_t;
3033
3034typedef struct kmp_base_root {
3035 // TODO: GEH - combine r_active with r_in_parallel then r_active ==
3036 // (r_in_parallel>= 0)
3037 // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
3038 // the synch overhead or keeping r_active
3039 volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
3040 // keeps a count of active parallel regions per root
3041 std::atomic<int> r_in_parallel;
3042 // GEH: This is misnamed, should be r_active_levels
3043 kmp_team_t *r_root_team;
3044 kmp_team_t *r_hot_team;
3045 kmp_info_t *r_uber_thread;
3046 kmp_lock_t r_begin_lock;
3047 volatile int r_begin;
3048 int r_blocktime; /* blocktime for this root and descendants */
3049#if KMP_AFFINITY_SUPPORTED1
3050 int r_affinity_assigned;
3051#endif // KMP_AFFINITY_SUPPORTED
3052} kmp_base_root_t;
3053
3054typedef union KMP_ALIGN_CACHE kmp_root {
3055 kmp_base_root_t r;
3056 double r_align; /* use worst case alignment */
3057 char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
3058} kmp_root_t;
3059
3060struct fortran_inx_info {
3061 kmp_int32 data;
3062};
3063
3064// This list type exists to hold old __kmp_threads arrays so that
3065// old references to them may complete while reallocation takes place when
3066// expanding the array. The items in this list are kept alive until library
3067// shutdown.
3068typedef struct kmp_old_threads_list_t {
3069 kmp_info_t **threads;
3070 struct kmp_old_threads_list_t *next;
3071} kmp_old_threads_list_t;
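// Illustrative sketch (not part of kmp.h): when the __kmp_threads array declared
// below is reallocated, the retired array can simply be pushed onto this list so
// stale readers stay valid until shutdown (hypothetical helper, locking omitted):
//   kmp_old_threads_list_t *node = (kmp_old_threads_list_t *)__kmp_allocate(
//       sizeof(kmp_old_threads_list_t));
//   node->threads = old_array;              // old_array: the array being replaced
//   node->next = __kmp_old_threads_list;
//   __kmp_old_threads_list = node;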
3072
3073/* ------------------------------------------------------------------------ */
3074
3075extern int __kmp_settings;
3076extern int __kmp_duplicate_library_ok;
3077#if USE_ITT_BUILD1
3078extern int __kmp_forkjoin_frames;
3079extern int __kmp_forkjoin_frames_mode;
3080#endif
3081extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
3082extern int __kmp_determ_red;
3083
3084#ifdef KMP_DEBUG1
3085extern int kmp_a_debug;
3086extern int kmp_b_debug;
3087extern int kmp_c_debug;
3088extern int kmp_d_debug;
3089extern int kmp_e_debug;
3090extern int kmp_f_debug;
3091#endif /* KMP_DEBUG */
3092
3093/* For debug information logging using rotating buffer */
3094#define KMP_DEBUG_BUF_LINES_INIT 512
3095#define KMP_DEBUG_BUF_LINES_MIN 1
3096
3097#define KMP_DEBUG_BUF_CHARS_INIT 128
3098#define KMP_DEBUG_BUF_CHARS_MIN 2
3099
3100extern int
3101 __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
3102extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
3103extern int
3104 __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
3105extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
3106 entry pointer */
3107
3108extern char *__kmp_debug_buffer; /* Debug buffer itself */
3109extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines
3110 printed in buffer so far */
3111extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
3112 recommended in warnings */
3113/* end rotating debug buffer */
3114
3115#ifdef KMP_DEBUG1
3116extern int __kmp_par_range; /* +1 => only go par for constructs in range */
3117
3118#define KMP_PAR_RANGE_ROUTINE_LEN 1024
3119extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
3120#define KMP_PAR_RANGE_FILENAME_LEN 1024
3121extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
3122extern int __kmp_par_range_lb;
3123extern int __kmp_par_range_ub;
3124#endif
3125
3126/* For printing out dynamic storage map for threads and teams */
3127extern int
3128 __kmp_storage_map; /* True means print storage map for threads and teams */
3129extern int __kmp_storage_map_verbose; /* True means storage map includes
3130 placement info */
3131extern int __kmp_storage_map_verbose_specified;
3132
3133#if KMP_ARCH_X860 || KMP_ARCH_X86_641
3134extern kmp_cpuinfo_t __kmp_cpuinfo;
3135static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
3136#elif KMP_OS_DARWIN0 && KMP_ARCH_AARCH640
3137static inline bool __kmp_is_hybrid_cpu() { return true; }
3138#else
3139static inline bool __kmp_is_hybrid_cpu() { return false; }
3140#endif
3141
3142extern volatile int __kmp_init_serial;
3143extern volatile int __kmp_init_gtid;
3144extern volatile int __kmp_init_common;
3145extern volatile int __kmp_need_register_serial;
3146extern volatile int __kmp_init_middle;
3147extern volatile int __kmp_init_parallel;
3148#if KMP_USE_MONITOR
3149extern volatile int __kmp_init_monitor;
3150#endif
3151extern volatile int __kmp_init_user_locks;
3152extern volatile int __kmp_init_hidden_helper_threads;
3153extern int __kmp_init_counter;
3154extern int __kmp_root_counter;
3155extern int __kmp_version;
3156
3157/* list of address of allocated caches for commons */
3158extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
3159
3160/* Barrier algorithm types and options */
3161extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
3162extern kmp_uint32 __kmp_barrier_release_bb_dflt;
3163extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
3164extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
3165extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
3166extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
3167extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
3168extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
3169extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
3170extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
3171extern char const *__kmp_barrier_type_name[bs_last_barrier];
3172extern char const *__kmp_barrier_pattern_name[bp_last_bar];
3173
3174/* Global Locks */
3175extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
3176extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
3177extern kmp_bootstrap_lock_t __kmp_task_team_lock;
3178extern kmp_bootstrap_lock_t
3179 __kmp_exit_lock; /* exit() is not always thread-safe */
3180#if KMP_USE_MONITOR
3181extern kmp_bootstrap_lock_t
3182 __kmp_monitor_lock; /* control monitor thread creation */
3183#endif
3184extern kmp_bootstrap_lock_t
3185 __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
3186 __kmp_threads expansion to co-exist */
3187
3188extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
3189extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
3190extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
3191
3192extern enum library_type __kmp_library;
3193
3194extern enum sched_type __kmp_sched; /* default runtime scheduling */
3195extern enum sched_type __kmp_static; /* default static scheduling method */
3196extern enum sched_type __kmp_guided; /* default guided scheduling method */
3197extern enum sched_type __kmp_auto; /* default auto scheduling method */
3198extern int __kmp_chunk; /* default runtime chunk size */
3199extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */
3200
3201extern size_t __kmp_stksize; /* stack size per thread */
3202#if KMP_USE_MONITOR
3203extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
3204#endif
3205extern size_t __kmp_stkoffset; /* stack offset per thread */
3206extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
3207
3208extern size_t
3209 __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
3210extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
3211extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
3212extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
3213extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?
3214extern int __kmp_generate_warnings; /* should we issue warnings? */
3215extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
3216
3217#ifdef DEBUG_SUSPEND
3218extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
3219#endif
3220
3221extern kmp_int32 __kmp_use_yield;
3222extern kmp_int32 __kmp_use_yield_exp_set;
3223extern kmp_uint32 __kmp_yield_init;
3224extern kmp_uint32 __kmp_yield_next;
3225extern kmp_uint64 __kmp_pause_init;
3226
3227/* ------------------------------------------------------------------------- */
3228extern int __kmp_allThreadsSpecified;
3229
3230extern size_t __kmp_align_alloc;
3231/* following data protected by initialization routines */
3232extern int __kmp_xproc; /* number of processors in the system */
3233extern int __kmp_avail_proc; /* number of processors available to the process */
3234extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
3235extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
3236// maximum total number of concurrently-existing threads on device
3237extern int __kmp_max_nth;
3238// maximum total number of concurrently-existing threads in a contention group
3239extern int __kmp_cg_max_nth;
3240extern int __kmp_teams_max_nth; // max threads used in a teams construct
3241extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
3242 __kmp_root */
3243extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
3244 region a la OMP_NUM_THREADS */
3245extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
3246 initialization */
3247extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
3248 used (fixed) */
3249extern int __kmp_tp_cached; /* whether threadprivate cache has been created
3250 (__kmpc_threadprivate_cached()) */
3251extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
3252 blocking (env setting) */
3253extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */
3254#if KMP_USE_MONITOR
3255extern int
3256 __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
3257extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
3258 blocking */
3259#endif
3260#ifdef KMP_ADJUST_BLOCKTIME1
3261extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
3262#endif /* KMP_ADJUST_BLOCKTIME */
3263#ifdef KMP_DFLT_NTH_CORES
3264extern int __kmp_ncores; /* Total number of cores for threads placement */
3265#endif
3266/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */
3267extern int __kmp_abort_delay;
3268
3269extern int __kmp_need_register_atfork_specified;
3270extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
3271 to install fork handler */
3272extern int __kmp_gtid_mode; /* Method of getting gtid, values:
3273 0 - not set, will be set at runtime
3274 1 - using stack search
3275 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
3276 X*) or TlsGetValue(Windows* OS))
3277 3 - static TLS (__declspec(thread) __kmp_gtid),
3278 Linux* OS .so only. */
3279extern int
3280 __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
3281#ifdef KMP_TDATA_GTID1
3282extern KMP_THREAD_LOCAL__thread int __kmp_gtid;
3283#endif
3284extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
3285extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
3286#if KMP_ARCH_X860 || KMP_ARCH_X86_641
3287extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
3288extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
3289extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
3290#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3291
3292// max_active_levels for nested parallelism enabled by default via
3293// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND
3294extern int __kmp_dflt_max_active_levels;
3295// Indicates whether value of __kmp_dflt_max_active_levels was already
3296// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false
3297extern bool __kmp_dflt_max_active_levels_set;
3298extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
3299 concurrent execution per team */
3300#if KMP_NESTED_HOT_TEAMS1
3301extern int __kmp_hot_teams_mode;
3302extern int __kmp_hot_teams_max_level;
3303#endif
3304
3305#if KMP_OS_LINUX1
3306extern enum clock_function_type __kmp_clock_function;
3307extern int __kmp_clock_function_param;
3308#endif /* KMP_OS_LINUX */
3309
3310#if KMP_MIC_SUPPORTED((0 || 1) && (1 || 0))
3311extern enum mic_type __kmp_mic_type;
3312#endif
3313
3314#ifdef USE_LOAD_BALANCE1
3315extern double __kmp_load_balance_interval; // load balance algorithm interval
3316#endif /* USE_LOAD_BALANCE */
3317
3318// OpenMP 3.1 - Nested num threads array
3319typedef struct kmp_nested_nthreads_t {
3320 int *nth;
3321 int size;
3322 int used;
3323} kmp_nested_nthreads_t;
3324
3325extern kmp_nested_nthreads_t __kmp_nested_nth;
3326
3327#if KMP_USE_ADAPTIVE_LOCKS(0 || 1) && !0
3328
3329// Parameters for the speculative lock backoff system.
3330struct kmp_adaptive_backoff_params_t {
3331 // Number of soft retries before it counts as a hard retry.
3332 kmp_uint32 max_soft_retries;
3333 // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
3334 // the right
3335 kmp_uint32 max_badness;
3336};
3337
3338extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
3339
3340#if KMP_DEBUG_ADAPTIVE_LOCKS0
3341extern const char *__kmp_speculative_statsfile;
3342#endif
3343
3344#endif // KMP_USE_ADAPTIVE_LOCKS
3345
3346extern int __kmp_display_env; /* TRUE or FALSE */
3347extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
3348extern int __kmp_omp_cancellation; /* TRUE or FALSE */
3349extern int __kmp_nteams;
3350extern int __kmp_teams_thread_limit;
3351
3352/* ------------------------------------------------------------------------- */
3353
3354/* the following are protected by the fork/join lock */
3355/* write: lock read: anytime */
3356extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
3357/* Holds old arrays of __kmp_threads until library shutdown */
3358extern kmp_old_threads_list_t *__kmp_old_threads_list;
3359/* read/write: lock */
3360extern volatile kmp_team_t *__kmp_team_pool;
3361extern volatile kmp_info_t *__kmp_thread_pool;
3362extern kmp_info_t *__kmp_thread_pool_insert_pt;
3363
3364// total num threads reachable from some root thread including all root threads
3365extern volatile int __kmp_nth;
3366/* total number of threads reachable from some root thread including all root
3367 threads, and those in the thread pool */
3368extern volatile int __kmp_all_nth;
3369extern std::atomic<int> __kmp_thread_pool_active_nth;
3370
3371extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
3372/* end data protected by fork/join lock */
3373/* ------------------------------------------------------------------------- */
3374
3375#define __kmp_get_gtid() __kmp_get_global_thread_id()
3376#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
3377#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
3378#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
3379#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
3380
3381// AT: Which way is correct?
3382// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
3383// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
3384#define __kmp_get_team_num_threads(gtid) \
3385 (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
3386
3387static inline bool KMP_UBER_GTID(int gtid) {
3388 KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);
3389 KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);
3390 return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&
3391 __kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);
3392}
3393
3394static inline int __kmp_tid_from_gtid(int gtid) {
3395 KMP_DEBUG_ASSERT(gtid >= 0);
8. Assuming 'gtid' is < 0
9. Taking true branch
10. Value assigned to 'ompt_enabled.enabled', which participates in a condition later
11. Value assigned to 'ompt_enabled.ompt_callback_sync_region_wait', which participates in a condition later
12. Value assigned to 'ompt_callbacks.ompt_callback_sync_region_wait_callback'
3396 return __kmp_threads[gtid]->th.th_info.ds.ds_tid;
3397}
3398
3399static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {
3400 KMP_DEBUG_ASSERT(tid >= 0 && team);
3401 return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;
3402}
3403
3404static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {
3405 KMP_DEBUG_ASSERT(thr);
3406 return thr->th.th_info.ds.ds_gtid;
3407}
3408
3409static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {
3410 KMP_DEBUG_ASSERT(gtid >= 0);
3411 return __kmp_threads[gtid];
3412}
3413
3414static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {
3415 KMP_DEBUG_ASSERT(gtid >= 0);
3416 return __kmp_threads[gtid]->th.th_team;
3417}
3418
3419static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {
3420 if (UNLIKELY(gtid < 0 || gtid >= __kmp_threads_capacity))
3421 KMP_FATAL(ThreadIdentInvalid);
3422}
3424#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
3425extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT
3426extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled
3427extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled
3428extern int __kmp_mwait_hints; // Hints to pass in to mwait
3429#endif
3430
3431#if KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
3432extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists
3433extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE
3434extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE
3435extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)
3436#endif
3437
3438/* ------------------------------------------------------------------------- */
3439
3440extern kmp_global_t __kmp_global; /* global status */
3441
3442extern kmp_info_t __kmp_monitor;
3443// For Debugging Support Library
3444extern std::atomic<kmp_int32> __kmp_team_counter;
3445// For Debugging Support Library
3446extern std::atomic<kmp_int32> __kmp_task_counter;
3447
3448#if USE_DEBUGGER
3449#define _KMP_GEN_ID(counter) \
3450 (__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)
3451#else
3452#define _KMP_GEN_ID(counter) (~0)
3453#endif /* USE_DEBUGGER */
3454
3455#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
3456#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
3457
3458/* ------------------------------------------------------------------------ */
3459
3460extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
3461 size_t size, char const *format, ...);
3462
3463extern void __kmp_serial_initialize(void);
3464extern void __kmp_middle_initialize(void);
3465extern void __kmp_parallel_initialize(void);
3466
3467extern void __kmp_internal_begin(void);
3468extern void __kmp_internal_end_library(int gtid);
3469extern void __kmp_internal_end_thread(int gtid);
3470extern void __kmp_internal_end_atexit(void);
3471extern void __kmp_internal_end_dtor(void);
3472extern void __kmp_internal_end_dest(void *);
3473
3474extern int __kmp_register_root(int initial_thread);
3475extern void __kmp_unregister_root(int gtid);
3476extern void __kmp_unregister_library(void); // called by __kmp_internal_end()
3477
3478extern int __kmp_ignore_mppbeg(void);
3479extern int __kmp_ignore_mppend(void);
3480
3481extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
3482extern void __kmp_exit_single(int gtid);
3483
3484extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3485extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
3486
3487#ifdef USE_LOAD_BALANCE1
3488extern int __kmp_get_load_balance(int);
3489#endif
3490
3491extern int __kmp_get_global_thread_id(void);
3492extern int __kmp_get_global_thread_id_reg(void);
3493extern void __kmp_exit_thread(int exit_status);
3494extern void __kmp_abort(char const *format, ...);
3495extern void __kmp_abort_thread(void);
3496KMP_NORETURN[[noreturn]] extern void __kmp_abort_process(void);
3497extern void __kmp_warn(char const *format, ...);
3498
3499extern void __kmp_set_num_threads(int new_nth, int gtid);
3500
3501// Returns current thread (pointer to kmp_info_t). Current thread *must* be
3502// registered.
3503static inline kmp_info_t *__kmp_entry_thread() {
3504 int gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();
3505
3506 return __kmp_threads[gtid];
3507}
3508
3509extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
3510extern int __kmp_get_max_active_levels(int gtid);
3511extern int __kmp_get_ancestor_thread_num(int gtid, int level);
3512extern int __kmp_get_team_size(int gtid, int level);
3513extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
3514extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
3515
3516extern unsigned short __kmp_get_random(kmp_info_t *thread);
3517extern void __kmp_init_random(kmp_info_t *thread);
3518
3519extern kmp_r_sched_t __kmp_get_schedule_global(void);
3520extern void __kmp_adjust_num_threads(int new_nproc);
3521extern void __kmp_check_stksize(size_t *val);
3522
3523extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
3524extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
3525extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
3526#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
3527#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
3528#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
3529
3530#if USE_FAST_MEMORY
3531extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
3532 size_t size KMP_SRC_LOC_DECL);
3533extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
3534extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
3535extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
3536#define __kmp_fast_allocate(this_thr, size) \
3537 ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
3538#define __kmp_fast_free(this_thr, ptr) \
3539 ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
3540#endif
3541
3542extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
3543extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
3544 size_t elsize KMP_SRC_LOC_DECL);
3545extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
3546 size_t size KMP_SRC_LOC_DECL);
3547extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
3548#define __kmp_thread_malloc(th, size) \
3549 ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
3550#define __kmp_thread_calloc(th, nelem, elsize) \
3551 ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
3552#define __kmp_thread_realloc(th, ptr, size) \
3553 ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
3554#define __kmp_thread_free(th, ptr) \
3555 ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
3556
3557extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
3558
3559extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
3560 kmp_proc_bind_t proc_bind);
3561extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
3562 int num_threads);
3563extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
3564 int num_teams_ub, int num_threads);
3565
3566extern void __kmp_yield();
3567
3568extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3569 enum sched_type schedule, kmp_int32 lb,
3570 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
3571extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3572 enum sched_type schedule, kmp_uint32 lb,
3573 kmp_uint32 ub, kmp_int32 st,
3574 kmp_int32 chunk);
3575extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3576 enum sched_type schedule, kmp_int64 lb,
3577 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
3578extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3579 enum sched_type schedule, kmp_uint64 lb,
3580 kmp_uint64 ub, kmp_int64 st,
3581 kmp_int64 chunk);
3582
3583extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
3584 kmp_int32 *p_last, kmp_int32 *p_lb,
3585 kmp_int32 *p_ub, kmp_int32 *p_st);
3586extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
3587 kmp_int32 *p_last, kmp_uint32 *p_lb,
3588 kmp_uint32 *p_ub, kmp_int32 *p_st);
3589extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
3590 kmp_int32 *p_last, kmp_int64 *p_lb,
3591 kmp_int64 *p_ub, kmp_int64 *p_st);
3592extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
3593 kmp_int32 *p_last, kmp_uint64 *p_lb,
3594 kmp_uint64 *p_ub, kmp_int64 *p_st);
3595
3596extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
3597extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
3598extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
3599extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
3600
3601#ifdef KMP_GOMP_COMPAT
3602
3603extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
3604 enum sched_type schedule, kmp_int32 lb,
3605 kmp_int32 ub, kmp_int32 st,
3606 kmp_int32 chunk, int push_ws);
3607extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
3608 enum sched_type schedule, kmp_uint32 lb,
3609 kmp_uint32 ub, kmp_int32 st,
3610 kmp_int32 chunk, int push_ws);
3611extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
3612 enum sched_type schedule, kmp_int64 lb,
3613 kmp_int64 ub, kmp_int64 st,
3614 kmp_int64 chunk, int push_ws);
3615extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
3616 enum sched_type schedule, kmp_uint64 lb,
3617 kmp_uint64 ub, kmp_int64 st,
3618 kmp_int64 chunk, int push_ws);
3619extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
3620extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
3621extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
3622extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
3623
3624#endif /* KMP_GOMP_COMPAT */
3625
3626extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
3627extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
3628extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
3629extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
3630extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
3631extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
3632 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
3633 void *obj);
3634extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
3635 kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);
3636
3637extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
3638 int final_spin
3639#if USE_ITT_BUILD1
3640 ,
3641 void *itt_sync_obj
3642#endif
3643);
3644extern void __kmp_release_64(kmp_flag_64<> *flag);
3645
3646extern void __kmp_infinite_loop(void);
3647
3648extern void __kmp_cleanup(void);
3649
3650#if KMP_HANDLE_SIGNALS(1 || 0)
3651extern int __kmp_handle_signals;
3652extern void __kmp_install_signals(int parallel_init);
3653extern void __kmp_remove_signals(void);
3654#endif
3655
3656extern void __kmp_clear_system_time(void);
3657extern void __kmp_read_system_time(double *delta);
3658
3659extern void __kmp_check_stack_overlap(kmp_info_t *thr);
3660
3661extern void __kmp_expand_host_name(char *buffer, size_t size);
3662extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
3663
3664#if KMP_ARCH_X860 || KMP_ARCH_X86_641 || (KMP_OS_WINDOWS0 && (KMP_ARCH_AARCH640 || KMP_ARCH_ARM))
3665extern void
3666__kmp_initialize_system_tick(void); /* Initialize timer tick value */
3667#endif
3668
3669extern void
3670__kmp_runtime_initialize(void); /* machine specific initialization */
3671extern void __kmp_runtime_destroy(void);
3672
3673#if KMP_AFFINITY_SUPPORTED1
3674extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
3675 kmp_affin_mask_t *mask);
3676extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
3677 kmp_affin_mask_t *mask);
3678extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
3679extern void __kmp_affinity_uninitialize(void);
3680extern void __kmp_affinity_set_init_mask(
3681 int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3682extern void __kmp_affinity_set_place(int gtid);
3683extern void __kmp_affinity_determine_capable(const char *env_var);
3684extern int __kmp_aux_set_affinity(void **mask);
3685extern int __kmp_aux_get_affinity(void **mask);
3686extern int __kmp_aux_get_affinity_max_proc();
3687extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3688extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3689extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3690extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
3691#if KMP_OS_LINUX1 || KMP_OS_FREEBSD0
3692extern int kmp_set_thread_affinity_mask_initial(void);
3693#endif
3694static inline void __kmp_assign_root_init_mask() {
3695 int gtid = __kmp_entry_gtid();
3696 kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
3697 if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
3698 __kmp_affinity_set_init_mask(gtid, TRUE);
3699 r->r.r_affinity_assigned = TRUE;
3700 }
3701}
3702static inline void __kmp_reset_root_init_mask(int gtid) {
3703 if (!KMP_AFFINITY_CAPABLE())
3704 return;
3705 kmp_info_t *th = __kmp_threads[gtid];
3706 kmp_root_t *r = th->th.th_root;
3707 if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) {
3708 __kmp_set_system_affinity(__kmp_affin_origMask, FALSE);
3709 KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask);
3710 r->r.r_affinity_assigned = FALSE;
3711 }
3712}
3713#else /* KMP_AFFINITY_SUPPORTED */
3714#define __kmp_assign_root_init_mask() /* Nothing */
3715static inline void __kmp_reset_root_init_mask(int gtid) {}
3716#endif /* KMP_AFFINITY_SUPPORTED */
3717// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
3718// format string is for affinity, so platforms that do not support
3719// affinity can still use the other fields, e.g., %n for num_threads
3720extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
3721 kmp_str_buf_t *buffer);
3722extern void __kmp_aux_display_affinity(int gtid, const char *format);
3723
3724extern void __kmp_cleanup_hierarchy();
3725extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3726
3727#if KMP_USE_FUTEX(1 && (0 || 1 || KMP_ARCH_ARM || 0))
3728
3729extern int __kmp_futex_determine_capable(void);
3730
3731#endif // KMP_USE_FUTEX
3732
3733extern void __kmp_gtid_set_specific(int gtid);
3734extern int __kmp_gtid_get_specific(void);
3735
3736extern double __kmp_read_cpu_time(void);
3737
3738extern int __kmp_read_system_info(struct kmp_sys_info *info);
3739
3740#if KMP_USE_MONITOR
3741extern void __kmp_create_monitor(kmp_info_t *th);
3742#endif
3743
3744extern void *__kmp_launch_thread(kmp_info_t *thr);
3745
3746extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
3747
3748#if KMP_OS_WINDOWS0
3749extern int __kmp_still_running(kmp_info_t *th);
3750extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
3751extern void __kmp_free_handle(kmp_thread_t tHandle);
3752#endif
3753
3754#if KMP_USE_MONITOR
3755extern void __kmp_reap_monitor(kmp_info_t *th);
3756#endif
3757extern void __kmp_reap_worker(kmp_info_t *th);
3758extern void __kmp_terminate_thread(int gtid);
3759
3760extern int __kmp_try_suspend_mx(kmp_info_t *th);
3761extern void __kmp_lock_suspend_mx(kmp_info_t *th);
3762extern void __kmp_unlock_suspend_mx(kmp_info_t *th);
3763
3764extern void __kmp_elapsed(double *);
3765extern void __kmp_elapsed_tick(double *);
3766
3767extern void __kmp_enable(int old_state);
3768extern void __kmp_disable(int *old_state);
3769
3770extern void __kmp_thread_sleep(int millis);
3771
3772extern void __kmp_common_initialize(void);
3773extern void __kmp_common_destroy(void);
3774extern void __kmp_common_destroy_gtid(int gtid);
3775
3776#if KMP_OS_UNIX1
3777extern void __kmp_register_atfork(void);
3778#endif
3779extern void __kmp_suspend_initialize(void);
3780extern void __kmp_suspend_initialize_thread(kmp_info_t *th);
3781extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
3782
3783extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
3784 int tid);
3785extern kmp_team_t *
3786__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
3787#if OMPT_SUPPORT1
3788 ompt_data_t ompt_parallel_data,
3789#endif
3790 kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
3791 int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
3792extern void __kmp_free_thread(kmp_info_t *);
3793extern void __kmp_free_team(kmp_root_t *,
3794 kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
3795extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
3796
3797/* ------------------------------------------------------------------------ */
3798
3799extern void __kmp_initialize_bget(kmp_info_t *th);
3800extern void __kmp_finalize_bget(kmp_info_t *th);
3801
3802KMP_EXPORTextern void *kmpc_malloc(size_t size);
3803KMP_EXPORTextern void *kmpc_aligned_malloc(size_t size, size_t alignment);
3804KMP_EXPORTextern void *kmpc_calloc(size_t nelem, size_t elsize);
3805KMP_EXPORTextern void *kmpc_realloc(void *ptr, size_t size);
3806KMP_EXPORTextern void kmpc_free(void *ptr);
3807
3808/* declarations for internal use */
3809
3810extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
3811 size_t reduce_size, void *reduce_data,
3812 void (*reduce)(void *, void *));
3813extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
3814extern int __kmp_barrier_gomp_cancel(int gtid);
3815
3816/*!
3817 * Tell the fork call which compiler generated the fork call, and therefore how
3818 * to deal with the call.
3819 */
3820enum fork_context_e {
3821 fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
3822 microtask internally. */
3823 fork_context_intel, /**< Called from Intel generated code. */
3824 fork_context_last
3825};
3826extern int __kmp_fork_call(ident_t *loc, int gtid,
3827 enum fork_context_e fork_context, kmp_int32 argc,
3828 microtask_t microtask, launch_t invoker,
3829 kmp_va_list ap);
3830
3831extern void __kmp_join_call(ident_t *loc, int gtid
3832#if OMPT_SUPPORT1
3833 ,
3834 enum fork_context_e fork_context
3835#endif
3836 ,
3837 int exit_teams = 0);
3838
3839extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3840extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
3841extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
3842extern int __kmp_invoke_task_func(int gtid);
3843extern void __kmp_run_before_invoked_task(int gtid, int tid,
3844 kmp_info_t *this_thr,
3845 kmp_team_t *team);
3846extern void __kmp_run_after_invoked_task(int gtid, int tid,
3847 kmp_info_t *this_thr,
3848 kmp_team_t *team);
3849
3850// should never have been exported
3851KMP_EXPORTextern int __kmpc_invoke_task_func(int gtid);
3852extern int __kmp_invoke_teams_master(int gtid);
3853extern void __kmp_teams_master(int gtid);
3854extern int __kmp_aux_get_team_num();
3855extern int __kmp_aux_get_num_teams();
3856extern void __kmp_save_internal_controls(kmp_info_t *thread);
3857extern void __kmp_user_set_library(enum library_type arg);
3858extern void __kmp_aux_set_library(enum library_type arg);
3859extern void __kmp_aux_set_stacksize(size_t arg);
3860extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
3861extern void __kmp_aux_set_defaults(char const *str, size_t len);
3862
3863/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
3864void kmpc_set_blocktime(int arg);
3865void ompc_set_nested(int flag);
3866void ompc_set_dynamic(int flag);
3867void ompc_set_num_threads(int arg);
3868
3869extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
3870 kmp_team_t *team, int tid);
3871extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
3872extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
3873 kmp_tasking_flags_t *flags,
3874 size_t sizeof_kmp_task_t,
3875 size_t sizeof_shareds,
3876 kmp_routine_entry_t task_entry);
3877extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
3878 kmp_team_t *team, int tid,
3879 int set_curr_task);
3880extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
3881extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
3882
3883extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3884 int gtid,
3885 kmp_task_t *task);
3886extern void __kmp_fulfill_event(kmp_event_t *event);
3887
3888extern void __kmp_free_task_team(kmp_info_t *thread,
3889 kmp_task_team_t *task_team);
3890extern void __kmp_reap_task_teams(void);
3891extern void __kmp_wait_to_unref_task_teams(void);
3892extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
3893 int always);
3894extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
3895extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
3896#if USE_ITT_BUILD1
3897 ,
3898 void *itt_sync_obj
3899#endif /* USE_ITT_BUILD */
3900 ,
3901 int wait = 1);
3902extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
3903 int gtid);
3904
3905extern int __kmp_is_address_mapped(void *addr);
3906extern kmp_uint64 __kmp_hardware_timestamp(void);
3907
3908#if KMP_OS_UNIX1
3909extern int __kmp_read_from_file(char const *path, char const *format, ...);
3910#endif
3911
3912/* ------------------------------------------------------------------------ */
3913//
3914// Assembly routines that have no compiler intrinsic replacement
3915//
3916
3917extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
3918 void *argv[]
3919#if OMPT_SUPPORT1
3920 ,
3921 void **exit_frame_ptr
3922#endif
3923);
3924
3925/* ------------------------------------------------------------------------ */
3926
3927KMP_EXPORTextern void __kmpc_begin(ident_t *, kmp_int32 flags);
3928KMP_EXPORTextern void __kmpc_end(ident_t *);
3929
3930KMP_EXPORTextern void __kmpc_threadprivate_register_vec(ident_t *, void *data,
3931 kmpc_ctor_vec ctor,
3932 kmpc_cctor_vec cctor,
3933 kmpc_dtor_vec dtor,
3934 size_t vector_length);
3935KMP_EXPORTextern void __kmpc_threadprivate_register(ident_t *, void *data,
3936 kmpc_ctor ctor, kmpc_cctor cctor,
3937 kmpc_dtor dtor);
3938KMP_EXPORTextern void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
3939 void *data, size_t size);
3940
3941KMP_EXPORTextern kmp_int32 __kmpc_global_thread_num(ident_t *);
3942KMP_EXPORTextern kmp_int32 __kmpc_global_num_threads(ident_t *);
3943KMP_EXPORTextern kmp_int32 __kmpc_bound_thread_num(ident_t *);
3944KMP_EXPORTextern kmp_int32 __kmpc_bound_num_threads(ident_t *);
3945
3946KMP_EXPORTextern kmp_int32 __kmpc_ok_to_fork(ident_t *);
3947KMP_EXPORTextern void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
3948 kmpc_micro microtask, ...);
3949KMP_EXPORTextern void __kmpc_fork_call_if(ident_t *loc, kmp_int32 nargs,
3950 kmpc_micro microtask, kmp_int32 cond,
3951 void *args);
3952
3953KMP_EXPORTextern void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
3954KMP_EXPORTextern void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
3955
3956KMP_EXPORTextern void __kmpc_flush(ident_t *);
3957KMP_EXPORTextern void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
3958KMP_EXPORTextern kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
3959KMP_EXPORTextern void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
3960KMP_EXPORTextern kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,
3961 kmp_int32 filter);
3962KMP_EXPORTextern void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);
3963KMP_EXPORTextern void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
3964KMP_EXPORTextern void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
3965KMP_EXPORTextern void __kmpc_critical(ident_t *, kmp_int32 global_tid,
3966 kmp_critical_name *);
3967KMP_EXPORTextern void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
3968 kmp_critical_name *);
3969KMP_EXPORTextern void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
3970 kmp_critical_name *, uint32_t hint);
3971
3972KMP_EXPORTextern kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
3973KMP_EXPORTextern void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
3974
3975KMP_EXPORTextern kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
3976 kmp_int32 global_tid);
3977
3978KMP_EXPORTextern kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
3979KMP_EXPORTextern void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
3980
3981KMP_EXPORTextern kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);
3982KMP_EXPORTextern kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,
3983 kmp_int32 numberOfSections);
3984KMP_EXPORTextern void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);
3985
3986KMP_EXPORTextern void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
3987 kmp_int32 schedtype, kmp_int32 *plastiter,
3988 kmp_int *plower, kmp_int *pupper,
3989 kmp_int *pstride, kmp_int incr,
3990 kmp_int chunk);
3991
3992KMP_EXPORTextern void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
3993
3994KMP_EXPORTextern void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
3995 size_t cpy_size, void *cpy_data,
3996 void (*cpy_func)(void *, void *),
3997 kmp_int32 didit);
3998
3999KMP_EXPORTextern void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,
4000 void *cpy_data);
4001
4002extern void KMPC_SET_NUM_THREADS(int arg);
4003extern void KMPC_SET_DYNAMIC(int flag);
4004extern void KMPC_SET_NESTED(int flag);
4005
4006/* OMP 3.0 tasking interface routines */
4007KMP_EXPORTextern kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
4008 kmp_task_t *new_task);
4009KMP_EXPORTextern kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
4010 kmp_int32 flags,
4011 size_t sizeof_kmp_task_t,
4012 size_t sizeof_shareds,
4013 kmp_routine_entry_t task_entry);
4014KMP_EXPORTextern kmp_task_t *__kmpc_omp_target_task_alloc(
4015 ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
4016 size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);
4017KMP_EXPORTextern void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
4018 kmp_task_t *task);
4019KMP_EXPORTextern void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
4020 kmp_task_t *task);
4021KMP_EXPORTextern kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
4022 kmp_task_t *new_task);
4023KMP_EXPORTextern kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
4024KMP_EXPORTextern kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
4025 int end_part);
4026
4027#if TASK_UNUSED
4028void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
4029void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
4030 kmp_task_t *task);
4031#endif // TASK_UNUSED
4032
4033/* ------------------------------------------------------------------------ */
4034
4035KMP_EXPORTextern void __kmpc_taskgroup(ident_t *loc, int gtid);
4036KMP_EXPORTextern void __kmpc_end_taskgroup(ident_t *loc, int gtid);
4037
4038KMP_EXPORTextern kmp_int32 __kmpc_omp_task_with_deps(
4039 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
4040 kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4041 kmp_depend_info_t *noalias_dep_list);
4042KMP_EXPORTextern void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,
4043 kmp_int32 ndeps,
4044 kmp_depend_info_t *dep_list,
4045 kmp_int32 ndeps_noalias,
4046 kmp_depend_info_t *noalias_dep_list);
4047/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.
4048 * Placeholder for taskwait with nowait clause.*/
4049KMP_EXPORTextern void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
4050 kmp_int32 ndeps,
4051 kmp_depend_info_t *dep_list,
4052 kmp_int32 ndeps_noalias,
4053 kmp_depend_info_t *noalias_dep_list,
4054 kmp_int32 has_no_wait);
4055
4056extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
4057 bool serialize_immediate);
4058
4059KMP_EXPORTextern kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
4060 kmp_int32 cncl_kind);
4061KMP_EXPORTextern kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
4062 kmp_int32 cncl_kind);
4063KMP_EXPORTextern kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
4064KMP_EXPORTextern int __kmp_get_cancellation_status(int cancel_kind);
4065
4066KMP_EXPORTextern void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
4067KMP_EXPORTextern void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
4068KMP_EXPORTextern void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
4069 kmp_int32 if_val, kmp_uint64 *lb,
4070 kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
4071 kmp_int32 sched, kmp_uint64 grainsize,
4072 void *task_dup);
4073KMP_EXPORTextern void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,
4074 kmp_task_t *task, kmp_int32 if_val,
4075 kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
4076 kmp_int32 nogroup, kmp_int32 sched,
4077 kmp_uint64 grainsize, kmp_int32 modifier,
4078 void *task_dup);
4079KMP_EXPORTextern void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
4080KMP_EXPORTextern void *__kmpc_taskred_init(int gtid, int num_data, void *data);
4081KMP_EXPORTextern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
4082KMP_EXPORTextern void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,
4083 int is_ws, int num,
4084 void *data);
4085KMP_EXPORTextern void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,
4086 int num, void *data);
4087KMP_EXPORTextern void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
4088 int is_ws);
4089KMP_EXPORTextern kmp_int32 __kmpc_omp_reg_task_with_affinity(
4090 ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
4091 kmp_task_affinity_info_t *affin_list);
4092KMP_EXPORTextern void __kmp_set_num_teams(int num_teams);
4093KMP_EXPORTextern int __kmp_get_max_teams(void);
4094KMP_EXPORTextern void __kmp_set_teams_thread_limit(int limit);
4095KMP_EXPORTextern int __kmp_get_teams_thread_limit(void);
4096
4097/* Interface target task integration */
4098KMP_EXPORTextern void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid);
4099KMP_EXPORTextern bool __kmpc_omp_has_task_team(kmp_int32 gtid);
4100
4101/* Lock interface routines (fast versions with gtid passed in) */
4102KMP_EXPORTextern void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
4103 void **user_lock);
4104KMP_EXPORTextern void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
4105 void **user_lock);
4106KMP_EXPORTextern void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
4107 void **user_lock);
4108KMP_EXPORTextern void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
4109 void **user_lock);
4110KMP_EXPORTextern void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4111KMP_EXPORTextern void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
4112 void **user_lock);
4113KMP_EXPORTextern void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
4114 void **user_lock);
4115KMP_EXPORTextern void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
4116 void **user_lock);
4117KMP_EXPORTextern int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
4118KMP_EXPORTextern int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
4119 void **user_lock);
4120
4121KMP_EXPORTextern void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4122 void **user_lock, uintptr_t hint);
4123KMP_EXPORTextern void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
4124 void **user_lock,
4125 uintptr_t hint);
4126
4127/* Interface to fast scalable reduce methods routines */
4128
4129KMP_EXPORTextern kmp_int32 __kmpc_reduce_nowait(
4130 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4131 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4132 kmp_critical_name *lck);
4133KMP_EXPORTextern void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
4134 kmp_critical_name *lck);
4135KMP_EXPORTextern kmp_int32 __kmpc_reduce(
4136 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4137 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4138 kmp_critical_name *lck);
4139KMP_EXPORTextern void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
4140 kmp_critical_name *lck);
4141
4142/* Internal fast reduction routines */
4143
4144extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
4145 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
4146 void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
4147 kmp_critical_name *lck);
4148
4149// this function is for testing set/get/determine reduce method
4150KMP_EXPORTextern kmp_int32 __kmp_get_reduce_method(void);
4151
4152KMP_EXPORTextern kmp_uint64 __kmpc_get_taskid();
4153KMP_EXPORTextern kmp_uint64 __kmpc_get_parent_taskid();
4154
4155// C++ port
4156// missing 'extern "C"' declarations
4157
4158KMP_EXPORTextern kmp_int32 __kmpc_in_parallel(ident_t *loc);
4159KMP_EXPORTextern void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
4160KMP_EXPORTextern void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
4161 kmp_int32 num_threads);
4162
4163KMP_EXPORTextern void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
4164 int proc_bind);
4165KMP_EXPORTextern void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
4166 kmp_int32 num_teams,
4167 kmp_int32 num_threads);
4168/* Function for OpenMP 5.1 num_teams clause */
4169KMP_EXPORTextern void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
4170 kmp_int32 num_teams_lb,
4171 kmp_int32 num_teams_ub,
4172 kmp_int32 num_threads);
4173KMP_EXPORTextern void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
4174 kmpc_micro microtask, ...);
4175struct kmp_dim { // loop bounds info casted to kmp_int64
4176 kmp_int64 lo; // lower
4177 kmp_int64 up; // upper
4178 kmp_int64 st; // stride
4179};
4180KMP_EXPORTextern void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
4181 kmp_int32 num_dims,
4182 const struct kmp_dim *dims);
4183KMP_EXPORTextern void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
4184 const kmp_int64 *vec);
4185KMP_EXPORTextern void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
4186 const kmp_int64 *vec);
4187KMP_EXPORTextern void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
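For orientation, each kmp_dim record above restates one dimension of an ordered (doacross) loop exactly as written in the source: inclusive lower bound, inclusive upper bound, and stride. A compiler targeting this interface would typically pass an array with one record per dimension to __kmpc_doacross_init and then describe cross-iteration dependences through __kmpc_doacross_wait/__kmpc_doacross_post using iteration vectors. A minimal stand-alone sketch of filling one dimension (DemoDim only mirrors kmp_dim for illustration; the real type comes from kmp.h):

#include <cstdint>
#include <cstdio>

// Mirrors struct kmp_dim for illustration: lower bound, upper bound, stride.
struct DemoDim {
  std::int64_t lo;
  std::int64_t up;
  std::int64_t st;
};

int main() {
  std::int64_t n = 100;
  // for (i = 0; i <= n - 1; i += 4)  ==>  lo = 0, up = n - 1, st = 4.
  DemoDim dim = {0, n - 1, 4};
  std::printf("lo=%lld up=%lld st=%lld\n", (long long)dim.lo,
              (long long)dim.up, (long long)dim.st);
}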
4188
4189KMP_EXPORTextern void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
4190 void *data, size_t size,
4191 void ***cache);
4192
4193// The routines below are not exported.
4194// Consider making them 'static' in corresponding source files.
4195void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
4196 void *data_addr, size_t pc_size);
4197struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
4198 void *data_addr,
4199 size_t pc_size);
4200void __kmp_threadprivate_resize_cache(int newCapacity);
4201void __kmp_cleanup_threadprivate_caches();
4202
4203// ompc_, kmpc_ entries moved from omp.h.
4204#if KMP_OS_WINDOWS0
4205#define KMPC_CONVENTION __cdecl
4206#else
4207#define KMPC_CONVENTION
4208#endif
4209
4210#ifndef __OMP_H
4211typedef enum omp_sched_t {
4212 omp_sched_static = 1,
4213 omp_sched_dynamic = 2,
4214 omp_sched_guided = 3,
4215 omp_sched_auto = 4
4216} omp_sched_t;
4217typedef void *kmp_affinity_mask_t;
4218#endif
4219
4220KMP_EXPORTextern void KMPC_CONVENTION ompc_set_max_active_levels(int);
4221KMP_EXPORTextern void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
4222KMP_EXPORTextern int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
4223KMP_EXPORTextern int KMPC_CONVENTION ompc_get_team_size(int);
4224KMP_EXPORTextern int KMPC_CONVENTION
4225kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
4226KMP_EXPORTextern int KMPC_CONVENTION
4227kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
4228KMP_EXPORTextern int KMPC_CONVENTION
4229kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
4230
4231KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_stacksize(int);
4232KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
4233KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_library(int);
4234KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_defaults(char const *);
4235KMP_EXPORTextern void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
4236void KMP_EXPAND_NAME(ompc_set_affinity_format)__kmp_api_ompc_set_affinity_format(char const *format);
4237size_t KMP_EXPAND_NAME(ompc_get_affinity_format)__kmp_api_ompc_get_affinity_format(char *buffer, size_t size);
4238void KMP_EXPAND_NAME(ompc_display_affinity)__kmp_api_ompc_display_affinity(char const *format);
4239size_t KMP_EXPAND_NAME(ompc_capture_affinity)__kmp_api_ompc_capture_affinity(char *buffer, size_t buf_size,
4240 char const *format);
4241
4242enum kmp_target_offload_kind {
4243 tgt_disabled = 0,
4244 tgt_default = 1,
4245 tgt_mandatory = 2
4246};
4247typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;
4248// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise
4249extern kmp_target_offload_kind_t __kmp_target_offload;
4250extern int __kmpc_get_target_offload();
4251
4252// Constants used in libomptarget
4253#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.
4254#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".
4255
4256// OMP Pause Resource
4257
4258// The following enum is used both to set the status in __kmp_pause_status, and
4259// as the internal equivalent of the externally-visible omp_pause_resource_t.
4260typedef enum kmp_pause_status_t {
4261 kmp_not_paused = 0, // status is not paused, or, requesting resume
4262 kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause
4263 kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause
4264} kmp_pause_status_t;
4265
4266// This stores the pause state of the runtime
4267extern kmp_pause_status_t __kmp_pause_status;
4268extern int __kmpc_pause_resource(kmp_pause_status_t level);
4269extern int __kmp_pause_resource(kmp_pause_status_t level);
4270// Soft resume sets __kmp_pause_status, and wakes up all threads.
4271extern void __kmp_resume_if_soft_paused();
4272// Hard resume simply resets the status to not paused. Library will appear to
4273// be uninitialized after hard pause. Let OMP constructs trigger required
4274// initializations.
4275static inline void __kmp_resume_if_hard_paused() {
4276 if (__kmp_pause_status == kmp_hard_paused) {
4277 __kmp_pause_status = kmp_not_paused;
4278 }
4279}
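The asymmetry called out in the comments above, namely that a soft resume must wake the threads that went to sleep while a hard resume only clears the flag and lets the next OpenMP construct re-initialize the library, can be pictured with a stand-alone toy state machine (the demo_* names are illustrative, not the runtime's):

#include <cstdio>

// Illustrative copy of the tri-state used for pause-resource bookkeeping.
enum demo_pause_status {
  demo_not_paused = 0,
  demo_soft_paused = 1,
  demo_hard_paused = 2
};

static demo_pause_status g_status = demo_not_paused;

static void demo_wake_all_threads() { std::puts("waking sleeping threads"); }

// Soft resume: clear the flag *and* wake everything that went to sleep.
static void demo_resume_if_soft_paused() {
  if (g_status == demo_soft_paused) {
    g_status = demo_not_paused;
    demo_wake_all_threads();
  }
}

// Hard resume: just clear the flag; the library looks uninitialized and the
// next OpenMP construct triggers the required re-initialization.
static void demo_resume_if_hard_paused() {
  if (g_status == demo_hard_paused)
    g_status = demo_not_paused;
}

int main() {
  g_status = demo_soft_paused;
  demo_resume_if_soft_paused();
  g_status = demo_hard_paused;
  demo_resume_if_hard_paused();
}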
4280
4281extern void __kmp_omp_display_env(int verbose);
4282
4283// 1: it is initializing hidden helper team
4284extern volatile int __kmp_init_hidden_helper;
4285// 1: the hidden helper team is done
4286extern volatile int __kmp_hidden_helper_team_done;
4287// 1: enable hidden helper task
4288extern kmp_int32 __kmp_enable_hidden_helper;
4289// Main thread of hidden helper team
4290extern kmp_info_t *__kmp_hidden_helper_main_thread;
4291// Descriptors for the hidden helper threads
4292extern kmp_info_t **__kmp_hidden_helper_threads;
4293// Number of hidden helper threads
4294extern kmp_int32 __kmp_hidden_helper_threads_num;
4295// Number of hidden helper tasks that have not been executed yet
4296extern std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
4297
4298extern void __kmp_hidden_helper_initialize();
4299extern void __kmp_hidden_helper_threads_initz_routine();
4300extern void __kmp_do_initialize_hidden_helper_threads();
4301extern void __kmp_hidden_helper_threads_initz_wait();
4302extern void __kmp_hidden_helper_initz_release();
4303extern void __kmp_hidden_helper_threads_deinitz_wait();
4304extern void __kmp_hidden_helper_threads_deinitz_release();
4305extern void __kmp_hidden_helper_main_thread_wait();
4306extern void __kmp_hidden_helper_worker_thread_wait();
4307extern void __kmp_hidden_helper_worker_thread_signal();
4308extern void __kmp_hidden_helper_main_thread_release();
4309
4310// Check whether a given thread is a hidden helper thread
4311#define KMP_HIDDEN_HELPER_THREAD(gtid) \
4312 ((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4313
4314#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
4315 ((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4316
4317#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \
4318 ((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4319
4320#define KMP_HIDDEN_HELPER_TEAM(team) \
4321 (team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
4322
4323// Map a gtid to a hidden helper thread. The first hidden helper thread, a.k.a
4324// main thread, is skipped.
4325#define KMP_GTID_TO_SHADOW_GTID(gtid) \
4326 ((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)
4327
4328// Return the adjusted gtid value by subtracting from gtid the number
4329// of hidden helper threads. This adjusted value is the gtid the thread would
4330// have received if there were no hidden helper threads.
4331static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
4332 int adjusted_gtid = gtid;
4333 if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&
4334 gtid - __kmp_hidden_helper_threads_num >= 0) {
4335 adjusted_gtid -= __kmp_hidden_helper_threads_num;
4336 }
4337 return adjusted_gtid;
4338}
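To make the gtid bookkeeping above concrete, here is a stand-alone sketch that replays the predicates, the shadow-gtid mapping, and the adjustment for a runtime with, say, 8 hidden helper threads (the demo_* names are illustrative stand-ins for the globals and macros above):

#include <cstdio>

static int demo_hidden_helper_threads_num = 8; // stands in for __kmp_hidden_helper_threads_num

// Same predicate as KMP_HIDDEN_HELPER_THREAD: gtids 1..N are hidden helpers,
// gtid 1 is their main thread, 2..N are workers.
static bool demo_is_hidden_helper(int gtid) {
  return gtid >= 1 && gtid <= demo_hidden_helper_threads_num;
}

// Same mapping as KMP_GTID_TO_SHADOW_GTID: fold a gtid onto a worker in 2..N,
// skipping the hidden helper main thread (gtid 1).
static int demo_gtid_to_shadow_gtid(int gtid) {
  return gtid % (demo_hidden_helper_threads_num - 1) + 2;
}

// Same idea as __kmp_adjust_gtid_for_hidden_helpers: report the gtid a regular
// thread would have received if no hidden helper threads existed.
static int demo_adjust_gtid(int gtid) {
  if (demo_hidden_helper_threads_num > 0 && gtid > 0 &&
      gtid - demo_hidden_helper_threads_num >= 0)
    return gtid - demo_hidden_helper_threads_num;
  return gtid;
}

int main() {
  const int gtids[] = {0, 1, 5, 9, 12};
  for (int gtid : gtids)
    std::printf("gtid %2d: helper=%d shadow=%d adjusted=%d\n", gtid,
                demo_is_hidden_helper(gtid), demo_gtid_to_shadow_gtid(gtid),
                demo_adjust_gtid(gtid));
}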
4339
4340// Support for error directive
4341typedef enum kmp_severity_t {
4342 severity_warning = 1,
4343 severity_fatal = 2
4344} kmp_severity_t;
4345extern void __kmpc_error(ident_t *loc, int severity, const char *message);
4346
4347// Support for scope directive
4348KMP_EXPORTextern void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4349KMP_EXPORTextern void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved);
4350
4351#ifdef __cplusplus201703L
4352}
4353#endif
4354
4355template <bool C, bool S>
4356extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
4357template <bool C, bool S>
4358extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
4359template <bool C, bool S>
4360extern void __kmp_atomic_suspend_64(int th_gtid,
4361 kmp_atomic_flag_64<C, S> *flag);
4362extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
4363#if KMP_HAVE_MWAIT((0 || 1) && (1 || 0) && !0) || KMP_HAVE_UMWAIT((0 || 1) && (1 || 0) && !0)
4364template <bool C, bool S>
4365extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
4366template <bool C, bool S>
4367extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
4368template <bool C, bool S>
4369extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
4370extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
4371#endif
4372template <bool C, bool S>
4373extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
4374template <bool C, bool S>
4375extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
4376template <bool C, bool S>
4377extern void __kmp_atomic_resume_64(int target_gtid,
4378 kmp_atomic_flag_64<C, S> *flag);
4379extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
4380
4381template <bool C, bool S>
4382int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
4383 kmp_flag_32<C, S> *flag, int final_spin,
4384 int *thread_finished,
4385#if USE_ITT_BUILD1
4386 void *itt_sync_obj,
4387#endif /* USE_ITT_BUILD */
4388 kmp_int32 is_constrained);
4389template <bool C, bool S>
4390int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4391 kmp_flag_64<C, S> *flag, int final_spin,
4392 int *thread_finished,
4393#if USE_ITT_BUILD1
4394 void *itt_sync_obj,
4395#endif /* USE_ITT_BUILD */
4396 kmp_int32 is_constrained);
4397template <bool C, bool S>
4398int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
4399 kmp_atomic_flag_64<C, S> *flag,
4400 int final_spin, int *thread_finished,
4401#if USE_ITT_BUILD1
4402 void *itt_sync_obj,
4403#endif /* USE_ITT_BUILD */
4404 kmp_int32 is_constrained);
4405int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
4406 kmp_flag_oncore *flag, int final_spin,
4407 int *thread_finished,
4408#if USE_ITT_BUILD1
4409 void *itt_sync_obj,
4410#endif /* USE_ITT_BUILD */
4411 kmp_int32 is_constrained);
4412
4413extern int __kmp_nesting_mode;
4414extern int __kmp_nesting_mode_nlevels;
4415extern int *__kmp_nesting_nth_level;
4416extern void __kmp_init_nesting_mode();
4417extern void __kmp_set_nesting_mode_threads();
4418
4419/// This class safely opens and closes a C-style FILE* object using RAII
4420/// semantics. There are also methods which allow using stdout or stderr as
4421/// the underlying FILE* object. With the implicit conversion operator to
4422/// FILE*, an object with this type can be used in any function which takes
4423/// a FILE* object e.g., fprintf().
4424/// No close method is needed at use sites.
4425class kmp_safe_raii_file_t {
4426 FILE *f;
4427
4428 void close() {
4429 if (f && f != stdout && f != stderr) {
4430 fclose(f);
4431 f = nullptr;
4432 }
4433 }
4434
4435public:
4436 kmp_safe_raii_file_t() : f(nullptr) {}
4437 kmp_safe_raii_file_t(const char *filename, const char *mode,
4438 const char *env_var = nullptr)
4439 : f(nullptr) {
4440 open(filename, mode, env_var);
4441 }
4442 ~kmp_safe_raii_file_t() { close(); }
4443
4444 /// Open filename using mode. This is automatically closed in the destructor.
4445 /// The env_var parameter indicates the environment variable the filename
4446 /// came from if != nullptr.
4447 void open(const char *filename, const char *mode,
4448 const char *env_var = nullptr) {
4449 KMP_ASSERT(!f);
4450 f = fopen(filename, mode);
4451 if (!f) {
4452 int code = errno;
4453 if (env_var) {
4454 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4455 KMP_HNT(CheckEnvVar, env_var, filename), __kmp_msg_null);
4456 } else {
4457 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4458 __kmp_msg_null);
4459 }
4460 }
4461 }
4462 /// Instead of erroring out, return non-zero when
4463 /// unsuccessful fopen() for any reason
4464 int try_open(const char *filename, const char *mode) {
4465 KMP_ASSERT(!f);
4466 f = fopen(filename, mode);
4467 if (!f)
4468 return errno;
4469 return 0;
4470 }
4471 /// Set the FILE* object to stdout and output there
4472 /// No open call should happen before this call.
4473 void set_stdout() {
4474 KMP_ASSERT(!f);
4475 f = stdout;
4476 }
4477 /// Set the FILE* object to stderr and output there
4478 /// No open call should happen before this call.
4479 void set_stderr() {
4480 KMP_ASSERT(!f);
4481 f = stderr;
4482 }
4483 operator bool() { return bool(f); }
4484 operator FILE *() { return f; }
4485};
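A hedged usage sketch for the class above (it assumes kmp.h is included in a runtime build and uses /proc/cpuinfo only as an example path; with a bad path the two-argument constructor would terminate through __kmp_fatal, which is why try_open exists):

#include <cstdio>
// #include "kmp.h"  // provides kmp_safe_raii_file_t inside the runtime

static void demo_dump_first_line(const char *path) {
  kmp_safe_raii_file_t file;
  if (file.try_open(path, "r") != 0) { // returns errno instead of fataling
    std::fprintf(stderr, "could not open %s\n", path);
    return;
  }
  char line[256];
  if (std::fgets(line, sizeof(line), file)) // implicit conversion to FILE *
    std::printf("%s", line);
}                                           // destructor closes the handle

int main() { demo_dump_first_line("/proc/cpuinfo"); }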
4486
4487template <typename SourceType, typename TargetType,
4488 bool isSourceSmaller = (sizeof(SourceType) < sizeof(TargetType)),
4489 bool isSourceEqual = (sizeof(SourceType) == sizeof(TargetType)),
4490 bool isSourceSigned = std::is_signed<SourceType>::value,
4491 bool isTargetSigned = std::is_signed<TargetType>::value>
4492struct kmp_convert {};
4493
4494// Both types are signed; Source smaller
4495template <typename SourceType, typename TargetType>
4496struct kmp_convert<SourceType, TargetType, true, false, true, true> {
4497 static TargetType to(SourceType src) { return (TargetType)src; }
4498};
4499// Source equal
4500template <typename SourceType, typename TargetType>
4501struct kmp_convert<SourceType, TargetType, false, true, true, true> {
4502 static TargetType to(SourceType src) { return src; }
4503};
4504// Source bigger
4505template <typename SourceType, typename TargetType>
4506struct kmp_convert<SourceType, TargetType, false, false, true, true> {
4507 static TargetType to(SourceType src) {
4508 KMP_ASSERT(src <= static_cast<SourceType>(
4509 (std::numeric_limits<TargetType>::max)()));
4510 KMP_ASSERT(src >= static_cast<SourceType>(
4511 (std::numeric_limits<TargetType>::min)()));
4512 return (TargetType)src;
4513 }
4514};
4515
4516// Source signed, Target unsigned
4517// Source smaller
4518template <typename SourceType, typename TargetType>
4519struct kmp_convert<SourceType, TargetType, true, false, true, false> {
4520 static TargetType to(SourceType src) {
4521 KMP_ASSERT(src >= 0);
4522 return (TargetType)src;
4523 }
4524};
4525// Source equal
4526template <typename SourceType, typename TargetType>
4527struct kmp_convert<SourceType, TargetType, false, true, true, false> {
4528 static TargetType to(SourceType src) {
4529 KMP_ASSERT(src >= 0);
4530 return (TargetType)src;
4531 }
4532};
4533// Source bigger
4534template <typename SourceType, typename TargetType>
4535struct kmp_convert<SourceType, TargetType, false, false, true, false> {
4536 static TargetType to(SourceType src) {
4537 KMP_ASSERT(src >= 0);
4538 KMP_ASSERT(src <= static_cast<SourceType>(
4539 (std::numeric_limits<TargetType>::max)()));
4540 return (TargetType)src;
4541 }
4542};
4543
4544// Source unsigned, Target signed
4545// Source smaller
4546template <typename SourceType, typename TargetType>
4547struct kmp_convert<SourceType, TargetType, true, false, false, true> {
4548 static TargetType to(SourceType src) { return (TargetType)src; }
4549};
4550// Source equal
4551template <typename SourceType, typename TargetType>
4552struct kmp_convert<SourceType, TargetType, false, true, false, true> {
4553 static TargetType to(SourceType src) {
4554 KMP_ASSERT(src <= static_cast<SourceType>(
4555 (std::numeric_limits<TargetType>::max)()));
4556 return (TargetType)src;
4557 }
4558};
4559// Source bigger
4560template <typename SourceType, typename TargetType>
4561struct kmp_convert<SourceType, TargetType, false, false, false, true> {
4562 static TargetType to(SourceType src) {
4563 KMP_ASSERT(src <= static_cast<SourceType>(
4564 (std::numeric_limits<TargetType>::max)()));
4565 return (TargetType)src;
4566 }
4567};
4568
4569// Source unsigned, Target unsigned
4570// Source smaller
4571template <typename SourceType, typename TargetType>
4572struct kmp_convert<SourceType, TargetType, true, false, false, false> {
4573 static TargetType to(SourceType src) { return (TargetType)src; }
4574};
4575// Source equal
4576template <typename SourceType, typename TargetType>
4577struct kmp_convert<SourceType, TargetType, false, true, false, false> {
4578 static TargetType to(SourceType src) { return src; }
4579};
4580// Source bigger
4581template <typename SourceType, typename TargetType>
4582struct kmp_convert<SourceType, TargetType, false, false, false, false> {
4583 static TargetType to(SourceType src) {
4584 KMP_ASSERT(src <= static_cast<SourceType>(
4585 (std::numeric_limits<TargetType>::max)()));
4586 return (TargetType)src;
4587 }
4588};
4589
4590template <typename T1, typename T2>
4591static inline void __kmp_type_convert(T1 src, T2 *dest) {
4592 *dest = kmp_convert<T1, T2>::to(src);
4593}
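A small usage sketch for the checked conversion helper above (assumes kmp.h and the x86_64 Linux target of this build, where size_t is wider than int; with assertions enabled an out-of-range source trips the KMP_ASSERT in the matching kmp_convert specialization rather than silently truncating):

#include <cstddef>
#include <cstdio>
// #include "kmp.h"  // provides kmp_convert / __kmp_type_convert inside the runtime

int main() {
  std::size_t nproc = 64; // unsigned, wider source
  int nproc_i = 0;        // signed, narrower target
  // Picks the "source unsigned, target signed, source bigger" specialization,
  // which asserts nproc <= std::numeric_limits<int>::max() before casting.
  __kmp_type_convert(nproc, &nproc_i);
  std::printf("%d\n", nproc_i);
}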
4594
4595#endif /* KMP_H */